1 /* Subroutines used for code generation on the Synopsys DesignWare ARC cpu.
2 Copyright (C) 1994-2016 Free Software Foundation, Inc.
3
4 Sources derived from work done by Sankhya Technologies (www.sankhya.com) on
5 behalf of Synopsys Inc.
6
7 Position Independent Code support added, code cleaned up,
8 comments and support for ARC700 instructions added by
9 Saurabh Verma (saurabh.verma@codito.com)
10 Ramana Radhakrishnan (ramana.radhakrishnan@codito.com)
11
12 Fixing ABI inconsistencies, optimizations for ARC600 / ARC700 pipelines,
13 profiling support added by Joern Rennecke <joern.rennecke@embecosm.com>
14
15 This file is part of GCC.
16
17 GCC is free software; you can redistribute it and/or modify
18 it under the terms of the GNU General Public License as published by
19 the Free Software Foundation; either version 3, or (at your option)
20 any later version.
21
22 GCC is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 GNU General Public License for more details.
26
27 You should have received a copy of the GNU General Public License
28 along with GCC; see the file COPYING3. If not see
29 <http://www.gnu.org/licenses/>. */
30
31 #include "config.h"
32 #include "system.h"
33 #include "coretypes.h"
34 #include "backend.h"
35 #include "target.h"
36 #include "rtl.h"
37 #include "tree.h"
38 #include "cfghooks.h"
39 #include "df.h"
40 #include "tm_p.h"
41 #include "stringpool.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "diagnostic.h"
47 #include "fold-const.h"
48 #include "varasm.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "langhooks.h"
57 #include "tm-constrs.h"
58 #include "reload.h" /* For operands_match_p */
59 #include "cfgrtl.h"
60 #include "tree-pass.h"
61 #include "context.h"
62 #include "builtins.h"
63 #include "rtl-iter.h"
64 #include "alias.h"
65
66 /* Which cpu we're compiling for (ARC600, ARC601, ARC700). */
67 static const char *arc_cpu_string = "";
68
69 /* ??? Loads can handle any constant, stores can only handle small ones. */
70 /* OTOH, LIMMs cost extra, so their usefulness is limited. */
71 #define RTX_OK_FOR_OFFSET_P(MODE, X) \
72 (GET_CODE (X) == CONST_INT \
73 && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & -4, \
74 (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3 \
75 ? 0 \
76 : -(-GET_MODE_SIZE (MODE) | -4) >> 1)))
77
78 #define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X, INDEX, STRICT) \
79 (GET_CODE (X) == PLUS \
80 && RTX_OK_FOR_BASE_P (XEXP (X, 0), (STRICT)) \
81 && ((INDEX && RTX_OK_FOR_INDEX_P (XEXP (X, 1), (STRICT)) \
82 && GET_MODE_SIZE ((MODE)) <= 4) \
83 || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1))))
84
85 #define LEGITIMATE_SCALED_ADDRESS_P(MODE, X, STRICT) \
86 (GET_CODE (X) == PLUS \
87 && GET_CODE (XEXP (X, 0)) == MULT \
88 && RTX_OK_FOR_INDEX_P (XEXP (XEXP (X, 0), 0), (STRICT)) \
89 && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \
90 && ((GET_MODE_SIZE (MODE) == 2 && INTVAL (XEXP (XEXP (X, 0), 1)) == 2) \
91 || (GET_MODE_SIZE (MODE) == 4 && INTVAL (XEXP (XEXP (X, 0), 1)) == 4)) \
92 && (RTX_OK_FOR_BASE_P (XEXP (X, 1), (STRICT)) \
93 || (flag_pic ? CONST_INT_P (XEXP (X, 1)) : CONSTANT_P (XEXP (X, 1)))))
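/* For example (illustrative, not an exhaustive description): with MODE
   == SImode this accepts (plus (mult (reg) (const_int 4)) (reg)), i.e.
   base + index * 4, the hardware's scaled addressing form.  */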
94
95 #define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \
96 (GET_CODE (X) == PLUS \
97 && (REG_P (XEXP ((X), 0)) && REGNO (XEXP ((X), 0)) == SDATA_BASE_REGNUM) \
98 && ((GET_CODE (XEXP((X),1)) == SYMBOL_REF \
99 && SYMBOL_REF_SMALL_P (XEXP ((X), 1))) \
100 || (GET_CODE (XEXP ((X), 1)) == CONST \
101 && GET_CODE (XEXP (XEXP ((X), 1), 0)) == PLUS \
102 && GET_CODE (XEXP (XEXP (XEXP ((X), 1), 0), 0)) == SYMBOL_REF \
103 && SYMBOL_REF_SMALL_P (XEXP (XEXP (XEXP ((X), 1), 0), 0)) \
104 && GET_CODE (XEXP(XEXP (XEXP ((X), 1), 0), 1)) == CONST_INT)))
105
106 /* Array of valid operand punctuation characters. */
107 char arc_punct_chars[256];
108
109 /* State used by arc_ccfsm_advance to implement conditional execution. */
110 struct GTY (()) arc_ccfsm
111 {
112 int state;
113 int cc;
114 rtx cond;
115 rtx_insn *target_insn;
116 int target_label;
117 };
118
119 #define arc_ccfsm_current cfun->machine->ccfsm_current
120
121 #define ARC_CCFSM_BRANCH_DELETED_P(STATE) \
122 ((STATE)->state == 1 || (STATE)->state == 2)
123
124 /* Indicate we're conditionalizing insns now. */
125 #define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \
126 ((STATE)->state += 2)
127
128 #define ARC_CCFSM_COND_EXEC_P(STATE) \
129 ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5 \
130 || current_insn_predicate)
131
132 /* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE. */
133 #define CCFSM_ISCOMPACT(INSN,STATE) \
134 (ARC_CCFSM_COND_EXEC_P (STATE) \
135 ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
136 || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
137 : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
138
139 /* Likewise, but also consider that INSN might be in a delay slot of JUMP. */
140 #define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \
141 ((ARC_CCFSM_COND_EXEC_P (STATE) \
142 || (JUMP_P (JUMP) \
143 && INSN_ANNULLED_BRANCH_P (JUMP) \
144 && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \
145 ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
146 || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
147 : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
148
149 /* The maximum number of insns skipped which will be conditionalized if
150 possible. */
151 /* When optimizing for speed:
152 Let p be the probability that the potentially skipped insns need to
153 be executed, pn the cost of a correctly predicted non-taken branch,
154 mt the cost of a mis/non-predicted taken branch,
155 mn mispredicted non-taken, pt correctly predicted taken;
156 costs expressed in numbers of instructions like the ones considered
157 skipping.
158 Unfortunately we don't have a measure of predictability - this
159 is linked to probability only in that in the no-eviction-scenario
160 there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger
161 value that can be assumed *if* the distribution is perfectly random.
162 A predictability of 1 is perfectly plausible no matter what p is,
163 because the decision could be dependent on an invocation parameter
164 of the program.
165 For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn
166 For small p, we want MAX_INSNS_SKIPPED == pt
167
168 When optimizing for size:
169 We want to skip the insn unless we could use 16 bit opcodes for the
170 non-conditionalized insn to balance the branch length or more.
171 Performance can be the tie-breaker. */
172 /* If the potentially-skipped insns are likely to be executed, we'll
173 generally save one non-taken branch
174 o
175 this to be no less than the 1/p */
176 #define MAX_INSNS_SKIPPED 3
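/* Worked example of the speed formula above, with purely illustrative
   (assumed, not measured) costs: for p = 0.75, pn = 1 and mt = 4,
   pn/(1-p) + mt - pn = 1/0.25 + 4 - 1 = 7, i.e. up to 7 insns would be
   worth conditionalizing; for small p the bound is simply pt.  */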
177
178 /* A nop is needed between a 4 byte insn that sets the condition codes and
179 a branch that uses them (the same isn't true for an 8 byte insn that sets
180 the condition codes). Set by arc_ccfsm_advance. Used by
181 arc_print_operand. */
182
183 static int get_arc_condition_code (rtx);
184
185 static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);
186
187 /* Initialized arc_attribute_table to NULL since arc does not have any
188 machine specific supported attributes. */
189 const struct attribute_spec arc_attribute_table[] =
190 {
191 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
192 affects_type_identity } */
193 { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute, true },
194 /* Function calls made to this symbol must be done indirectly, because
195 it may lie outside of the 21/25 bit addressing range of a normal function
196 call. */
197 { "long_call", 0, 0, false, true, true, NULL, false },
198 /* Whereas these functions are always known to reside within the 25 bit
199 addressing range of unconditionalized bl. */
200 { "medium_call", 0, 0, false, true, true, NULL, false },
201 /* And these functions are always known to reside within the 21 bit
202 addressing range of blcc. */
203 { "short_call", 0, 0, false, true, true, NULL, false },
204 { NULL, 0, 0, false, false, false, NULL, false }
205 };
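/* Illustrative user-level usage of these attributes (hypothetical
   function names, not part of this file):

     void timer_isr (void) __attribute__ ((interrupt ("ilink1")));
     int far_helper (int) __attribute__ ((long_call));
     int near_helper (int) __attribute__ ((short_call));  */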
206 static int arc_comp_type_attributes (const_tree, const_tree);
207 static void arc_file_start (void);
208 static void arc_internal_label (FILE *, const char *, unsigned long);
209 static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
210 tree);
211 static int arc_address_cost (rtx, machine_mode, addr_space_t, bool);
212 static void arc_encode_section_info (tree decl, rtx rtl, int first);
213
214 static void arc_init_builtins (void);
215 static rtx arc_expand_builtin (tree, rtx, rtx, machine_mode, int);
216
217 static int branch_dest (rtx);
218
219 static void arc_output_pic_addr_const (FILE *, rtx, int);
220 void emit_pic_move (rtx *, machine_mode);
221 bool arc_legitimate_pic_operand_p (rtx);
222 static bool arc_function_ok_for_sibcall (tree, tree);
223 static rtx arc_function_value (const_tree, const_tree, bool);
224 const char * output_shift (rtx *);
225 static void arc_reorg (void);
226 static bool arc_in_small_data_p (const_tree);
227
228 static void arc_init_reg_tables (void);
229 static bool arc_return_in_memory (const_tree, const_tree);
230 static bool arc_vector_mode_supported_p (machine_mode);
231
232 static bool arc_can_use_doloop_p (const widest_int &, const widest_int &,
233 unsigned int, bool);
234 static const char *arc_invalid_within_doloop (const rtx_insn *);
235
236 static void output_short_suffix (FILE *file);
237
238 static bool arc_frame_pointer_required (void);
239
240 static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
241 unsigned int,
242 enum by_pieces_operation op,
243 bool);
244
245 /* Implements target hook vector_mode_supported_p. */
246
247 static bool
248 arc_vector_mode_supported_p (machine_mode mode)
249 {
250 if (!TARGET_SIMD_SET)
251 return false;
252
253 if ((mode == V4SImode)
254 || (mode == V8HImode))
255 return true;
256
257 return false;
258 }
259
260
261 /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */
262 static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
263 static rtx arc_delegitimize_address (rtx);
264 static bool arc_can_follow_jump (const rtx_insn *follower,
265 const rtx_insn *followee);
266
267 static rtx frame_insn (rtx);
268 static void arc_function_arg_advance (cumulative_args_t, machine_mode,
269 const_tree, bool);
270 static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
271
272 static void arc_finalize_pic (void);
273
274 /* Initialize the GCC target structure. */
275 #undef TARGET_COMP_TYPE_ATTRIBUTES
276 #define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes
277 #undef TARGET_ASM_FILE_START
278 #define TARGET_ASM_FILE_START arc_file_start
279 #undef TARGET_ATTRIBUTE_TABLE
280 #define TARGET_ATTRIBUTE_TABLE arc_attribute_table
281 #undef TARGET_ASM_INTERNAL_LABEL
282 #define TARGET_ASM_INTERNAL_LABEL arc_internal_label
283 #undef TARGET_RTX_COSTS
284 #define TARGET_RTX_COSTS arc_rtx_costs
285 #undef TARGET_ADDRESS_COST
286 #define TARGET_ADDRESS_COST arc_address_cost
287
288 #undef TARGET_ENCODE_SECTION_INFO
289 #define TARGET_ENCODE_SECTION_INFO arc_encode_section_info
290
291 #undef TARGET_CANNOT_FORCE_CONST_MEM
292 #define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem
293
294 #undef TARGET_INIT_BUILTINS
295 #define TARGET_INIT_BUILTINS arc_init_builtins
296
297 #undef TARGET_EXPAND_BUILTIN
298 #define TARGET_EXPAND_BUILTIN arc_expand_builtin
299
300 #undef TARGET_BUILTIN_DECL
301 #define TARGET_BUILTIN_DECL arc_builtin_decl
302
303 #undef TARGET_ASM_OUTPUT_MI_THUNK
304 #define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk
305
306 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
307 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
308
309 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
310 #define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall
311
312 #undef TARGET_MACHINE_DEPENDENT_REORG
313 #define TARGET_MACHINE_DEPENDENT_REORG arc_reorg
314
315 #undef TARGET_IN_SMALL_DATA_P
316 #define TARGET_IN_SMALL_DATA_P arc_in_small_data_p
317
318 #undef TARGET_PROMOTE_FUNCTION_MODE
319 #define TARGET_PROMOTE_FUNCTION_MODE \
320 default_promote_function_mode_always_promote
321
322 #undef TARGET_PROMOTE_PROTOTYPES
323 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
324
325 #undef TARGET_RETURN_IN_MEMORY
326 #define TARGET_RETURN_IN_MEMORY arc_return_in_memory
327 #undef TARGET_PASS_BY_REFERENCE
328 #define TARGET_PASS_BY_REFERENCE arc_pass_by_reference
329
330 #undef TARGET_SETUP_INCOMING_VARARGS
331 #define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs
332
333 #undef TARGET_ARG_PARTIAL_BYTES
334 #define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes
335
336 #undef TARGET_MUST_PASS_IN_STACK
337 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
338
339 #undef TARGET_FUNCTION_VALUE
340 #define TARGET_FUNCTION_VALUE arc_function_value
341
342 #undef TARGET_SCHED_ADJUST_PRIORITY
343 #define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority
344
345 #undef TARGET_VECTOR_MODE_SUPPORTED_P
346 #define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
347
348 #undef TARGET_CAN_USE_DOLOOP_P
349 #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
350
351 #undef TARGET_INVALID_WITHIN_DOLOOP
352 #define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop
353
354 #undef TARGET_PRESERVE_RELOAD_P
355 #define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p
356
357 #undef TARGET_CAN_FOLLOW_JUMP
358 #define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump
359
360 #undef TARGET_DELEGITIMIZE_ADDRESS
361 #define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address
362
363 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
364 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
365 arc_use_by_pieces_infrastructure_p
366
367 /* Usually, we will be able to scale anchor offsets.
368 When this fails, we want LEGITIMIZE_ADDRESS to kick in. */
369 #undef TARGET_MIN_ANCHOR_OFFSET
370 #define TARGET_MIN_ANCHOR_OFFSET (-1024)
371 #undef TARGET_MAX_ANCHOR_OFFSET
372 #define TARGET_MAX_ANCHOR_OFFSET (1020)
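/* A plausible reading (assumption, not verified here): -1024..1020 is
   the 9 bit signed load/store offset range, -256..255, scaled by 4 for
   word-sized accesses.  */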
373
374 #undef TARGET_SECONDARY_RELOAD
375 #define TARGET_SECONDARY_RELOAD arc_secondary_reload
376
377 #define TARGET_OPTION_OVERRIDE arc_override_options
378
379 #define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage
380
381 #define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline
382
383 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arc_trampoline_adjust_address
384
385 #define TARGET_CAN_ELIMINATE arc_can_eliminate
386
387 #define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required
388
389 #define TARGET_FUNCTION_ARG arc_function_arg
390
391 #define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance
392
393 #define TARGET_LEGITIMATE_CONSTANT_P arc_legitimate_constant_p
394
395 #define TARGET_LEGITIMATE_ADDRESS_P arc_legitimate_address_p
396
397 #define TARGET_MODE_DEPENDENT_ADDRESS_P arc_mode_dependent_address_p
398
399 #define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address
400
401 #define TARGET_ADJUST_INSN_LENGTH arc_adjust_insn_length
402
403 #define TARGET_INSN_LENGTH_PARAMETERS arc_insn_length_parameters
404
405 #undef TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P
406 #define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P \
407 arc_no_speculation_in_delay_slots_p
408
409 #undef TARGET_LRA_P
410 #define TARGET_LRA_P arc_lra_p
411 #define TARGET_REGISTER_PRIORITY arc_register_priority
412 /* Stores with scaled offsets have different displacement ranges. */
413 #define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
414 #define TARGET_SPILL_CLASS arc_spill_class
415
416 #include "target-def.h"
417
418 #undef TARGET_ASM_ALIGNED_HI_OP
419 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
420 #undef TARGET_ASM_ALIGNED_SI_OP
421 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
422
423 #undef TARGET_DWARF_REGISTER_SPAN
424 #define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
425
426 /* Try to keep the (mov:DF _, reg) as early as possible so
427 that the d<add/sub/mul>h-lr insns appear together and can
428 use the peephole2 pattern. */
429
430 static int
431 arc_sched_adjust_priority (rtx_insn *insn, int priority)
432 {
433 rtx set = single_set (insn);
434 if (set
435 && GET_MODE (SET_SRC(set)) == DFmode
436 && GET_CODE (SET_SRC(set)) == REG)
437 {
438 /* Incrementing priority by 20 (empirically derived). */
439 return priority + 20;
440 }
441
442 return priority;
443 }
444
445 /* For ARC base register + offset addressing, the validity of the
446 address is mode-dependent for most of the offset range, as the
447 offset can be scaled by the access size.
448 We don't expose these as mode-dependent addresses in the
449 mode_dependent_address_p target hook, because that would disable
450 lots of optimizations, and most uses of these addresses are for 32
451 or 64 bit accesses anyways, which are fine.
452 However, that leaves some addresses for 8 / 16 bit values not
453 properly reloaded by the generic code, which is why we have to
454 schedule secondary reloads for these. */
455
456 static reg_class_t
457 arc_secondary_reload (bool in_p,
458 rtx x,
459 reg_class_t cl,
460 machine_mode mode,
461 secondary_reload_info *sri)
462 {
463 enum rtx_code code = GET_CODE (x);
464
465 if (cl == DOUBLE_REGS)
466 return GENERAL_REGS;
467
468 /* The loop counter register can be stored, but not loaded directly. */
469 if ((cl == LPCOUNT_REG || cl == WRITABLE_CORE_REGS)
470 && in_p && MEM_P (x))
471 return GENERAL_REGS;
472
473 /* If we have a subreg (reg), where reg is a pseudo (that will end in
474 a memory location), then we may need a scratch register to handle
475 the fp/sp+largeoffset address. */
476 if (code == SUBREG)
477 {
478 rtx addr = NULL_RTX;
479 x = SUBREG_REG (x);
480
481 if (REG_P (x))
482 {
483 int regno = REGNO (x);
484 if (regno >= FIRST_PSEUDO_REGISTER)
485 regno = reg_renumber[regno];
486
487 if (regno != -1)
488 return NO_REGS;
489
490 /* It is a pseudo that ends in a stack location. */
491 if (reg_equiv_mem (REGNO (x)))
492 {
493 /* Get the equivalent address and check the range of the
494 offset. */
495 rtx mem = reg_equiv_mem (REGNO (x));
496 addr = find_replacement (&XEXP (mem, 0));
497 }
498 }
499 else
500 {
501 gcc_assert (MEM_P (x));
502 addr = XEXP (x, 0);
503 addr = simplify_rtx (addr);
504 }
505 if (addr && GET_CODE (addr) == PLUS
506 && CONST_INT_P (XEXP (addr, 1))
507 && (!RTX_OK_FOR_OFFSET_P (mode, XEXP (addr, 1))))
508 {
509 switch (mode)
510 {
511 case QImode:
512 sri->icode =
513 in_p ? CODE_FOR_reload_qi_load : CODE_FOR_reload_qi_store;
514 break;
515 case HImode:
516 sri->icode =
517 in_p ? CODE_FOR_reload_hi_load : CODE_FOR_reload_hi_store;
518 break;
519 default:
520 break;
521 }
522 }
523 }
524 return NO_REGS;
525 }
526
527 /* Convert reloads using offsets that are too large to use indirect
528 addressing. */
529
530 void
531 arc_secondary_reload_conv (rtx reg, rtx mem, rtx scratch, bool store_p)
532 {
533 rtx addr;
534
535 gcc_assert (GET_CODE (mem) == MEM);
536 addr = XEXP (mem, 0);
537
538 /* Large offset: use a move. FIXME: ld ops accept limms as
539 offsets. Hence, the following move insn is not required. */
540 emit_move_insn (scratch, addr);
541 mem = replace_equiv_address_nv (mem, scratch);
542
543 /* Now create the move. */
544 if (store_p)
545 emit_insn (gen_rtx_SET (mem, reg));
546 else
547 emit_insn (gen_rtx_SET (reg, mem));
548
549 return;
550 }
551
552 static unsigned arc_ifcvt (void);
553
554 namespace {
555
556 const pass_data pass_data_arc_ifcvt =
557 {
558 RTL_PASS,
559 "arc_ifcvt", /* name */
560 OPTGROUP_NONE, /* optinfo_flags */
561 TV_IFCVT2, /* tv_id */
562 0, /* properties_required */
563 0, /* properties_provided */
564 0, /* properties_destroyed */
565 0, /* todo_flags_start */
566 TODO_df_finish /* todo_flags_finish */
567 };
568
569 class pass_arc_ifcvt : public rtl_opt_pass
570 {
571 public:
572 pass_arc_ifcvt(gcc::context *ctxt)
573 : rtl_opt_pass(pass_data_arc_ifcvt, ctxt)
574 {}
575
576 /* opt_pass methods: */
577 opt_pass * clone () { return new pass_arc_ifcvt (m_ctxt); }
578 virtual unsigned int execute (function *) { return arc_ifcvt (); }
579 };
580
581 } // anon namespace
582
583 rtl_opt_pass *
584 make_pass_arc_ifcvt (gcc::context *ctxt)
585 {
586 return new pass_arc_ifcvt (ctxt);
587 }
588
589 static unsigned arc_predicate_delay_insns (void);
590
591 namespace {
592
593 const pass_data pass_data_arc_predicate_delay_insns =
594 {
595 RTL_PASS,
596 "arc_predicate_delay_insns", /* name */
597 OPTGROUP_NONE, /* optinfo_flags */
598 TV_IFCVT2, /* tv_id */
599 0, /* properties_required */
600 0, /* properties_provided */
601 0, /* properties_destroyed */
602 0, /* todo_flags_start */
603 TODO_df_finish /* todo_flags_finish */
604 };
605
606 class pass_arc_predicate_delay_insns : public rtl_opt_pass
607 {
608 public:
609 pass_arc_predicate_delay_insns(gcc::context *ctxt)
610 : rtl_opt_pass(pass_data_arc_predicate_delay_insns, ctxt)
611 {}
612
613 /* opt_pass methods: */
614 virtual unsigned int execute (function *)
615 {
616 return arc_predicate_delay_insns ();
617 }
618 };
619
620 } // anon namespace
621
622 rtl_opt_pass *
623 make_pass_arc_predicate_delay_insns (gcc::context *ctxt)
624 {
625 return new pass_arc_predicate_delay_insns (ctxt);
626 }
627
628 /* Called by OVERRIDE_OPTIONS to initialize various things. */
629
630 void
631 arc_init (void)
632 {
633 enum attr_tune tune_dflt = TUNE_NONE;
634
635 switch (arc_cpu)
636 {
637 case PROCESSOR_ARC600:
638 arc_cpu_string = "ARC600";
639 tune_dflt = TUNE_ARC600;
640 break;
641
642 case PROCESSOR_ARC601:
643 arc_cpu_string = "ARC601";
644 tune_dflt = TUNE_ARC600;
645 break;
646
647 case PROCESSOR_ARC700:
648 arc_cpu_string = "ARC700";
649 tune_dflt = TUNE_ARC700_4_2_STD;
650 break;
651
652 case PROCESSOR_ARCEM:
653 arc_cpu_string = "EM";
654 break;
655
656 case PROCESSOR_ARCHS:
657 arc_cpu_string = "HS";
658 break;
659
660 default:
661 gcc_unreachable ();
662 }
663
664 if (arc_tune == TUNE_NONE)
665 arc_tune = tune_dflt;
666 /* Note: arc_multcost is only used in rtx_cost if speed is true. */
667 if (arc_multcost < 0)
668 switch (arc_tune)
669 {
670 case TUNE_ARC700_4_2_STD:
671 /* latency 7;
672 max throughput (1 multiply + 4 other insns) / 5 cycles. */
673 arc_multcost = COSTS_N_INSNS (4);
674 if (TARGET_NOMPY_SET)
675 arc_multcost = COSTS_N_INSNS (30);
676 break;
677 case TUNE_ARC700_4_2_XMAC:
678 /* latency 5;
679 max throughput (1 multiply + 2 other insns) / 3 cycles. */
680 arc_multcost = COSTS_N_INSNS (3);
681 if (TARGET_NOMPY_SET)
682 arc_multcost = COSTS_N_INSNS (30);
683 break;
684 case TUNE_ARC600:
685 if (TARGET_MUL64_SET)
686 {
687 arc_multcost = COSTS_N_INSNS (4);
688 break;
689 }
690 /* Fall through. */
691 default:
692 arc_multcost = COSTS_N_INSNS (30);
693 break;
694 }
695
696 /* Support mul64 generation only for ARC600. */
697 if (TARGET_MUL64_SET && (!TARGET_ARC600_FAMILY))
698 error ("-mmul64 not supported for ARC700 or ARCv2");
699
700 /* MPY instructions valid only for ARC700 or ARCv2. */
701 if (TARGET_NOMPY_SET && TARGET_ARC600_FAMILY)
702 error ("-mno-mpy supported only for ARC700 or ARCv2");
703
704 /* mul/mac instructions only for ARC600. */
705 if (TARGET_MULMAC_32BY16_SET && (!TARGET_ARC600_FAMILY))
706 error ("-mmul32x16 supported only for ARC600 or ARC601");
707
708 if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR)
709 error ("-mno-dpfp-lrsr supported only with -mdpfp");
710
711 /* FPX-1. No fast and compact together. */
712 if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET)
713 || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET))
714 error ("FPX fast and compact options cannot be specified together");
715
716 /* FPX-2. No fast-spfp for arc600 or arc601. */
717 if (TARGET_SPFP_FAST_SET && TARGET_ARC600_FAMILY)
718 error ("-mspfp_fast not available on ARC600 or ARC601");
719
720 /* FPX-3. No FPX extensions on pre-ARC600 cores. */
721 if ((TARGET_DPFP || TARGET_SPFP)
722 && (!TARGET_ARCOMPACT_FAMILY && !TARGET_EM))
723 error ("FPX extensions not available on pre-ARC600 cores");
724
725 /* FPX-4. No FPX extensions mixed with FPU extensions for ARC HS
726 cpus. */
727 if ((TARGET_DPFP || TARGET_SPFP)
728 && TARGET_HARD_FLOAT
729 && TARGET_HS)
730 error ("No FPX/FPU mixing allowed");
731
732 /* Only selected multiplier configurations are available for HS. */
733 if (TARGET_HS && ((arc_mpy_option > 2 && arc_mpy_option < 7)
734 || (arc_mpy_option == 1)))
735 error ("This multiplier configuration is not available for HS cores");
736
737 /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic. */
738 if (flag_pic && TARGET_ARC600_FAMILY)
739 {
740 warning (DK_WARNING,
741 "PIC is not supported for %s. Generating non-PIC code only..",
742 arc_cpu_string);
743 flag_pic = 0;
744 }
745
746 if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
747 error ("-matomic is only supported for ARC700 or ARC HS cores");
748
749 /* ll64 ops only available for HS. */
750 if (TARGET_LL64 && !TARGET_HS)
751 error ("-mll64 is only supported for ARC HS cores");
752
753 /* FPU support only for V2. */
754 if (TARGET_HARD_FLOAT)
755 {
756 if (TARGET_EM
757 && (arc_fpu_build & ~(FPU_SP | FPU_SF | FPU_SC | FPU_SD | FPX_DP)))
758 error ("FPU double precision options are available for ARC HS only");
759 if (TARGET_HS && (arc_fpu_build & FPX_DP))
760 error ("FPU double precision assist "
761 "options are not available for ARC HS");
762 if (!TARGET_HS && !TARGET_EM)
763 error ("FPU options are available for ARCv2 architecture only");
764 }
765
766 arc_init_reg_tables ();
767
768 /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */
769 memset (arc_punct_chars, 0, sizeof (arc_punct_chars));
770 arc_punct_chars['#'] = 1;
771 arc_punct_chars['*'] = 1;
772 arc_punct_chars['?'] = 1;
773 arc_punct_chars['!'] = 1;
774 arc_punct_chars['^'] = 1;
775 arc_punct_chars['&'] = 1;
776 arc_punct_chars['+'] = 1;
777 arc_punct_chars['_'] = 1;
778
779 if (optimize > 1 && !TARGET_NO_COND_EXEC)
780 {
781 /* There are two target-independent ifcvt passes, and arc_reorg may do
782 one or more arc_ifcvt calls. */
783 opt_pass *pass_arc_ifcvt_4 = make_pass_arc_ifcvt (g);
784 struct register_pass_info arc_ifcvt4_info
785 = { pass_arc_ifcvt_4, "dbr", 1, PASS_POS_INSERT_AFTER };
786 struct register_pass_info arc_ifcvt5_info
787 = { pass_arc_ifcvt_4->clone (), "shorten", 1, PASS_POS_INSERT_BEFORE };
788
789 register_pass (&arc_ifcvt4_info);
790 register_pass (&arc_ifcvt5_info);
791 }
792
793 if (flag_delayed_branch)
794 {
795 opt_pass *pass_arc_predicate_delay_insns
796 = make_pass_arc_predicate_delay_insns (g);
797 struct register_pass_info arc_predicate_delay_info
798 = { pass_arc_predicate_delay_insns, "dbr", 1, PASS_POS_INSERT_AFTER };
799
800 register_pass (&arc_predicate_delay_info);
801 }
802 }
803
804 /* Check ARC options, generate derived target attributes. */
805
806 static void
807 arc_override_options (void)
808 {
809 if (arc_cpu == PROCESSOR_NONE)
810 arc_cpu = PROCESSOR_ARC700;
811
812 if (arc_size_opt_level == 3)
813 optimize_size = 1;
814
815 if (flag_pic)
816 target_flags |= MASK_NO_SDATA_SET;
817
818 if (flag_no_common == 255)
819 flag_no_common = !TARGET_NO_SDATA_SET;
820
821 /* TARGET_COMPACT_CASESI needs the "q" register class. */
822 if (TARGET_MIXED_CODE)
823 TARGET_Q_CLASS = 1;
824 if (!TARGET_Q_CLASS)
825 TARGET_COMPACT_CASESI = 0;
826 if (TARGET_COMPACT_CASESI)
827 TARGET_CASE_VECTOR_PC_RELATIVE = 1;
828
829 /* These need to be done at start up. It's convenient to do them here. */
830 arc_init ();
831 }
832
833 /* The condition codes of the ARC, and the inverse function. */
834 /* For short branches, the "c" / "nc" names are not defined in the ARC
835 Programmer's manual, so we have to use "lo" / "hs" instead. */
836 static const char *arc_condition_codes[] =
837 {
838 "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv",
839 "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0
840 };
841
842 enum arc_cc_code_index
843 {
844 ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N,
845 ARC_CC_C, ARC_CC_NC, ARC_CC_V, ARC_CC_NV,
846 ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ,
847 ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC
848 };
849
850 #define ARC_INVERSE_CONDITION_CODE(X) ((X) ^ 1)
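/* The enum above pairs each condition with its inverse in adjacent slots,
   so flipping the low bit inverts it: e.g. ARC_CC_EQ (2) ^ 1 == ARC_CC_NE (3),
   and ARC_CC_LT ^ 1 == ARC_CC_GE.  */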
851
852 /* Returns the index of the ARC condition code string in
853 `arc_condition_codes'. COMPARISON should be an rtx like
854 `(eq (...) (...))'. */
855
856 static int
857 get_arc_condition_code (rtx comparison)
858 {
859 switch (GET_MODE (XEXP (comparison, 0)))
860 {
861 case CCmode:
862 case SImode: /* For BRcc. */
863 switch (GET_CODE (comparison))
864 {
865 case EQ : return ARC_CC_EQ;
866 case NE : return ARC_CC_NE;
867 case GT : return ARC_CC_GT;
868 case LE : return ARC_CC_LE;
869 case GE : return ARC_CC_GE;
870 case LT : return ARC_CC_LT;
871 case GTU : return ARC_CC_HI;
872 case LEU : return ARC_CC_LS;
873 case LTU : return ARC_CC_LO;
874 case GEU : return ARC_CC_HS;
875 default : gcc_unreachable ();
876 }
877 case CC_ZNmode:
878 switch (GET_CODE (comparison))
879 {
880 case EQ : return ARC_CC_EQ;
881 case NE : return ARC_CC_NE;
882 case GE: return ARC_CC_P;
883 case LT: return ARC_CC_N;
884 case GT : return ARC_CC_PNZ;
885 default : gcc_unreachable ();
886 }
887 case CC_Zmode:
888 switch (GET_CODE (comparison))
889 {
890 case EQ : return ARC_CC_EQ;
891 case NE : return ARC_CC_NE;
892 default : gcc_unreachable ();
893 }
894 case CC_Cmode:
895 switch (GET_CODE (comparison))
896 {
897 case LTU : return ARC_CC_C;
898 case GEU : return ARC_CC_NC;
899 default : gcc_unreachable ();
900 }
901 case CC_FP_GTmode:
902 if (TARGET_ARGONAUT_SET && TARGET_SPFP)
903 switch (GET_CODE (comparison))
904 {
905 case GT : return ARC_CC_N;
906 case UNLE: return ARC_CC_P;
907 default : gcc_unreachable ();
908 }
909 else
910 switch (GET_CODE (comparison))
911 {
912 case GT : return ARC_CC_HI;
913 case UNLE : return ARC_CC_LS;
914 default : gcc_unreachable ();
915 }
916 case CC_FP_GEmode:
917 /* Same for FPX and non-FPX. */
918 switch (GET_CODE (comparison))
919 {
920 case GE : return ARC_CC_HS;
921 case UNLT : return ARC_CC_LO;
922 default : gcc_unreachable ();
923 }
924 case CC_FP_UNEQmode:
925 switch (GET_CODE (comparison))
926 {
927 case UNEQ : return ARC_CC_EQ;
928 case LTGT : return ARC_CC_NE;
929 default : gcc_unreachable ();
930 }
931 case CC_FP_ORDmode:
932 switch (GET_CODE (comparison))
933 {
934 case UNORDERED : return ARC_CC_C;
935 case ORDERED : return ARC_CC_NC;
936 default : gcc_unreachable ();
937 }
938 case CC_FPXmode:
939 switch (GET_CODE (comparison))
940 {
941 case EQ : return ARC_CC_EQ;
942 case NE : return ARC_CC_NE;
943 case UNORDERED : return ARC_CC_C;
944 case ORDERED : return ARC_CC_NC;
945 case LTGT : return ARC_CC_HI;
946 case UNEQ : return ARC_CC_LS;
947 default : gcc_unreachable ();
948 }
949 case CC_FPUmode:
950 switch (GET_CODE (comparison))
951 {
952 case EQ : return ARC_CC_EQ;
953 case NE : return ARC_CC_NE;
954 case GT : return ARC_CC_GT;
955 case GE : return ARC_CC_GE;
956 case LT : return ARC_CC_C;
957 case LE : return ARC_CC_LS;
958 case UNORDERED : return ARC_CC_V;
959 case ORDERED : return ARC_CC_NV;
960 case UNGT : return ARC_CC_HI;
961 case UNGE : return ARC_CC_HS;
962 case UNLT : return ARC_CC_LT;
963 case UNLE : return ARC_CC_LE;
964 /* UNEQ and LTGT do not have representation. */
965 case LTGT : /* Fall through. */
966 case UNEQ : /* Fall through. */
967 default : gcc_unreachable ();
968 }
969 case CC_FPU_UNEQmode:
970 switch (GET_CODE (comparison))
971 {
972 case LTGT : return ARC_CC_NE;
973 case UNEQ : return ARC_CC_EQ;
974 default : gcc_unreachable ();
975 }
976 default : gcc_unreachable ();
977 }
978 /*NOTREACHED*/
979 return (42);
980 }
981
982 /* Return true if COMPARISON has a short form that can accommodate OFFSET. */
983
984 bool
985 arc_short_comparison_p (rtx comparison, int offset)
986 {
987 gcc_assert (ARC_CC_NC == ARC_CC_HS);
988 gcc_assert (ARC_CC_C == ARC_CC_LO);
989 switch (get_arc_condition_code (comparison))
990 {
991 case ARC_CC_EQ: case ARC_CC_NE:
992 return offset >= -512 && offset <= 506;
993 case ARC_CC_GT: case ARC_CC_LE: case ARC_CC_GE: case ARC_CC_LT:
994 case ARC_CC_HI: case ARC_CC_LS: case ARC_CC_LO: case ARC_CC_HS:
995 return offset >= -64 && offset <= 58;
996 default:
997 return false;
998 }
999 }
1000
1001 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1002 return the mode to be used for the comparison. */
1003
1004 machine_mode
1005 arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
1006 {
1007 machine_mode mode = GET_MODE (x);
1008 rtx x1;
1009
1010 /* For an operation that sets the condition codes as a side-effect, the
1011 C and V flags are not set as for cmp, so we can only use comparisons where
1012 this doesn't matter. (For LT and GE we can use "mi" and "pl"
1013 instead.) */
1014 /* ??? We could use "pnz" for greater than zero, however, we could then
1015 get into trouble because the comparison could not be reversed. */
1016 if (GET_MODE_CLASS (mode) == MODE_INT
1017 && y == const0_rtx
1018 && (op == EQ || op == NE
1019 || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x)) <= 4)))
1020 return CC_ZNmode;
1021
1022 /* add.f for if (a+b) */
1023 if (mode == SImode
1024 && GET_CODE (y) == NEG
1025 && (op == EQ || op == NE))
1026 return CC_ZNmode;
1027
1028 /* Check if this is a test suitable for bxor.f . */
1029 if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1030 && ((INTVAL (y) - 1) & INTVAL (y)) == 0
1031 && INTVAL (y))
1032 return CC_Zmode;
1033
1034 /* Check if this is a test suitable for add / bmsk.f . */
1035 if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1036 && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1)))
1037 && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0
1038 && (~INTVAL (x1) | INTVAL (y)) < 0
1039 && (~INTVAL (x1) | INTVAL (y)) > -0x800)
1040 return CC_Zmode;
1041
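/* A carry-only test: the unsigned-overflow idiom "if (a + b < a)"
   compares a PLUS against one of its own operands, so (under this
   reading) only the carry flag of an add.f is needed.  */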
1042 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
1043 && GET_CODE (x) == PLUS
1044 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
1045 return CC_Cmode;
1046
1047 if (TARGET_ARGONAUT_SET
1048 && ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP)))
1049 switch (op)
1050 {
1051 case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1052 return CC_FPXmode;
1053 case LT: case UNGE: case GT: case UNLE:
1054 return CC_FP_GTmode;
1055 case LE: case UNGT: case GE: case UNLT:
1056 return CC_FP_GEmode;
1057 default: gcc_unreachable ();
1058 }
1059 else if (TARGET_HARD_FLOAT
1060 && ((mode == SFmode && TARGET_FP_SP_BASE)
1061 || (mode == DFmode && TARGET_FP_DP_BASE)))
1062 switch (op)
1063 {
1064 case EQ:
1065 case NE:
1066 case UNORDERED:
1067 case ORDERED:
1068 case UNLT:
1069 case UNLE:
1070 case UNGT:
1071 case UNGE:
1072 case LT:
1073 case LE:
1074 case GT:
1075 case GE:
1076 return CC_FPUmode;
1077
1078 case LTGT:
1079 case UNEQ:
1080 return CC_FPU_UNEQmode;
1081
1082 default:
1083 gcc_unreachable ();
1084 }
1085 else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE)
1086 {
1087 switch (op)
1088 {
1089 case EQ: case NE: return CC_Zmode;
1090 case LT: case UNGE:
1091 case GT: case UNLE: return CC_FP_GTmode;
1092 case LE: case UNGT:
1093 case GE: case UNLT: return CC_FP_GEmode;
1094 case UNEQ: case LTGT: return CC_FP_UNEQmode;
1095 case ORDERED: case UNORDERED: return CC_FP_ORDmode;
1096 default: gcc_unreachable ();
1097 }
1098 }
1099 return CCmode;
1100 }
1101
1102 /* Vectors to keep interesting information about registers where it can easily
1103 be got. We used to use the actual mode value as the bit number, but there
1104 is (or may be) more than 32 modes now. Instead we use two tables: one
1105 indexed by hard register number, and one indexed by mode. */
1106
1107 /* The purpose of arc_mode_class is to shrink the range of modes so that
1108 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
1109 mapped into one arc_mode_class mode. */
1110
1111 enum arc_mode_class {
1112 C_MODE,
1113 S_MODE, D_MODE, T_MODE, O_MODE,
1114 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
1115 V_MODE
1116 };
1117
1118 /* Modes for condition codes. */
1119 #define C_MODES (1 << (int) C_MODE)
1120
1121 /* Modes for single-word and smaller quantities. */
1122 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
1123
1124 /* Modes for double-word and smaller quantities. */
1125 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
1126
1127 /* Mode for 8-byte DF values only. */
1128 #define DF_MODES (1 << DF_MODE)
1129
1130 /* Modes for quad-word and smaller quantities. */
1131 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
1132
1133 /* Modes for 128-bit vectors. */
1134 #define V_MODES (1 << (int) V_MODE)
1135
1136 /* Value is 1 if register/mode pair is acceptable on arc. */
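/* The entries are masks of the arc_mode_class bits defined above; a
   register/mode pair is presumably tested as
   (arc_hard_regno_mode_ok[regno] & arc_mode_class[mode]) != 0.  */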
1137
1138 unsigned int arc_hard_regno_mode_ok[] = {
1139 T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1140 T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1141 T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES,
1142 D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1143
1144 /* ??? Leave these as S_MODES for now. */
1145 S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1146 DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES,
1147 S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1148 S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES,
1149
1150 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1151 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1152 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1153 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1154
1155 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1156 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1157 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1158 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1159
1160 S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1161 S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES
1162 };
1163
1164 unsigned int arc_mode_class [NUM_MACHINE_MODES];
1165
1166 enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
1167
1168 enum reg_class
1169 arc_preferred_reload_class (rtx, enum reg_class cl)
1170 {
1171 if ((cl) == CHEAP_CORE_REGS || (cl) == WRITABLE_CORE_REGS)
1172 return GENERAL_REGS;
1173 return cl;
1174 }
1175
1176 /* Initialize the arc_mode_class array. */
1177
1178 static void
1179 arc_init_reg_tables (void)
1180 {
1181 int i;
1182
1183 for (i = 0; i < NUM_MACHINE_MODES; i++)
1184 {
1185 machine_mode m = (machine_mode) i;
1186
1187 switch (GET_MODE_CLASS (m))
1188 {
1189 case MODE_INT:
1190 case MODE_PARTIAL_INT:
1191 case MODE_COMPLEX_INT:
1192 if (GET_MODE_SIZE (m) <= 4)
1193 arc_mode_class[i] = 1 << (int) S_MODE;
1194 else if (GET_MODE_SIZE (m) == 8)
1195 arc_mode_class[i] = 1 << (int) D_MODE;
1196 else if (GET_MODE_SIZE (m) == 16)
1197 arc_mode_class[i] = 1 << (int) T_MODE;
1198 else if (GET_MODE_SIZE (m) == 32)
1199 arc_mode_class[i] = 1 << (int) O_MODE;
1200 else
1201 arc_mode_class[i] = 0;
1202 break;
1203 case MODE_FLOAT:
1204 case MODE_COMPLEX_FLOAT:
1205 if (GET_MODE_SIZE (m) <= 4)
1206 arc_mode_class[i] = 1 << (int) SF_MODE;
1207 else if (GET_MODE_SIZE (m) == 8)
1208 arc_mode_class[i] = 1 << (int) DF_MODE;
1209 else if (GET_MODE_SIZE (m) == 16)
1210 arc_mode_class[i] = 1 << (int) TF_MODE;
1211 else if (GET_MODE_SIZE (m) == 32)
1212 arc_mode_class[i] = 1 << (int) OF_MODE;
1213 else
1214 arc_mode_class[i] = 0;
1215 break;
1216 case MODE_VECTOR_INT:
1217 arc_mode_class [i] = (1<< (int) V_MODE);
1218 break;
1219 case MODE_CC:
1220 default:
1221 /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
1222 we must explicitly check for them here. */
1223 if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode
1224 || i == (int) CC_Cmode
1225 || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode
1226 || i == CC_FPUmode || i == CC_FPU_UNEQmode)
1227 arc_mode_class[i] = 1 << (int) C_MODE;
1228 else
1229 arc_mode_class[i] = 0;
1230 break;
1231 }
1232 }
1233 }
1234
1235 /* Core registers 56..59 are used for multiply extension options.
1236 The dsp option uses r56 and r57, these are then named acc1 and acc2.
1237 acc1 is the highpart, and acc2 the lowpart, so which register gets which
1238 number depends on endianness.
1239 The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi.
1240 Because mlo / mhi form a 64 bit value, we use different gcc internal
1241 register numbers to make them form a register pair as the gcc internals
1242 know it. mmid gets number 57, if still available, and mlo / mhi get
1243 number 58 and 59, depending on endianness. We use DBX_REGISTER_NUMBER
1244 to map this back. */
1245 char rname56[5] = "r56";
1246 char rname57[5] = "r57";
1247 char rname58[5] = "r58";
1248 char rname59[5] = "r59";
1249 char rname29[7] = "ilink1";
1250 char rname30[7] = "ilink2";
1251
1252 static void
1253 arc_conditional_register_usage (void)
1254 {
1255 int regno;
1256 int i;
1257 int fix_start = 60, fix_end = 55;
1258
1259 if (TARGET_V2)
1260 {
1261 /* For ARCv2 the core register set is changed. */
1262 strcpy (rname29, "ilink");
1263 strcpy (rname30, "r30");
1264 fixed_regs[30] = call_used_regs[30] = 1;
1265 }
1266
1267 if (TARGET_MUL64_SET)
1268 {
1269 fix_start = 57;
1270 fix_end = 59;
1271
1272 /* We don't provide a name for mmid. In rtl / assembly resource lists,
1273 you are supposed to refer to it as mlo & mhi, e.g.
1274 (zero_extract:SI (reg:DI 58) (const_int 32) (16)) .
1275 In an actual asm instruction, you of course use mmid.
1276 The point of avoiding having a separate register for mmid is that
1277 this way, we don't have to carry clobbers of that reg around in every
1278 instruction that modifies mlo and/or mhi. */
1279 strcpy (rname57, "");
1280 strcpy (rname58, TARGET_BIG_ENDIAN ? "mhi" : "mlo");
1281 strcpy (rname59, TARGET_BIG_ENDIAN ? "mlo" : "mhi");
1282 }
1283 if (TARGET_MULMAC_32BY16_SET)
1284 {
1285 fix_start = 56;
1286 fix_end = fix_end > 57 ? fix_end : 57;
1287 strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2");
1288 strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1");
1289 }
1290 for (regno = fix_start; regno <= fix_end; regno++)
1291 {
1292 if (!fixed_regs[regno])
1293 warning (0, "multiply option implies r%d is fixed", regno);
1294 fixed_regs [regno] = call_used_regs[regno] = 1;
1295 }
1296 if (TARGET_Q_CLASS)
1297 {
1298 reg_alloc_order[2] = 12;
1299 reg_alloc_order[3] = 13;
1300 reg_alloc_order[4] = 14;
1301 reg_alloc_order[5] = 15;
1302 reg_alloc_order[6] = 1;
1303 reg_alloc_order[7] = 0;
1304 reg_alloc_order[8] = 4;
1305 reg_alloc_order[9] = 5;
1306 reg_alloc_order[10] = 6;
1307 reg_alloc_order[11] = 7;
1308 reg_alloc_order[12] = 8;
1309 reg_alloc_order[13] = 9;
1310 reg_alloc_order[14] = 10;
1311 reg_alloc_order[15] = 11;
1312 }
1313 if (TARGET_SIMD_SET)
1314 {
1315 int i;
1316 for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
1317 reg_alloc_order [i] = i;
1318 for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
1319 i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
1320 reg_alloc_order [i] = i;
1321 }
1322 /* For ARC600, lp_count may not be read in an instruction
1323 following immediately after another one setting it to a new value.
1324 There was some discussion on how to enforce scheduling constraints for
1325 processors with missing interlocks on the gcc mailing list:
1326 http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html .
1327 However, we can't actually use this approach, because for ARC the
1328 delay slot scheduling pass is active, which runs after
1329 machine_dependent_reorg. */
1330 if (TARGET_ARC600)
1331 CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
1332 else if (!TARGET_LP_WR_INTERLOCK)
1333 fixed_regs[LP_COUNT] = 1;
1334 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1335 if (!call_used_regs[regno])
1336 CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
1337 for (regno = 32; regno < 60; regno++)
1338 if (!fixed_regs[regno])
1339 SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno);
1340 if (!TARGET_ARC600_FAMILY)
1341 {
1342 for (regno = 32; regno <= 60; regno++)
1343 CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno);
1344
1345 /* If they have used -ffixed-lp_count, make sure it takes
1346 effect. */
1347 if (fixed_regs[LP_COUNT])
1348 {
1349 CLEAR_HARD_REG_BIT (reg_class_contents[LPCOUNT_REG], LP_COUNT);
1350 CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
1351 CLEAR_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], LP_COUNT);
1352
1353 /* Instead of taking out SF_MODE like below, forbid it outright. */
1354 arc_hard_regno_mode_ok[60] = 0;
1355 }
1356 else
1357 arc_hard_regno_mode_ok[60] = 1 << (int) S_MODE;
1358 }
1359
1360 /* ARCHS has 64-bit data-path which makes use of the even-odd paired
1361 registers. */
1362 if (TARGET_HS)
1363 {
1364 for (regno = 1; regno < 32; regno +=2)
1365 {
1366 arc_hard_regno_mode_ok[regno] = S_MODES;
1367 }
1368 }
1369
1370 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1371 {
1372 if (i < 29)
1373 {
1374 if (TARGET_Q_CLASS && ((i <= 3) || ((i >= 12) && (i <= 15))))
1375 arc_regno_reg_class[i] = ARCOMPACT16_REGS;
1376 else
1377 arc_regno_reg_class[i] = GENERAL_REGS;
1378 }
1379 else if (i < 60)
1380 arc_regno_reg_class[i]
1381 = (fixed_regs[i]
1382 ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)
1383 ? CHEAP_CORE_REGS : ALL_CORE_REGS)
1384 : (((!TARGET_ARC600_FAMILY)
1385 && TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i))
1386 ? CHEAP_CORE_REGS : WRITABLE_CORE_REGS));
1387 else
1388 arc_regno_reg_class[i] = NO_REGS;
1389 }
1390
1391 /* ARCOMPACT16_REGS is empty, if TARGET_Q_CLASS has not been activated. */
1392 if (!TARGET_Q_CLASS)
1393 {
1394 CLEAR_HARD_REG_SET(reg_class_contents [ARCOMPACT16_REGS]);
1395 CLEAR_HARD_REG_SET(reg_class_contents [AC16_BASE_REGS]);
1396 }
1397
1398 gcc_assert (FIRST_PSEUDO_REGISTER >= 144);
1399
1400 /* Handle Special Registers. */
1401 arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register. */
1402 if (!TARGET_V2)
1403 arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register. */
1404 arc_regno_reg_class[31] = LINK_REGS; /* blink register. */
1405 arc_regno_reg_class[60] = LPCOUNT_REG;
1406 arc_regno_reg_class[61] = NO_REGS; /* CC_REG: must be NO_REGS. */
1407 arc_regno_reg_class[62] = GENERAL_REGS;
1408
1409 if (TARGET_DPFP)
1410 {
1411 for (i = 40; i < 44; ++i)
1412 {
1413 arc_regno_reg_class[i] = DOUBLE_REGS;
1414
1415 /* Unless they want us to do 'mov d1, 0x00000000' make sure
1416 no attempt is made to use such a register as a destination
1417 operand in *movdf_insn. */
1418 if (!TARGET_ARGONAUT_SET)
1419 {
1420 /* Make sure no 'c', 'w', 'W', or 'Rac' constraint is
1421 interpreted to mean they can use D1 or D2 in their insn. */
1422 CLEAR_HARD_REG_BIT(reg_class_contents[CHEAP_CORE_REGS ], i);
1423 CLEAR_HARD_REG_BIT(reg_class_contents[ALL_CORE_REGS ], i);
1424 CLEAR_HARD_REG_BIT(reg_class_contents[WRITABLE_CORE_REGS ], i);
1425 CLEAR_HARD_REG_BIT(reg_class_contents[MPY_WRITABLE_CORE_REGS], i);
1426 }
1427 }
1428 }
1429 else
1430 {
1431 /* Disable all DOUBLE_REGISTER settings,
1432 if not generating DPFP code. */
1433 arc_regno_reg_class[40] = ALL_REGS;
1434 arc_regno_reg_class[41] = ALL_REGS;
1435 arc_regno_reg_class[42] = ALL_REGS;
1436 arc_regno_reg_class[43] = ALL_REGS;
1437
1438 arc_hard_regno_mode_ok[40] = 0;
1439 arc_hard_regno_mode_ok[42] = 0;
1440
1441 CLEAR_HARD_REG_SET(reg_class_contents [DOUBLE_REGS]);
1442 }
1443
1444 if (TARGET_SIMD_SET)
1445 {
1446 gcc_assert (ARC_FIRST_SIMD_VR_REG == 64);
1447 gcc_assert (ARC_LAST_SIMD_VR_REG == 127);
1448
1449 for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
1450 arc_regno_reg_class [i] = SIMD_VR_REGS;
1451
1452 gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128);
1453 gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128);
1454 gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136);
1455 gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG == 143);
1456
1457 for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
1458 i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
1459 arc_regno_reg_class [i] = SIMD_DMA_CONFIG_REGS;
1460 }
1461
1462 /* pc : r63 */
1463 arc_regno_reg_class[PROGRAM_COUNTER_REGNO] = GENERAL_REGS;
1464
1465 /* ARCv2 accumulator. */
1466 if (TARGET_V2
1467 && (TARGET_FP_DP_FUSED || TARGET_FP_SP_FUSED))
1468 {
1469 arc_regno_reg_class[ACCL_REGNO] = WRITABLE_CORE_REGS;
1470 arc_regno_reg_class[ACCH_REGNO] = WRITABLE_CORE_REGS;
1471 SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], ACCL_REGNO);
1472 SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], ACCH_REGNO);
1473 SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], ACCL_REGNO);
1474 SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], ACCH_REGNO);
1475 arc_hard_regno_mode_ok[ACC_REG_FIRST] = D_MODES;
1476 }
1477 }
1478
1479 /* Handle an "interrupt" attribute; arguments as in
1480 struct attribute_spec.handler. */
1481
1482 static tree
1483 arc_handle_interrupt_attribute (tree *, tree name, tree args, int,
1484 bool *no_add_attrs)
1485 {
1486 gcc_assert (args);
1487
1488 tree value = TREE_VALUE (args);
1489
1490 if (TREE_CODE (value) != STRING_CST)
1491 {
1492 warning (OPT_Wattributes,
1493 "argument of %qE attribute is not a string constant",
1494 name);
1495 *no_add_attrs = true;
1496 }
1497 else if (strcmp (TREE_STRING_POINTER (value), "ilink1")
1498 && strcmp (TREE_STRING_POINTER (value), "ilink2")
1499 && !TARGET_V2)
1500 {
1501 warning (OPT_Wattributes,
1502 "argument of %qE attribute is not \"ilink1\" or \"ilink2\"",
1503 name);
1504 *no_add_attrs = true;
1505 }
1506 else if (TARGET_V2
1507 && strcmp (TREE_STRING_POINTER (value), "ilink"))
1508 {
1509 warning (OPT_Wattributes,
1510 "argument of %qE attribute is not \"ilink\"",
1511 name);
1512 *no_add_attrs = true;
1513 }
1514
1515 return NULL_TREE;
1516 }
1517
1518 /* Return zero if TYPE1 and TYPE2 are incompatible, one if they are compatible,
1519 and two if they are nearly compatible (which causes a warning to be
1520 generated). */
1521
1522 static int
1523 arc_comp_type_attributes (const_tree type1,
1524 const_tree type2)
1525 {
1526 int l1, l2, m1, m2, s1, s2;
1527
1528 /* Check for mismatch of non-default calling convention. */
1529 if (TREE_CODE (type1) != FUNCTION_TYPE)
1530 return 1;
1531
1532 /* Check for mismatched call attributes. */
1533 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
1534 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
1535 m1 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type1)) != NULL;
1536 m2 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type2)) != NULL;
1537 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
1538 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
1539
1540 /* Only bother to check if an attribute is defined. */
1541 if (l1 | l2 | m1 | m2 | s1 | s2)
1542 {
1543 /* If one type has an attribute, the other must have the same attribute. */
1544 if ((l1 != l2) || (m1 != m2) || (s1 != s2))
1545 return 0;
1546
1547 /* Disallow mixed attributes. */
1548 if (l1 + m1 + s1 > 1)
1549 return 0;
1550 }
1551
1552
1553 return 1;
1554 }
1555
1556 /* Set the default attributes for TYPE. */
1557
1558 void
1559 arc_set_default_type_attributes (tree type ATTRIBUTE_UNUSED)
1560 {
1561 gcc_unreachable();
1562 }
1563
1564 /* Misc. utilities. */
1565
1566 /* X and Y are two things to compare using CODE. Emit the compare insn and
1567 return the rtx for the cc reg in the proper mode. */
1568
1569 rtx
1570 gen_compare_reg (rtx comparison, machine_mode omode)
1571 {
1572 enum rtx_code code = GET_CODE (comparison);
1573 rtx x = XEXP (comparison, 0);
1574 rtx y = XEXP (comparison, 1);
1575 rtx tmp, cc_reg;
1576 machine_mode mode, cmode;
1577
1578
1579 cmode = GET_MODE (x);
1580 if (cmode == VOIDmode)
1581 cmode = GET_MODE (y);
1582 gcc_assert (cmode == SImode || cmode == SFmode || cmode == DFmode);
1583 if (cmode == SImode)
1584 {
1585 if (!register_operand (x, SImode))
1586 {
1587 if (register_operand (y, SImode))
1588 {
1589 tmp = x;
1590 x = y;
1591 y = tmp;
1592 code = swap_condition (code);
1593 }
1594 else
1595 x = copy_to_mode_reg (SImode, x);
1596 }
1597 if (GET_CODE (y) == SYMBOL_REF && flag_pic)
1598 y = copy_to_mode_reg (SImode, y);
1599 }
1600 else
1601 {
1602 x = force_reg (cmode, x);
1603 y = force_reg (cmode, y);
1604 }
1605 mode = SELECT_CC_MODE (code, x, y);
1606
1607 cc_reg = gen_rtx_REG (mode, CC_REG);
1608
1609 /* ??? FIXME (x-y)==0, as done by both cmpsfpx_raw and
1610 cmpdfpx_raw, is not a correct comparison for floats:
1611 http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
1612 */
1613 if (TARGET_ARGONAUT_SET
1614 && ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP)))
1615 {
1616 switch (code)
1617 {
1618 case NE: case EQ: case LT: case UNGE: case LE: case UNGT:
1619 case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1620 break;
1621 case GT: case UNLE: case GE: case UNLT:
1622 code = swap_condition (code);
1623 tmp = x;
1624 x = y;
1625 y = tmp;
1626 break;
1627 default:
1628 gcc_unreachable ();
1629 }
1630 if (cmode == SFmode)
1631 {
1632 emit_insn (gen_cmpsfpx_raw (x, y));
1633 }
1634 else /* DFmode */
1635 {
1636 /* The insn accepts Dx regs directly. */
1637 emit_insn (gen_cmpdfpx_raw (x, y));
1638 }
1639
1640 if (mode != CC_FPXmode)
1641 emit_insn (gen_rtx_SET (cc_reg,
1642 gen_rtx_COMPARE (mode,
1643 gen_rtx_REG (CC_FPXmode, 61),
1644 const0_rtx)));
1645 }
1646 else if (TARGET_HARD_FLOAT
1647 && ((cmode == SFmode && TARGET_FP_SP_BASE)
1648 || (cmode == DFmode && TARGET_FP_DP_BASE)))
1649 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
1650 else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE)
1651 {
1652 rtx op0 = gen_rtx_REG (cmode, 0);
1653 rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD);
1654 bool swap = false;
1655
1656 switch (code)
1657 {
1658 case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
1659 case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1660 break;
1661 case LT: case UNGE: case LE: case UNGT:
1662 code = swap_condition (code);
1663 swap = true;
1664 break;
1665 default:
1666 gcc_unreachable ();
1667 }
1668 if (currently_expanding_to_rtl)
1669 {
1670 if (swap)
1671 {
1672 tmp = x;
1673 x = y;
1674 y = tmp;
1675 }
1676 emit_move_insn (op0, x);
1677 emit_move_insn (op1, y);
1678 }
1679 else
1680 {
1681 gcc_assert (rtx_equal_p (op0, x));
1682 gcc_assert (rtx_equal_p (op1, y));
1683 if (swap)
1684 {
1685 op0 = y;
1686 op1 = x;
1687 }
1688 }
1689 emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1)));
1690 }
1691 else
1692 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
1693 return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx);
1694 }
1695
1696 /* Return true if VALUE, a const_double, will fit in a limm (4 byte number).
1697 We assume the value can be either signed or unsigned. */
1698
1699 bool
1700 arc_double_limm_p (rtx value)
1701 {
1702 HOST_WIDE_INT low, high;
1703
1704 gcc_assert (GET_CODE (value) == CONST_DOUBLE);
1705
1706 if (TARGET_DPFP)
1707 return true;
1708
1709 low = CONST_DOUBLE_LOW (value);
1710 high = CONST_DOUBLE_HIGH (value);
1711
1712 if (low & 0x80000000)
1713 {
1714 return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0)
1715 || (((low & - (unsigned HOST_WIDE_INT) 0x80000000)
1716 == - (unsigned HOST_WIDE_INT) 0x80000000)
1717 && high == -1));
1718 }
1719 else
1720 {
1721 return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0;
1722 }
1723 }
1724
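/* A few illustrative values for arc_double_limm_p, reading the
   CONST_DOUBLE as a 64-bit integer (a sketch, not exhaustive):
   0x000000007fffffff and 0x0000000080000000 fit (the latter as an
   unsigned 32-bit limm), 0xffffffff80000000 fits as a sign-extended
   negative value, while 0x0000000100000000 has more than 32 significant
   bits and does not fit.  */
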
1725 /* Do any needed setup for a variadic function. For the ARC, we must
1726 create a register parameter block, and then copy any anonymous arguments
1727 in registers to memory.
1728
1729 CUM has not been updated for the last named argument which has type TYPE
1730 and mode MODE, and we rely on this fact. */
1731
1732 static void
1733 arc_setup_incoming_varargs (cumulative_args_t args_so_far,
1734 machine_mode mode, tree type,
1735 int *pretend_size, int no_rtl)
1736 {
1737 int first_anon_arg;
1738 CUMULATIVE_ARGS next_cum;
1739
1740 /* We must treat `__builtin_va_alist' as an anonymous arg. */
1741
1742 next_cum = *get_cumulative_args (args_so_far);
1743 arc_function_arg_advance (pack_cumulative_args (&next_cum),
1744 mode, type, true);
1745 first_anon_arg = next_cum;
1746
1747 if (FUNCTION_ARG_REGNO_P (first_anon_arg))
1748 {
1749 /* First anonymous (unnamed) argument is in a reg. */
1750
1751 /* Note that first_reg_offset < MAX_ARC_PARM_REGS. */
1752 int first_reg_offset = first_anon_arg;
1753
1754 if (!no_rtl)
1755 {
1756 rtx regblock
1757 = gen_rtx_MEM (BLKmode, plus_constant (Pmode, arg_pointer_rtx,
1758 FIRST_PARM_OFFSET (0)));
1759 move_block_from_reg (first_reg_offset, regblock,
1760 MAX_ARC_PARM_REGS - first_reg_offset);
1761 }
1762
1763 *pretend_size
1764 = ((MAX_ARC_PARM_REGS - first_reg_offset ) * UNITS_PER_WORD);
1765 }
1766 }
1767
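/* A hedged example of the effect of arc_setup_incoming_varargs
   (assuming r0-r7 are the argument registers, i.e. MAX_ARC_PARM_REGS
   is 8): for `int f (int a, ...)' the last named argument is passed in
   r0, so the first anonymous argument register is r1; unless NO_RTL is
   set, r1-r7 are stored into the register parameter block and
   *pretend_size becomes 7 * UNITS_PER_WORD = 28 bytes.  */
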
1768 /* Cost functions. */
1769
1770 /* Provide the costs of an addressing mode that contains ADDR.
1771 If ADDR is not a valid address, its cost is irrelevant. */
1772
1773 int
1774 arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed)
1775 {
1776 switch (GET_CODE (addr))
1777 {
1778 case REG :
1779 return speed || satisfies_constraint_Rcq (addr) ? 0 : 1;
1780 case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC:
1781 case PRE_MODIFY: case POST_MODIFY:
1782 return !speed;
1783
1784 case LABEL_REF :
1785 case SYMBOL_REF :
1786 case CONST :
1787 /* Most likely needs a LIMM. */
1788 return COSTS_N_INSNS (1);
1789
1790 case PLUS :
1791 {
1792 register rtx plus0 = XEXP (addr, 0);
1793 register rtx plus1 = XEXP (addr, 1);
1794
1795 if (GET_CODE (plus0) != REG
1796 && (GET_CODE (plus0) != MULT
1797 || !CONST_INT_P (XEXP (plus0, 1))
1798 || (INTVAL (XEXP (plus0, 1)) != 2
1799 && INTVAL (XEXP (plus0, 1)) != 4)))
1800 break;
1801
1802 switch (GET_CODE (plus1))
1803 {
1804 case CONST_INT :
1805 return (!RTX_OK_FOR_OFFSET_P (SImode, plus1)
1806 ? COSTS_N_INSNS (1)
1807 : speed
1808 ? 0
1809 : (satisfies_constraint_Rcq (plus0)
1810 && satisfies_constraint_O (plus1))
1811 ? 0
1812 : 1);
1813 case REG:
1814 return (speed < 1 ? 0
1815 : (satisfies_constraint_Rcq (plus0)
1816 && satisfies_constraint_Rcq (plus1))
1817 ? 0 : 1);
1818 case CONST :
1819 case SYMBOL_REF :
1820 case LABEL_REF :
1821 return COSTS_N_INSNS (1);
1822 default:
1823 break;
1824 }
1825 break;
1826 }
1827 default:
1828 break;
1829 }
1830
1831 return 4;
1832 }
1833
1834 /* Emit instruction X with the frame related bit set. */
1835
1836 static rtx
1837 frame_insn (rtx x)
1838 {
1839 x = emit_insn (x);
1840 RTX_FRAME_RELATED_P (x) = 1;
1841 return x;
1842 }
1843
1844 /* Emit a frame insn to move SRC to DST. */
1845
1846 static rtx
1847 frame_move (rtx dst, rtx src)
1848 {
1849 rtx tmp = gen_rtx_SET (dst, src);
1850 RTX_FRAME_RELATED_P (tmp) = 1;
1851 return frame_insn (tmp);
1852 }
1853
1854 /* Like frame_move, but add a REG_INC note for REG if ADDR contains an
1855 auto increment address, or is zero. */
1856
1857 static rtx
1858 frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr)
1859 {
1860 rtx insn = frame_move (dst, src);
1861
1862 if (!addr
1863 || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC
1864 || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY)
1865 add_reg_note (insn, REG_INC, reg);
1866 return insn;
1867 }
1868
1869 /* Emit a frame insn which adjusts a frame address register REG by OFFSET. */
1870
1871 static rtx
1872 frame_add (rtx reg, HOST_WIDE_INT offset)
1873 {
1874 gcc_assert ((offset & 0x3) == 0);
1875 if (!offset)
1876 return NULL_RTX;
1877 return frame_move (reg, plus_constant (Pmode, reg, offset));
1878 }
1879
1880 /* Emit a frame insn which adjusts stack pointer by OFFSET. */
1881
1882 static rtx
1883 frame_stack_add (HOST_WIDE_INT offset)
1884 {
1885 return frame_add (stack_pointer_rtx, offset);
1886 }
1887
1888 /* Traditionally, we push saved registers first in the prologue,
1889 then we allocate the rest of the frame - and reverse in the epilogue.
1890    This still has its merits for ease of debugging, or for saving code size
1891 or even execution time if the stack frame is so large that some accesses
1892 can't be encoded anymore with offsets in the instruction code when using
1893 a different scheme.
1894 Also, it would be a good starting point if we got instructions to help
1895 with register save/restore.
1896
1897 However, often stack frames are small, and the pushing / popping has
1898 some costs:
1899 - the stack modification prevents a lot of scheduling.
1900 - frame allocation / deallocation needs extra instructions.
1901 - unless we know that we compile ARC700 user code, we need to put
1902 a memory barrier after frame allocation / before deallocation to
1903 prevent interrupts clobbering our data in the frame.
1904 In particular, we don't have any such guarantees for library functions,
1905    which, on the other hand, tend to have small frames.
1906
1907 Thus, for small frames, we'd like to use a different scheme:
1908 - The frame is allocated in full with the first prologue instruction,
1909 and deallocated in full with the last epilogue instruction.
1910    Thus, the instructions in-between can be freely scheduled.
1911 - If the function has no outgoing arguments on the stack, we can allocate
1912 one register save slot at the top of the stack. This register can then
1913    be saved simultaneously with frame allocation, and restored with
1914 frame deallocation.
1915 This register can be picked depending on scheduling considerations,
1916    although some thought should also go into having some set of registers
1917    to be potentially lingering after a call, and others to be available
1918    immediately - i.e. in the absence of interprocedural optimization, we
1919 can use an ABI-like convention for register allocation to reduce
1920 stalls after function return. */
1921 /* Function prologue/epilogue handlers. */
1922
1923 /* ARCompact stack frames look like:
1924
1925 Before call After call
1926 high +-----------------------+ +-----------------------+
1927 mem | reg parm save area | | reg parm save area |
1928 | only created for | | only created for |
1929 | variable arg fns | | variable arg fns |
1930 AP +-----------------------+ +-----------------------+
1931 | return addr register | | return addr register |
1932 | (if required) | | (if required) |
1933 +-----------------------+ +-----------------------+
1934 | | | |
1935 | reg save area | | reg save area |
1936 | | | |
1937 +-----------------------+ +-----------------------+
1938 | frame pointer | | frame pointer |
1939 | (if required) | | (if required) |
1940 FP +-----------------------+ +-----------------------+
1941 | | | |
1942 | local/temp variables | | local/temp variables |
1943 | | | |
1944 +-----------------------+ +-----------------------+
1945 | | | |
1946 | arguments on stack | | arguments on stack |
1947 | | | |
1948 SP +-----------------------+ +-----------------------+
1949 | reg parm save area |
1950 | only created for |
1951 | variable arg fns |
1952 AP +-----------------------+
1953 | return addr register |
1954 | (if required) |
1955 +-----------------------+
1956 | |
1957 | reg save area |
1958 | |
1959 +-----------------------+
1960 | frame pointer |
1961 | (if required) |
1962 FP +-----------------------+
1963 | |
1964 | local/temp variables |
1965 | |
1966 +-----------------------+
1967 | |
1968 | arguments on stack |
1969 low | |
1970 mem SP +-----------------------+
1971
1972 Notes:
1973 1) The "reg parm save area" does not exist for non-variadic functions.
1974 The "reg parm save area" can be eliminated completely if we created our
1975 own va-arc.h, but that has tradeoffs as well (so it's not done). */
1976
1977 /* Structure to be filled in by arc_compute_frame_size with register
1978 save masks, and offsets for the current function. */
1979 struct GTY (()) arc_frame_info
1980 {
1981 unsigned int total_size; /* # bytes that the entire frame takes up. */
1982 unsigned int extra_size; /* # bytes of extra stuff. */
1983 unsigned int pretend_size; /* # bytes we push and pretend caller did. */
1984 unsigned int args_size; /* # bytes that outgoing arguments take up. */
1985 unsigned int reg_size; /* # bytes needed to store regs. */
1986 unsigned int var_size; /* # bytes that variables take up. */
1987 unsigned int reg_offset; /* Offset from new sp to store regs. */
1988 unsigned int gmask; /* Mask of saved gp registers. */
1989 int initialized; /* Nonzero if frame size already calculated. */
1990 short millicode_start_reg;
1991 short millicode_end_reg;
1992 bool save_return_addr;
1993 };
1994
1995 /* Defining data structures for per-function information. */
1996
1997 typedef struct GTY (()) machine_function
1998 {
1999 enum arc_function_type fn_type;
2000 struct arc_frame_info frame_info;
2001 /* To keep track of unalignment caused by short insns. */
2002 int unalign;
2003 int force_short_suffix; /* Used when disgorging return delay slot insns. */
2004 const char *size_reason;
2005 struct arc_ccfsm ccfsm_current;
2006 /* Map from uid to ccfsm state during branch shortening. */
2007 rtx ccfsm_current_insn;
2008 char arc_reorg_started;
2009 char prescan_initialized;
2010 } machine_function;
2011
2012 /* Type of function DECL.
2013
2014 The result is cached. To reset the cache at the end of a function,
2015 call with DECL = NULL_TREE. */
2016
2017 enum arc_function_type
2018 arc_compute_function_type (struct function *fun)
2019 {
2020 tree decl = fun->decl;
2021 tree a;
2022 enum arc_function_type fn_type = fun->machine->fn_type;
2023
2024 if (fn_type != ARC_FUNCTION_UNKNOWN)
2025 return fn_type;
2026
2027 /* Assume we have a normal function (not an interrupt handler). */
2028 fn_type = ARC_FUNCTION_NORMAL;
2029
2030 /* Now see if this is an interrupt handler. */
2031 for (a = DECL_ATTRIBUTES (decl);
2032 a;
2033 a = TREE_CHAIN (a))
2034 {
2035 tree name = TREE_PURPOSE (a), args = TREE_VALUE (a);
2036
2037 if (name == get_identifier ("interrupt")
2038 && list_length (args) == 1
2039 && TREE_CODE (TREE_VALUE (args)) == STRING_CST)
2040 {
2041 tree value = TREE_VALUE (args);
2042
2043 if (!strcmp (TREE_STRING_POINTER (value), "ilink1")
2044 || !strcmp (TREE_STRING_POINTER (value), "ilink"))
2045 fn_type = ARC_FUNCTION_ILINK1;
2046 else if (!strcmp (TREE_STRING_POINTER (value), "ilink2"))
2047 fn_type = ARC_FUNCTION_ILINK2;
2048 else
2049 gcc_unreachable ();
2050 break;
2051 }
2052 }
2053
2054 return fun->machine->fn_type = fn_type;
2055 }
2056
2057 #define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
2058 #define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
2059
2060 /* Tell prologue and epilogue if register REGNO should be saved / restored.
2061 The return address and frame pointer are treated separately.
2062 Don't consider them here.
2063 Addition for pic: The gp register needs to be saved if the current
2064 function changes it to access gotoff variables.
2065 FIXME: This will not be needed if we used some arbitrary register
2066 instead of r26.
2067 */
2068 #define MUST_SAVE_REGISTER(regno, interrupt_p) \
2069 (((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \
2070 && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p))) \
2071 || (flag_pic && crtl->uses_pic_offset_table \
2072 && regno == PIC_OFFSET_TABLE_REGNUM) )
2073
2074 #define MUST_SAVE_RETURN_ADDR \
2075 (cfun->machine->frame_info.save_return_addr)
2076
2077 /* Return non-zero if there are registers to be saved or loaded using
2078 millicode thunks. We can only use consecutive sequences starting
2079 with r13, and not going beyond r25.
2080 GMASK is a bitmask of registers to save. This function sets
2081    FRAME->millicode_start_reg .. FRAME->millicode_end_reg to the range
2082 of registers to be saved / restored with a millicode call. */
2083
2084 static int
2085 arc_compute_millicode_save_restore_regs (unsigned int gmask,
2086 struct arc_frame_info *frame)
2087 {
2088 int regno;
2089
2090 int start_reg = 13, end_reg = 25;
2091
2092 for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));)
2093 regno++;
2094 end_reg = regno - 1;
2095 /* There is no point in using millicode thunks if we don't save/restore
2096 at least three registers. For non-leaf functions we also have the
2097 blink restore. */
2098 if (regno - start_reg >= 3 - (crtl->is_leaf == 0))
2099 {
2100 frame->millicode_start_reg = 13;
2101 frame->millicode_end_reg = regno - 1;
2102 return 1;
2103 }
2104 return 0;
2105 }
2106
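/* A worked example of the rule above (a sketch): the threshold is three
   consecutive registers for a leaf function and two for a non-leaf one,
   since the latter also gets the blink restore from the thunk.  So a
   GMASK covering r13-r15 selects millicode thunks with
   millicode_start_reg = 13 and millicode_end_reg = 15, whereas r13-r14
   alone only qualifies when the function is not a leaf.  */
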
2107 /* Return the bytes needed to compute the frame pointer from the current
2108 stack pointer.
2109
2110 SIZE is the size needed for local variables. */
2111
2112 unsigned int
2113 arc_compute_frame_size (int size) /* size = # of var. bytes allocated. */
2114 {
2115 int regno;
2116 unsigned int total_size, var_size, args_size, pretend_size, extra_size;
2117 unsigned int reg_size, reg_offset;
2118 unsigned int gmask;
2119 enum arc_function_type fn_type;
2120 int interrupt_p;
2121 struct arc_frame_info *frame_info = &cfun->machine->frame_info;
2122
2123 size = ARC_STACK_ALIGN (size);
2124
2125 /* 1) Size of locals and temporaries */
2126 var_size = size;
2127
2128 /* 2) Size of outgoing arguments */
2129 args_size = crtl->outgoing_args_size;
2130
2131 /* 3) Calculate space needed for saved registers.
2132 ??? We ignore the extension registers for now. */
2133
2134 /* See if this is an interrupt handler. Call used registers must be saved
2135 for them too. */
2136
2137 reg_size = 0;
2138 gmask = 0;
2139 fn_type = arc_compute_function_type (cfun);
2140 interrupt_p = ARC_INTERRUPT_P (fn_type);
2141
2142 for (regno = 0; regno <= 31; regno++)
2143 {
2144 if (MUST_SAVE_REGISTER (regno, interrupt_p))
2145 {
2146 reg_size += UNITS_PER_WORD;
2147 gmask |= 1 << regno;
2148 }
2149 }
2150
2151 /* 4) Space for back trace data structure.
2152 <return addr reg size> (if required) + <fp size> (if required). */
2153 frame_info->save_return_addr
2154 = (!crtl->is_leaf || df_regs_ever_live_p (RETURN_ADDR_REGNUM));
2155 /* Saving blink reg in case of leaf function for millicode thunk calls. */
2156 if (optimize_size && !TARGET_NO_MILLICODE_THUNK_SET)
2157 {
2158 if (arc_compute_millicode_save_restore_regs (gmask, frame_info))
2159 frame_info->save_return_addr = true;
2160 }
2161
2162 extra_size = 0;
2163 if (MUST_SAVE_RETURN_ADDR)
2164 extra_size = 4;
2165 if (frame_pointer_needed)
2166 extra_size += 4;
2167
2168 /* 5) Space for variable arguments passed in registers */
2169 pretend_size = crtl->args.pretend_args_size;
2170
2171 /* Ensure everything before the locals is aligned appropriately. */
2172 {
2173 unsigned int extra_plus_reg_size;
2174 unsigned int extra_plus_reg_size_aligned;
2175
2176 extra_plus_reg_size = extra_size + reg_size;
2177 extra_plus_reg_size_aligned = ARC_STACK_ALIGN(extra_plus_reg_size);
2178 reg_size = extra_plus_reg_size_aligned - extra_size;
2179 }
2180
2181 /* Compute total frame size. */
2182 total_size = var_size + args_size + extra_size + pretend_size + reg_size;
2183
2184 total_size = ARC_STACK_ALIGN (total_size);
2185
2186 /* Compute offset of register save area from stack pointer:
2187 Frame: pretend_size <blink> reg_size <fp> var_size args_size <--sp
2188 */
2189 reg_offset = (total_size - (pretend_size + reg_size + extra_size)
2190 + (frame_pointer_needed ? 4 : 0));
2191
2192 /* Save computed information. */
2193 frame_info->total_size = total_size;
2194 frame_info->extra_size = extra_size;
2195 frame_info->pretend_size = pretend_size;
2196 frame_info->var_size = var_size;
2197 frame_info->args_size = args_size;
2198 frame_info->reg_size = reg_size;
2199 frame_info->reg_offset = reg_offset;
2200 frame_info->gmask = gmask;
2201 frame_info->initialized = reload_completed;
2202
2203 /* Ok, we're done. */
2204 return total_size;
2205 }
2206
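/* A worked example for arc_compute_frame_size (assuming 4-byte stack
   alignment and UNITS_PER_WORD = 4): with 8 bytes of locals, no outgoing
   arguments, blink saved (extra_size = 4), r13-r15 saved (reg_size = 12),
   no frame pointer and no pretend args:
     total_size = 8 + 0 + 4 + 0 + 12 = 24
     reg_offset = 24 - (0 + 12 + 4) = 8
   i.e. the register save area starts 8 bytes above the new stack pointer,
   directly above the locals and outgoing argument area.  */
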
2207 /* Common code to save/restore registers. */
2208 /* BASE_REG is the base register to use for addressing and to adjust.
2209 GMASK is a bitmask of general purpose registers to save/restore.
2210    EPILOGUE_P: 0 for prologue, 1 for epilogue, 2 for epilogue with sibling thunk.
2211 If *FIRST_OFFSET is non-zero, add it first to BASE_REG - preferably
2212 using a pre-modify for the first memory access. *FIRST_OFFSET is then
2213 zeroed. */
2214
2215 static void
2216 arc_save_restore (rtx base_reg,
2217 unsigned int gmask, int epilogue_p, int *first_offset)
2218 {
2219 unsigned int offset = 0;
2220 int regno;
2221 struct arc_frame_info *frame = &cfun->machine->frame_info;
2222 rtx sibthunk_insn = NULL_RTX;
2223
2224 if (gmask)
2225 {
2226 /* Millicode thunks implementation:
2227 Generates calls to millicodes for registers starting from r13 to r25
2228 Present Limitations:
2229 - Only one range supported. The remaining regs will have the ordinary
2230 st and ld instructions for store and loads. Hence a gmask asking
2231 to store r13-14, r16-r25 will only generate calls to store and
2232 load r13 to r14 while store and load insns will be generated for
2233 r16 to r25 in the prologue and epilogue respectively.
2234
2235 - Presently library only supports register ranges starting from r13.
2236 */
2237 if (epilogue_p == 2 || frame->millicode_end_reg > 14)
2238 {
2239 int start_call = frame->millicode_start_reg;
2240 int end_call = frame->millicode_end_reg;
2241 int n_regs = end_call - start_call + 1;
2242 int i = 0, r, off = 0;
2243 rtx insn;
2244 rtx ret_addr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
2245
2246 if (*first_offset)
2247 {
2248 /* "reg_size" won't be more than 127 . */
2249 gcc_assert (epilogue_p || abs (*first_offset) <= 127);
2250 frame_add (base_reg, *first_offset);
2251 *first_offset = 0;
2252 }
2253 insn = gen_rtx_PARALLEL
2254 (VOIDmode, rtvec_alloc ((epilogue_p == 2) + n_regs + 1));
2255 if (epilogue_p == 2)
2256 i += 2;
2257 else
2258 XVECEXP (insn, 0, n_regs) = gen_rtx_CLOBBER (VOIDmode, ret_addr);
2259 for (r = start_call; r <= end_call; r++, off += UNITS_PER_WORD, i++)
2260 {
2261 rtx reg = gen_rtx_REG (SImode, r);
2262 rtx mem
2263 = gen_frame_mem (SImode, plus_constant (Pmode, base_reg, off));
2264
2265 if (epilogue_p)
2266 XVECEXP (insn, 0, i) = gen_rtx_SET (reg, mem);
2267 else
2268 XVECEXP (insn, 0, i) = gen_rtx_SET (mem, reg);
2269 gmask = gmask & ~(1L << r);
2270 }
2271 if (epilogue_p == 2)
2272 sibthunk_insn = insn;
2273 else
2274 {
2275 insn = frame_insn (insn);
2276 if (epilogue_p)
2277 for (r = start_call; r <= end_call; r++)
2278 {
2279 rtx reg = gen_rtx_REG (SImode, r);
2280 add_reg_note (insn, REG_CFA_RESTORE, reg);
2281 }
2282 }
2283 offset += off;
2284 }
2285
2286 for (regno = 0; regno <= 31; regno++)
2287 {
2288 enum machine_mode mode = SImode;
2289 bool found = false;
2290
2291 if (TARGET_LL64
2292 && (regno % 2 == 0)
2293 && ((gmask & (1L << regno)) != 0)
2294 && ((gmask & (1L << (regno+1))) != 0))
2295 {
2296 found = true;
2297 mode = DImode;
2298 }
2299 else if ((gmask & (1L << regno)) != 0)
2300 {
2301 found = true;
2302 mode = SImode;
2303 }
2304
2305 if (found)
2306 {
2307 rtx reg = gen_rtx_REG (mode, regno);
2308 rtx addr, mem;
2309 int cfa_adjust = *first_offset;
2310
2311 if (*first_offset)
2312 {
2313 gcc_assert (!offset);
2314 addr = plus_constant (Pmode, base_reg, *first_offset);
2315 addr = gen_rtx_PRE_MODIFY (Pmode, base_reg, addr);
2316 *first_offset = 0;
2317 }
2318 else
2319 {
2320 gcc_assert (SMALL_INT (offset));
2321 addr = plus_constant (Pmode, base_reg, offset);
2322 }
2323 mem = gen_frame_mem (mode, addr);
2324 if (epilogue_p)
2325 {
2326 rtx insn =
2327 frame_move_inc (reg, mem, base_reg, addr);
2328 add_reg_note (insn, REG_CFA_RESTORE, reg);
2329 if (cfa_adjust)
2330 {
2331 enum reg_note note = REG_CFA_ADJUST_CFA;
2332 add_reg_note (insn, note,
2333 gen_rtx_SET (stack_pointer_rtx,
2334 plus_constant (Pmode,
2335 stack_pointer_rtx,
2336 cfa_adjust)));
2337 }
2338 }
2339 else
2340 frame_move_inc (mem, reg, base_reg, addr);
2341 offset += UNITS_PER_WORD;
2342 if (mode == DImode)
2343 {
2344 offset += UNITS_PER_WORD;
2345 ++regno;
2346 }
2347 } /* if */
2348 } /* for */
2349 }/* if */
2350 if (sibthunk_insn)
2351 {
2352 int start_call = frame->millicode_start_reg;
2353 int end_call = frame->millicode_end_reg;
2354 int r;
2355
2356 rtx r12 = gen_rtx_REG (Pmode, 12);
2357
2358 frame_insn (gen_rtx_SET (r12, GEN_INT (offset)));
2359 XVECEXP (sibthunk_insn, 0, 0) = ret_rtx;
2360 XVECEXP (sibthunk_insn, 0, 1)
2361 = gen_rtx_SET (stack_pointer_rtx,
2362 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12));
2363 sibthunk_insn = emit_jump_insn (sibthunk_insn);
2364 RTX_FRAME_RELATED_P (sibthunk_insn) = 1;
2365
2366 /* Would be nice if we could do this earlier, when the PARALLEL
2367 is populated, but these need to be attached after the
2368 emit. */
2369 for (r = start_call; r <= end_call; r++)
2370 {
2371 rtx reg = gen_rtx_REG (SImode, r);
2372 add_reg_note (sibthunk_insn, REG_CFA_RESTORE, reg);
2373 }
2374 }
2375 } /* arc_save_restore */
2376
2377
2378 int arc_return_address_regs[4]
2379 = {0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM};
2380
2381 /* Set up the stack and frame pointer (if desired) for the function. */
2382
2383 void
2384 arc_expand_prologue (void)
2385 {
2386 int size = get_frame_size ();
2387 unsigned int gmask = cfun->machine->frame_info.gmask;
2388 /* unsigned int frame_pointer_offset;*/
2389 unsigned int frame_size_to_allocate;
2390 /* (FIXME: The first store will use a PRE_MODIFY; this will usually be r13.
2391 Change the stack layout so that we rather store a high register with the
2392 PRE_MODIFY, thus enabling more short insn generation.) */
2393 int first_offset = 0;
2394
2395 size = ARC_STACK_ALIGN (size);
2396
2397 /* Compute/get total frame size. */
2398 size = (!cfun->machine->frame_info.initialized
2399 ? arc_compute_frame_size (size)
2400 : cfun->machine->frame_info.total_size);
2401
2402 if (flag_stack_usage_info)
2403 current_function_static_stack_size = size;
2404
2405 /* Keep track of frame size to be allocated. */
2406 frame_size_to_allocate = size;
2407
2408 /* These cases shouldn't happen. Catch them now. */
2409 gcc_assert (!(size == 0 && gmask));
2410
2411 /* Allocate space for register arguments if this is a variadic function. */
2412 if (cfun->machine->frame_info.pretend_size != 0)
2413 {
2414       /* Ensure pretend_size is at most 8 * word_size.  */
2415 gcc_assert (cfun->machine->frame_info.pretend_size <= 32);
2416
2417 frame_stack_add (-(HOST_WIDE_INT)cfun->machine->frame_info.pretend_size);
2418 frame_size_to_allocate -= cfun->machine->frame_info.pretend_size;
2419 }
2420
2421 /* The home-grown ABI says link register is saved first. */
2422 if (MUST_SAVE_RETURN_ADDR)
2423 {
2424 rtx ra = gen_rtx_REG (SImode, RETURN_ADDR_REGNUM);
2425 rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
2426
2427 frame_move_inc (mem, ra, stack_pointer_rtx, 0);
2428 frame_size_to_allocate -= UNITS_PER_WORD;
2429
2430 } /* MUST_SAVE_RETURN_ADDR */
2431
2432 /* Save any needed call-saved regs (and call-used if this is an
2433 interrupt handler) for ARCompact ISA. */
2434 if (cfun->machine->frame_info.reg_size)
2435 {
2436 first_offset = -cfun->machine->frame_info.reg_size;
2437 /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask. */
2438 arc_save_restore (stack_pointer_rtx, gmask, 0, &first_offset);
2439 frame_size_to_allocate -= cfun->machine->frame_info.reg_size;
2440 }
2441
2442
2443 /* Save frame pointer if needed. */
2444 if (frame_pointer_needed)
2445 {
2446 rtx addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2447 GEN_INT (-UNITS_PER_WORD + first_offset));
2448 rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_MODIFY (Pmode,
2449 stack_pointer_rtx,
2450 addr));
2451 frame_move_inc (mem, frame_pointer_rtx, stack_pointer_rtx, 0);
2452 frame_size_to_allocate -= UNITS_PER_WORD;
2453 first_offset = 0;
2454 frame_move (frame_pointer_rtx, stack_pointer_rtx);
2455 }
2456
2457 /* ??? We don't handle the case where the saved regs are more than 252
2458 bytes away from sp. This can be handled by decrementing sp once, saving
2459 the regs, and then decrementing it again. The epilogue doesn't have this
2460 problem as the `ld' insn takes reg+limm values (though it would be more
2461 efficient to avoid reg+limm). */
2462
2463 frame_size_to_allocate -= first_offset;
2464 /* Allocate the stack frame. */
2465 if (frame_size_to_allocate > 0)
2466 frame_stack_add ((HOST_WIDE_INT) 0 - frame_size_to_allocate);
2467
2468 /* Setup the gp register, if needed. */
2469 if (crtl->uses_pic_offset_table)
2470 arc_finalize_pic ();
2471 }
2472
2473 /* Do any necessary cleanup after a function to restore stack, frame,
2474 and regs. */
2475
2476 void
2477 arc_expand_epilogue (int sibcall_p)
2478 {
2479 int size = get_frame_size ();
2480 enum arc_function_type fn_type = arc_compute_function_type (cfun);
2481
2482 size = ARC_STACK_ALIGN (size);
2483 size = (!cfun->machine->frame_info.initialized
2484 ? arc_compute_frame_size (size)
2485 : cfun->machine->frame_info.total_size);
2486
2487 unsigned int pretend_size = cfun->machine->frame_info.pretend_size;
2488 unsigned int frame_size;
2489 unsigned int size_to_deallocate;
2490 int restored;
2491 int can_trust_sp_p = !cfun->calls_alloca;
2492 int first_offset = 0;
2493 int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0;
2494 rtx insn;
2495
2496 size_to_deallocate = size;
2497
2498 frame_size = size - (pretend_size +
2499 cfun->machine->frame_info.reg_size +
2500 cfun->machine->frame_info.extra_size);
2501
2502 /* ??? There are lots of optimizations that can be done here.
2503 EG: Use fp to restore regs if it's closer.
2504 Maybe in time we'll do them all. For now, always restore regs from
2505 sp, but don't restore sp if we don't have to. */
2506
2507 if (!can_trust_sp_p)
2508 gcc_assert (frame_pointer_needed);
2509
2510 /* Restore stack pointer to the beginning of saved register area for
2511 ARCompact ISA. */
2512 if (frame_size)
2513 {
2514 if (frame_pointer_needed)
2515 frame_move (stack_pointer_rtx, frame_pointer_rtx);
2516 else
2517 first_offset = frame_size;
2518 size_to_deallocate -= frame_size;
2519 }
2520 else if (!can_trust_sp_p)
2521 frame_stack_add (-frame_size);
2522
2523
2524 /* Restore any saved registers. */
2525 if (frame_pointer_needed)
2526 {
2527 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
2528
2529 insn = frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
2530 stack_pointer_rtx, 0);
2531 add_reg_note (insn, REG_CFA_RESTORE, frame_pointer_rtx);
2532 add_reg_note (insn, REG_CFA_DEF_CFA,
2533 plus_constant (SImode, stack_pointer_rtx,
2534 4));
2535 size_to_deallocate -= UNITS_PER_WORD;
2536 }
2537
2538   /* Load blink after the millicode thunk calls when optimizing for size.  */
2539 if (millicode_p)
2540 {
2541 int sibthunk_p = (!sibcall_p
2542 && fn_type == ARC_FUNCTION_NORMAL
2543 && !cfun->machine->frame_info.pretend_size);
2544
2545 gcc_assert (!(cfun->machine->frame_info.gmask
2546 & (FRAME_POINTER_MASK | RETURN_ADDR_MASK)));
2547 arc_save_restore (stack_pointer_rtx,
2548 cfun->machine->frame_info.gmask,
2549 1 + sibthunk_p, &first_offset);
2550 if (sibthunk_p)
2551 return;
2552 }
2553 /* If we are to restore registers, and first_offset would require
2554 a limm to be encoded in a PRE_MODIFY, yet we can add it with a
2555 fast add to the stack pointer, do this now. */
2556 if ((!SMALL_INT (first_offset)
2557 && cfun->machine->frame_info.gmask
2558 && ((TARGET_ARC700 && !optimize_size)
2559 ? first_offset <= 0x800
2560 : satisfies_constraint_C2a (GEN_INT (first_offset))))
2561 /* Also do this if we have both gprs and return
2562 address to restore, and they both would need a LIMM. */
2563 || (MUST_SAVE_RETURN_ADDR
2564 && !SMALL_INT ((cfun->machine->frame_info.reg_size + first_offset) >> 2)
2565 && cfun->machine->frame_info.gmask))
2566 {
2567 frame_stack_add (first_offset);
2568 first_offset = 0;
2569 }
2570 if (MUST_SAVE_RETURN_ADDR)
2571 {
2572 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
2573 int ra_offs = cfun->machine->frame_info.reg_size + first_offset;
2574 rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs);
2575 HOST_WIDE_INT cfa_adjust = 0;
2576
2577 /* If the load of blink would need a LIMM, but we can add
2578 the offset quickly to sp, do the latter. */
2579 if (!SMALL_INT (ra_offs >> 2)
2580 && !cfun->machine->frame_info.gmask
2581 && ((TARGET_ARC700 && !optimize_size)
2582 ? ra_offs <= 0x800
2583 : satisfies_constraint_C2a (GEN_INT (ra_offs))))
2584 {
2585 size_to_deallocate -= ra_offs - first_offset;
2586 first_offset = 0;
2587 frame_stack_add (ra_offs);
2588 ra_offs = 0;
2589 addr = stack_pointer_rtx;
2590 }
2591 /* See if we can combine the load of the return address with the
2592 final stack adjustment.
2593 We need a separate load if there are still registers to
2594 restore. We also want a separate load if the combined insn
2595 would need a limm, but a separate load doesn't. */
2596 if (ra_offs
2597 && !cfun->machine->frame_info.gmask
2598 && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2)))
2599 {
2600 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr);
2601 cfa_adjust = ra_offs;
2602 first_offset = 0;
2603 size_to_deallocate -= cfun->machine->frame_info.reg_size;
2604 }
2605 else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD)
2606 {
2607 addr = gen_rtx_POST_INC (Pmode, addr);
2608 cfa_adjust = GET_MODE_SIZE (Pmode);
2609 size_to_deallocate = 0;
2610 }
2611
2612 insn = frame_move_inc (ra, gen_frame_mem (Pmode, addr),
2613 stack_pointer_rtx, addr);
2614 if (cfa_adjust)
2615 {
2616 enum reg_note note = REG_CFA_ADJUST_CFA;
2617
2618 add_reg_note (insn, note,
2619 gen_rtx_SET (stack_pointer_rtx,
2620 plus_constant (SImode, stack_pointer_rtx,
2621 cfa_adjust)));
2622 }
2623 add_reg_note (insn, REG_CFA_RESTORE, ra);
2624 }
2625
2626 if (!millicode_p)
2627 {
2628 if (cfun->machine->frame_info.reg_size)
2629 arc_save_restore (stack_pointer_rtx,
2630 /* The zeroing of these two bits is unnecessary, but leave this in for clarity. */
2631 cfun->machine->frame_info.gmask
2632 & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, &first_offset);
2633 }
2634
2635
2636 /* The rest of this function does the following:
2637 ARCompact : handle epilogue_delay, restore sp (phase-2), return
2638 */
2639
2640 /* Keep track of how much of the stack pointer we've restored.
2641 It makes the following a lot more readable. */
2642 size_to_deallocate += first_offset;
2643 restored = size - size_to_deallocate;
2644
2645 if (size > restored)
2646 frame_stack_add (size - restored);
2647
2648 /* Emit the return instruction. */
2649 if (sibcall_p == FALSE)
2650 emit_jump_insn (gen_simple_return ());
2651 }
2652
2653 /* Return the offset relative to the stack pointer where the return address
2654 is stored, or -1 if it is not stored. */
2655
2656 int
2657 arc_return_slot_offset ()
2658 {
2659 struct arc_frame_info *afi = &cfun->machine->frame_info;
2660
2661 return (afi->save_return_addr
2662 ? afi->total_size - afi->pretend_size - afi->extra_size : -1);
2663 }
2664
2665 /* PIC */
2666
2667 /* Emit special PIC prologues and epilogues. */
2668 /* If the function has any GOTOFF relocations, then the GOTBASE
2669 register has to be setup in the prologue
2670 The instruction needed at the function start for setting up the
2671 GOTBASE register is
2672 add rdest, pc,
2673 ----------------------------------------------------------
2674 The rtl to be emitted for this should be:
2675 set (reg basereg)
2676 (plus (reg pc)
2677 (const (unspec (symref _DYNAMIC) 3)))
2678 ---------------------------------------------------------- */
2679
2680 static void
2681 arc_finalize_pic (void)
2682 {
2683 rtx pat;
2684 rtx baseptr_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
2685
2686 if (crtl->uses_pic_offset_table == 0)
2687 return;
2688
2689 gcc_assert (flag_pic != 0);
2690
2691 pat = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC");
2692 pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pat), ARC_UNSPEC_GOT);
2693 pat = gen_rtx_CONST (Pmode, pat);
2694
2695 pat = gen_rtx_SET (baseptr_rtx, pat);
2696
2697 emit_insn (pat);
2698 }
2699
2700 /* !TARGET_BARREL_SHIFTER support. */
2701 /* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what
2702 kind of shift. */
2703
2704 void
2705 emit_shift (enum rtx_code code, rtx op0, rtx op1, rtx op2)
2706 {
2707 rtx shift = gen_rtx_fmt_ee (code, SImode, op1, op2);
2708 rtx pat
2709 = ((shift4_operator (shift, SImode) ? gen_shift_si3 : gen_shift_si3_loop)
2710 (op0, op1, op2, shift));
2711 emit_insn (pat);
2712 }
2713
2714 /* Output the assembler code for doing a shift.
2715 We go to a bit of trouble to generate efficient code as the ARC601 only has
2716 single bit shifts. This is taken from the h8300 port. We only have one
2717 mode of shifting and can't access individual bytes like the h8300 can, so
2718 this is greatly simplified (at the expense of not generating hyper-
2719 efficient code).
2720
2721 This function is not used if the variable shift insns are present. */
2722
2723 /* FIXME: This probably can be done using a define_split in arc.md.
2724 Alternately, generate rtx rather than output instructions. */
2725
2726 const char *
2727 output_shift (rtx *operands)
2728 {
2729 /* static int loopend_lab;*/
2730 rtx shift = operands[3];
2731 machine_mode mode = GET_MODE (shift);
2732 enum rtx_code code = GET_CODE (shift);
2733 const char *shift_one;
2734
2735 gcc_assert (mode == SImode);
2736
2737 switch (code)
2738 {
2739 case ASHIFT: shift_one = "add %0,%1,%1"; break;
2740 case ASHIFTRT: shift_one = "asr %0,%1"; break;
2741 case LSHIFTRT: shift_one = "lsr %0,%1"; break;
2742 default: gcc_unreachable ();
2743 }
2744
2745 if (GET_CODE (operands[2]) != CONST_INT)
2746 {
2747 output_asm_insn ("and.f lp_count,%2, 0x1f", operands);
2748 goto shiftloop;
2749 }
2750 else
2751 {
2752 int n;
2753
2754 n = INTVAL (operands[2]);
2755
2756 /* Only consider the lower 5 bits of the shift count. */
2757 n = n & 0x1f;
2758
2759 /* First see if we can do them inline. */
2760 /* ??? We could get better scheduling & shorter code (using short insns)
2761 by using splitters. Alas, that'd be even more verbose. */
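      /* Sketch of the add3 trick below, assuming the usual ARC semantics
         where add3 a,b,c = b + (c << 3), add2 shifts c by 2, and
         "add %0,%0,%0" doubles (i.e. shifts by 1).  With %4 preset to
         zero, a left shift by 8 becomes:
             mov  %4,0
             add3 %0,%4,%1   ; %0 = %1 << 3
             add3 %0,%4,%0   ; %0 = %1 << 6
             add2 %0,%4,%0   ; %0 = %1 << 8
         which is why only ASHIFT counts of 3..9 take this path.  */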
2762 if (code == ASHIFT && n <= 9 && n > 2
2763 && dest_reg_operand (operands[4], SImode))
2764 {
2765 output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands);
2766 for (n -=3 ; n >= 3; n -= 3)
2767 output_asm_insn ("add3 %0,%4,%0", operands);
2768 if (n == 2)
2769 output_asm_insn ("add2 %0,%4,%0", operands);
2770 else if (n)
2771 output_asm_insn ("add %0,%0,%0", operands);
2772 }
2773 else if (n <= 4)
2774 {
2775 while (--n >= 0)
2776 {
2777 output_asm_insn (shift_one, operands);
2778 operands[1] = operands[0];
2779 }
2780 }
2781 /* See if we can use a rotate/and. */
2782 else if (n == BITS_PER_WORD - 1)
2783 {
2784 switch (code)
2785 {
2786 case ASHIFT :
2787 output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands);
2788 break;
2789 case ASHIFTRT :
2790 /* The ARC doesn't have a rol insn. Use something else. */
2791 output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands);
2792 break;
2793 case LSHIFTRT :
2794 /* The ARC doesn't have a rol insn. Use something else. */
2795 output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands);
2796 break;
2797 default:
2798 break;
2799 }
2800 }
2801 else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode))
2802 {
2803 switch (code)
2804 {
2805 case ASHIFT :
2806 output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands);
2807 break;
2808 case ASHIFTRT :
2809 #if 1 /* Need some scheduling comparisons. */
2810 output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t"
2811 "add.f 0,%4,%4\n\trlc %0,%0", operands);
2812 #else
2813 output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t"
2814 "sbc.f %0,%0,%4\n\trlc %0,%0", operands);
2815 #endif
2816 break;
2817 case LSHIFTRT :
2818 #if 1
2819 output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t"
2820 "add.f 0,%4,%4\n\trlc %0,%0", operands);
2821 #else
2822 output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t"
2823 "and %0,%0,1\n\trlc %0,%0", operands);
2824 #endif
2825 break;
2826 default:
2827 break;
2828 }
2829 }
2830 else if (n == BITS_PER_WORD - 3 && code == ASHIFT)
2831 output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0",
2832 operands);
2833 /* Must loop. */
2834 else
2835 {
2836 operands[2] = GEN_INT (n);
2837 output_asm_insn ("mov.f lp_count, %2", operands);
2838
2839 shiftloop:
2840 {
2841 output_asm_insn ("lpnz\t2f", operands);
2842 output_asm_insn (shift_one, operands);
2843 output_asm_insn ("nop", operands);
2844 fprintf (asm_out_file, "2:\t%s end single insn loop\n",
2845 ASM_COMMENT_START);
2846 }
2847 }
2848 }
2849
2850 return "";
2851 }
2852
2853 /* Nested function support. */
2854
2855 /* Directly store VALUE into memory object BLOCK at OFFSET. */
2856
2857 static void
2858 emit_store_direct (rtx block, int offset, int value)
2859 {
2860 emit_insn (gen_store_direct (adjust_address (block, SImode, offset),
2861 force_reg (SImode,
2862 gen_int_mode (value, SImode))));
2863 }
2864
2865 /* Emit RTL insns to initialize the variable parts of a trampoline.
2866 FNADDR is an RTX for the address of the function's pure code.
2867 CXT is an RTX for the static chain value for the function. */
2868 /* With potentially multiple shared objects loaded, and multiple stacks
2869    present for multiple threads where trampolines might reside, a simple
2870    range check will likely not suffice for the profiler to tell if a callee
2871    is a trampoline.  We use a speedier check by making the trampoline start at
2872 an address that is not 4-byte aligned.
2873 A trampoline looks like this:
2874
2875 nop_s 0x78e0
2876 entry:
2877 ld_s r12,[pcl,12] 0xd403
2878 ld r11,[pcl,12] 0x170c 700b
2879 j_s [r12] 0x7c00
2880 nop_s 0x78e0
2881
2882    The fastest trampoline to execute for trampolines within +-8KB of CXT
2883 would be:
2884 add2 r11,pcl,s12
2885 j [limm] 0x20200f80 limm
2886 and that would also be faster to write to the stack by computing the offset
2887    from CXT to TRAMP at compile time.  However, it would really be better to
2888 get rid of the high cost of cache invalidation when generating trampolines,
2889 which requires that the code part of trampolines stays constant, and
2890 additionally either
2891 - making sure that no executable code but trampolines is on the stack,
2892      no icache entries for the stack area linger from before the
2893      stack was allocated, and allocating trampolines in trampoline-only
2894 cache lines
2895 or
2896    - allocate trampolines from a special pool of pre-allocated trampolines.  */
2897
2898 static void
2899 arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
2900 {
2901 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
2902
2903 emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0);
2904 emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c);
2905 emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 0x7c0078e0 : 0x78e07c00);
2906 emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr);
2907 emit_move_insn (adjust_address (tramp, SImode, 16), cxt);
2908 emit_insn (gen_flush_icache (adjust_address (tramp, SImode, 0)));
2909 }
2910
2911 /* Allow the profiler to easily distinguish trampolines from normal
2912 functions. */
2913
2914 static rtx
2915 arc_trampoline_adjust_address (rtx addr)
2916 {
2917 return plus_constant (Pmode, addr, 2);
2918 }
2919
2920 /* This is set briefly to 1 when we output a ".as" address modifier, and then
2921 reset when we output the scaled address. */
2922 static int output_scaled = 0;
2923
2924 /* Print operand X (an rtx) in assembler syntax to file FILE.
2925 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
2926 For `%' followed by punctuation, CODE is the punctuation and X is null. */
2927 /* In final.c:output_asm_insn:
2928 'l' : label
2929 'a' : address
2930 'c' : constant address if CONSTANT_ADDRESS_P
2931 'n' : negative
2932 Here:
2933 'Z': log2(x+1)-1
2934 'z': log2
2935 'M': log2(~x)
2936 '#': condbranch delay slot suffix
2937 '*': jump delay slot suffix
2938 '?' : nonjump-insn suffix for conditional execution or short instruction
2939 '!' : jump / call suffix for conditional execution or short instruction
2940    '`': fold constant inside unary operator, re-recognize, and emit.
2941 'd'
2942 'D'
2943 'R': Second word
2944 'S'
2945 'B': Branch comparison operand - suppress sda reference
2946 'H': Most significant word
2947 'L': Least significant word
2948 'A': ASCII decimal representation of floating point value
2949 'U': Load/store update or scaling indicator
2950 'V': cache bypass indicator for volatile
2951 'P'
2952 'F'
2953 '^'
2954 'O': Operator
2955 'o': original symbol - no @ prepending. */
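/* Example of the 'U' scaling below (a sketch, assuming the short ld/st
   forms only encode a 9-bit signed byte offset): an SImode access at
   [r1,512] cannot encode 512 directly, but the offset is 4-byte aligned,
   so '%U' prints ".as" and sets output_scaled; the address is then
   printed as [r1,128] and the hardware scales the offset back up by the
   access size.  */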
2956
2957 void
2958 arc_print_operand (FILE *file, rtx x, int code)
2959 {
2960 switch (code)
2961 {
2962 case 'Z':
2963 if (GET_CODE (x) == CONST_INT)
2964 fprintf (file, "%d",exact_log2(INTVAL (x) + 1) - 1 );
2965 else
2966 output_operand_lossage ("invalid operand to %%Z code");
2967
2968 return;
2969
2970 case 'z':
2971 if (GET_CODE (x) == CONST_INT)
2972 fprintf (file, "%d",exact_log2(INTVAL (x)) );
2973 else
2974 output_operand_lossage ("invalid operand to %%z code");
2975
2976 return;
2977
2978 case 'M':
2979 if (GET_CODE (x) == CONST_INT)
2980 fprintf (file, "%d",exact_log2(~INTVAL (x)) );
2981 else
2982 output_operand_lossage ("invalid operand to %%M code");
2983
2984 return;
2985
2986 case '#' :
2987 /* Conditional branches depending on condition codes.
2988 Note that this is only for branches that were known to depend on
2989 condition codes before delay slot scheduling;
2990 out-of-range brcc / bbit expansions should use '*'.
2991 This distinction is important because of the different
2992 allowable delay slot insns and the output of the delay suffix
2993 for TARGET_AT_DBR_COND_EXEC. */
2994 case '*' :
2995 /* Unconditional branches / branches not depending on condition codes.
2996 This could also be a CALL_INSN.
2997 Output the appropriate delay slot suffix. */
2998 if (final_sequence && final_sequence->len () != 1)
2999 {
3000 rtx_insn *jump = final_sequence->insn (0);
3001 rtx_insn *delay = final_sequence->insn (1);
3002
3003 /* For TARGET_PAD_RETURN we might have grabbed the delay insn. */
3004 if (delay->deleted ())
3005 return;
3006 if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
3007 fputs (INSN_FROM_TARGET_P (delay) ? ".d"
3008 : TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d"
3009 : get_attr_type (jump) == TYPE_RETURN && code == '#' ? ""
3010 : ".nd",
3011 file);
3012 else
3013 fputs (".d", file);
3014 }
3015 return;
3016 case '?' : /* with leading "." */
3017 case '!' : /* without leading "." */
3018 /* This insn can be conditionally executed. See if the ccfsm machinery
3019 says it should be conditionalized.
3020 If it shouldn't, we'll check the compact attribute if this insn
3021 has a short variant, which may be used depending on code size and
3022 alignment considerations. */
3023 if (current_insn_predicate)
3024 arc_ccfsm_current.cc
3025 = get_arc_condition_code (current_insn_predicate);
3026 if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current))
3027 {
3028 /* Is this insn in a delay slot sequence? */
3029 if (!final_sequence || XVECLEN (final_sequence, 0) < 2
3030 || current_insn_predicate
3031 || CALL_P (final_sequence->insn (0))
3032 || simplejump_p (final_sequence->insn (0)))
3033 {
3034 /* This insn isn't in a delay slot sequence, or conditionalized
3035 independently of its position in a delay slot. */
3036 fprintf (file, "%s%s",
3037 code == '?' ? "." : "",
3038 arc_condition_codes[arc_ccfsm_current.cc]);
3039 /* If this is a jump, there are still short variants. However,
3040 only beq_s / bne_s have the same offset range as b_s,
3041 and the only short conditional returns are jeq_s and jne_s. */
3042 if (code == '!'
3043 && (arc_ccfsm_current.cc == ARC_CC_EQ
3044 || arc_ccfsm_current.cc == ARC_CC_NE
3045 || 0 /* FIXME: check if branch in 7 bit range. */))
3046 output_short_suffix (file);
3047 }
3048 else if (code == '!') /* Jump with delay slot. */
3049 fputs (arc_condition_codes[arc_ccfsm_current.cc], file);
3050 else /* An Instruction in a delay slot of a jump or call. */
3051 {
3052 rtx jump = XVECEXP (final_sequence, 0, 0);
3053 rtx insn = XVECEXP (final_sequence, 0, 1);
3054
3055 /* If the insn is annulled and is from the target path, we need
3056 	     to invert the condition test.  */
3057 if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
3058 {
3059 if (INSN_FROM_TARGET_P (insn))
3060 fprintf (file, "%s%s",
3061 code == '?' ? "." : "",
3062 arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]);
3063 else
3064 fprintf (file, "%s%s",
3065 code == '?' ? "." : "",
3066 arc_condition_codes[arc_ccfsm_current.cc]);
3067 if (arc_ccfsm_current.state == 5)
3068 arc_ccfsm_current.state = 0;
3069 }
3070 else
3071 /* This insn is executed for either path, so don't
3072 conditionalize it at all. */
3073 output_short_suffix (file);
3074
3075 }
3076 }
3077 else
3078 output_short_suffix (file);
3079 return;
3080     case '`':
3081 /* FIXME: fold constant inside unary operator, re-recognize, and emit. */
3082 gcc_unreachable ();
3083 case 'd' :
3084 fputs (arc_condition_codes[get_arc_condition_code (x)], file);
3085 return;
3086 case 'D' :
3087 fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE
3088 (get_arc_condition_code (x))],
3089 file);
3090 return;
3091 case 'R' :
3092 /* Write second word of DImode or DFmode reference,
3093 register or memory. */
3094 if (GET_CODE (x) == REG)
3095 fputs (reg_names[REGNO (x)+1], file);
3096 else if (GET_CODE (x) == MEM)
3097 {
3098 fputc ('[', file);
3099
3100 /* Handle possible auto-increment. For PRE_INC / PRE_DEC /
3101 PRE_MODIFY, we will have handled the first word already;
3102 For POST_INC / POST_DEC / POST_MODIFY, the access to the
3103 first word will be done later. In either case, the access
3104 to the first word will do the modify, and we only have
3105 to add an offset of four here. */
3106 if (GET_CODE (XEXP (x, 0)) == PRE_INC
3107 || GET_CODE (XEXP (x, 0)) == PRE_DEC
3108 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY
3109 || GET_CODE (XEXP (x, 0)) == POST_INC
3110 || GET_CODE (XEXP (x, 0)) == POST_DEC
3111 || GET_CODE (XEXP (x, 0)) == POST_MODIFY)
3112 output_address (VOIDmode,
3113 plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 4));
3114 else if (output_scaled)
3115 {
3116 rtx addr = XEXP (x, 0);
3117 int size = GET_MODE_SIZE (GET_MODE (x));
3118
3119 output_address (VOIDmode,
3120 plus_constant (Pmode, XEXP (addr, 0),
3121 ((INTVAL (XEXP (addr, 1)) + 4)
3122 >> (size == 2 ? 1 : 2))));
3123 output_scaled = 0;
3124 }
3125 else
3126 output_address (VOIDmode,
3127 plus_constant (Pmode, XEXP (x, 0), 4));
3128 fputc (']', file);
3129 }
3130 else
3131 output_operand_lossage ("invalid operand to %%R code");
3132 return;
3133 case 'S' :
3134 /* FIXME: remove %S option. */
3135 break;
3136 case 'B' /* Branch or other LIMM ref - must not use sda references. */ :
3137 if (CONSTANT_P (x))
3138 {
3139 output_addr_const (file, x);
3140 return;
3141 }
3142 break;
3143 case 'H' :
3144 case 'L' :
3145 if (GET_CODE (x) == REG)
3146 {
3147 /* L = least significant word, H = most significant word. */
3148 if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L'))
3149 fputs (reg_names[REGNO (x)], file);
3150 else
3151 fputs (reg_names[REGNO (x)+1], file);
3152 }
3153 else if (GET_CODE (x) == CONST_INT
3154 || GET_CODE (x) == CONST_DOUBLE)
3155 {
3156 rtx first, second;
3157
3158 split_double (x, &first, &second);
3159
3160 if((WORDS_BIG_ENDIAN) == 0)
3161 fprintf (file, "0x%08" PRIx64,
3162 code == 'L' ? INTVAL (first) : INTVAL (second));
3163 else
3164 fprintf (file, "0x%08" PRIx64,
3165 code == 'L' ? INTVAL (second) : INTVAL (first));
3166
3167
3168 }
3169 else
3170 output_operand_lossage ("invalid operand to %%H/%%L code");
3171 return;
3172 case 'A' :
3173 {
3174 char str[30];
3175
3176 gcc_assert (GET_CODE (x) == CONST_DOUBLE
3177 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT);
3178
3179 real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1);
3180 fprintf (file, "%s", str);
3181 return;
3182 }
3183 case 'U' :
3184 /* Output a load/store with update indicator if appropriate. */
3185 if (GET_CODE (x) == MEM)
3186 {
3187 rtx addr = XEXP (x, 0);
3188 switch (GET_CODE (addr))
3189 {
3190 case PRE_INC: case PRE_DEC: case PRE_MODIFY:
3191 fputs (".a", file); break;
3192 case POST_INC: case POST_DEC: case POST_MODIFY:
3193 fputs (".ab", file); break;
3194 case PLUS:
3195 /* Are we using a scaled index? */
3196 if (GET_CODE (XEXP (addr, 0)) == MULT)
3197 fputs (".as", file);
3198 /* Can we use a scaled offset? */
3199 else if (CONST_INT_P (XEXP (addr, 1))
3200 && GET_MODE_SIZE (GET_MODE (x)) > 1
3201 && (!(INTVAL (XEXP (addr, 1))
3202 & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3))
3203 /* Does it make a difference? */
3204 && !SMALL_INT_RANGE(INTVAL (XEXP (addr, 1)),
3205 GET_MODE_SIZE (GET_MODE (x)) - 2, 0))
3206 {
3207 fputs (".as", file);
3208 output_scaled = 1;
3209 }
3210 break;
3211 case REG:
3212 break;
3213 default:
3214 gcc_assert (CONSTANT_P (addr)); break;
3215 }
3216 }
3217 else
3218 output_operand_lossage ("invalid operand to %%U code");
3219 return;
3220 case 'V' :
3221 /* Output cache bypass indicator for a load/store insn. Volatile memory
3222 refs are defined to use the cache bypass mechanism. */
3223 if (GET_CODE (x) == MEM)
3224 {
3225 if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET )
3226 fputs (".di", file);
3227 }
3228 else
3229 output_operand_lossage ("invalid operand to %%V code");
3230 return;
3231 /* plt code. */
3232 case 'P':
3233 case 0 :
3234 /* Do nothing special. */
3235 break;
3236 case 'F':
3237 fputs (reg_names[REGNO (x)]+1, file);
3238 return;
3239 case '^':
3240 /* This punctuation character is needed because label references are
3241 printed in the output template using %l. This is a front end
3242 character, and when we want to emit a '@' before it, we have to use
3243 this '^'. */
3244
3245 fputc('@',file);
3246 return;
3247 case 'O':
3248 /* Output an operator. */
3249 switch (GET_CODE (x))
3250 {
3251 case PLUS: fputs ("add", file); return;
3252 case SS_PLUS: fputs ("adds", file); return;
3253 case AND: fputs ("and", file); return;
3254 case IOR: fputs ("or", file); return;
3255 case XOR: fputs ("xor", file); return;
3256 case MINUS: fputs ("sub", file); return;
3257 case SS_MINUS: fputs ("subs", file); return;
3258 case ASHIFT: fputs ("asl", file); return;
3259 case ASHIFTRT: fputs ("asr", file); return;
3260 case LSHIFTRT: fputs ("lsr", file); return;
3261 case ROTATERT: fputs ("ror", file); return;
3262 case MULT: fputs ("mpy", file); return;
3263 case ABS: fputs ("abs", file); return; /* Unconditional. */
3264 case NEG: fputs ("neg", file); return;
3265 case SS_NEG: fputs ("negs", file); return;
3266 case NOT: fputs ("not", file); return; /* Unconditional. */
3267 case ZERO_EXTEND:
3268 fputs ("ext", file); /* bmsk allows predication. */
3269 goto size_suffix;
3270 case SIGN_EXTEND: /* Unconditional. */
3271 fputs ("sex", file);
3272 size_suffix:
3273 switch (GET_MODE (XEXP (x, 0)))
3274 {
3275 case QImode: fputs ("b", file); return;
3276 case HImode: fputs ("w", file); return;
3277 default: break;
3278 }
3279 break;
3280 case SS_TRUNCATE:
3281 if (GET_MODE (x) != HImode)
3282 break;
3283 fputs ("sat16", file);
3284 default: break;
3285 }
3286 output_operand_lossage ("invalid operand to %%O code"); return;
3287 case 'o':
3288 if (GET_CODE (x) == SYMBOL_REF)
3289 {
3290 assemble_name (file, XSTR (x, 0));
3291 return;
3292 }
3293 break;
3294 case '&':
3295 if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
3296 fprintf (file, "; unalign: %d", cfun->machine->unalign);
3297 return;
3298 case '+':
3299 if (TARGET_V2)
3300 fputs ("m", file);
3301 else
3302 fputs ("h", file);
3303 return;
3304 case '_':
3305 if (TARGET_V2)
3306 fputs ("h", file);
3307 else
3308 fputs ("w", file);
3309 return;
3310 default :
3311 /* Unknown flag. */
3312 output_operand_lossage ("invalid operand output code");
3313 }
3314
3315 switch (GET_CODE (x))
3316 {
3317 case REG :
3318 fputs (reg_names[REGNO (x)], file);
3319 break;
3320 case MEM :
3321 {
3322 rtx addr = XEXP (x, 0);
3323 int size = GET_MODE_SIZE (GET_MODE (x));
3324
3325 fputc ('[', file);
3326
3327 switch (GET_CODE (addr))
3328 {
3329 case PRE_INC: case POST_INC:
3330 output_address (VOIDmode,
3331 plus_constant (Pmode, XEXP (addr, 0), size)); break;
3332 case PRE_DEC: case POST_DEC:
3333 output_address (VOIDmode,
3334 plus_constant (Pmode, XEXP (addr, 0), -size));
3335 break;
3336 case PRE_MODIFY: case POST_MODIFY:
3337 output_address (VOIDmode, XEXP (addr, 1)); break;
3338 case PLUS:
3339 if (output_scaled)
3340 {
3341 output_address (VOIDmode,
3342 plus_constant (Pmode, XEXP (addr, 0),
3343 (INTVAL (XEXP (addr, 1))
3344 >> (size == 2 ? 1 : 2))));
3345 output_scaled = 0;
3346 }
3347 else
3348 output_address (VOIDmode, addr);
3349 break;
3350 default:
3351 if (flag_pic && CONSTANT_ADDRESS_P (addr))
3352 arc_output_pic_addr_const (file, addr, code);
3353 else
3354 output_address (VOIDmode, addr);
3355 break;
3356 }
3357 fputc (']', file);
3358 break;
3359 }
3360 case CONST_DOUBLE :
3361 /* We handle SFmode constants here as output_addr_const doesn't. */
3362 if (GET_MODE (x) == SFmode)
3363 {
3364 long l;
3365
3366 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
3367 fprintf (file, "0x%08lx", l);
3368 break;
3369 }
3370 /* Fall through. Let output_addr_const deal with it. */
3371 default :
3372 if (flag_pic)
3373 arc_output_pic_addr_const (file, x, code);
3374 else
3375 {
3376 /* FIXME: Dirty way to handle @var@sda+const. Shd be handled
3377 with asm_output_symbol_ref */
3378 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3379 {
3380 x = XEXP (x, 0);
3381 output_addr_const (file, XEXP (x, 0));
3382 if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF && SYMBOL_REF_SMALL_P (XEXP (x, 0)))
3383 fprintf (file, "@sda");
3384
3385 if (GET_CODE (XEXP (x, 1)) != CONST_INT
3386 || INTVAL (XEXP (x, 1)) >= 0)
3387 fprintf (file, "+");
3388 output_addr_const (file, XEXP (x, 1));
3389 }
3390 else
3391 output_addr_const (file, x);
3392 }
3393 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
3394 fprintf (file, "@sda");
3395 break;
3396 }
3397 }
3398
3399 /* Print a memory address as an operand to reference that memory location. */
3400
3401 void
3402 arc_print_operand_address (FILE *file, rtx addr)
3403 {
3404 register rtx base, index = 0;
3405
3406 switch (GET_CODE (addr))
3407 {
3408 case REG :
3409 fputs (reg_names[REGNO (addr)], file);
3410 break;
3411 case SYMBOL_REF :
3412 output_addr_const (file, addr);
3413 if (SYMBOL_REF_SMALL_P (addr))
3414 fprintf (file, "@sda");
3415 break;
3416 case PLUS :
3417 if (GET_CODE (XEXP (addr, 0)) == MULT)
3418 index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1);
3419 else if (CONST_INT_P (XEXP (addr, 0)))
3420 index = XEXP (addr, 0), base = XEXP (addr, 1);
3421 else
3422 base = XEXP (addr, 0), index = XEXP (addr, 1);
3423
3424 gcc_assert (OBJECT_P (base));
3425 arc_print_operand_address (file, base);
3426 if (CONSTANT_P (base) && CONST_INT_P (index))
3427 fputc ('+', file);
3428 else
3429 fputc (',', file);
3430 gcc_assert (OBJECT_P (index));
3431 arc_print_operand_address (file, index);
3432 break;
3433 case CONST:
3434 {
3435 rtx c = XEXP (addr, 0);
3436
3437 gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF);
3438 gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT);
3439
3440 output_address (VOIDmode, XEXP (addr, 0));
3441
3442 break;
3443 }
3444 case PRE_INC :
3445 case PRE_DEC :
3446       /* We shouldn't get here as we've lost the mode of the memory object
3447 	 (which says how much to inc/dec by). */
3448 gcc_unreachable ();
3449 break;
3450 default :
3451 if (flag_pic)
3452 arc_output_pic_addr_const (file, addr, 0);
3453 else
3454 output_addr_const (file, addr);
3455 break;
3456 }
3457 }
3458
3459 /* Called via walk_stores. DATA points to a hash table we can use to
3460 establish a unique SYMBOL_REF for each counter, which corresponds to
3461 a caller-callee pair.
3462 X is a store which we want to examine for an UNSPEC_PROF, which
3463 would be an address loaded into a register, or directly used in a MEM.
3464 If we found an UNSPEC_PROF, if we encounter a new counter the first time,
3465 write out a description and a data allocation for a 32 bit counter.
3466 Also, fill in the appropriate symbol_ref into each UNSPEC_PROF instance. */
3467
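/* Illustrative sketch (not in the original sources): for a hypothetical
   callee "foo" whose counter ends up named __prof_count0, the code below
   would emit roughly

	.section	.__arc_profile_desc, "a"
	.long	foo
	.section	.__arc_profile_counters, "aw"
	.type	__prof_count0, @object
	.size	__prof_count0, 4
   __prof_count0:	.zero 4

   and later stores for the same caller-callee pair are rewritten to
   reference the already-allocated __prof_count0 symbol.  */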
3468 static void
3469 write_profile_sections (rtx dest ATTRIBUTE_UNUSED, rtx x, void *data)
3470 {
3471 rtx *srcp, src;
3472 htab_t htab = (htab_t) data;
3473 rtx *slot;
3474
3475 if (GET_CODE (x) != SET)
3476 return;
3477 srcp = &SET_SRC (x);
3478 if (MEM_P (*srcp))
3479 srcp = &XEXP (*srcp, 0);
3480 else if (MEM_P (SET_DEST (x)))
3481 srcp = &XEXP (SET_DEST (x), 0);
3482 src = *srcp;
3483 if (GET_CODE (src) != CONST)
3484 return;
3485 src = XEXP (src, 0);
3486 if (GET_CODE (src) != UNSPEC || XINT (src, 1) != UNSPEC_PROF)
3487 return;
3488
3489 gcc_assert (XVECLEN (src, 0) == 3);
3490 if (!htab_elements (htab))
3491 {
3492 output_asm_insn (".section .__arc_profile_desc, \"a\"\n"
3493 "\t.long %0 + 1\n",
3494 &XVECEXP (src, 0, 0));
3495 }
3496 slot = (rtx *) htab_find_slot (htab, src, INSERT);
3497 if (*slot == HTAB_EMPTY_ENTRY)
3498 {
3499 static int count_nr;
3500 char buf[24];
3501 rtx count;
3502
3503 *slot = src;
3504 sprintf (buf, "__prof_count%d", count_nr++);
3505 count = gen_rtx_SYMBOL_REF (Pmode, xstrdup (buf));
3506 XVECEXP (src, 0, 2) = count;
3507 output_asm_insn (".section\t.__arc_profile_desc, \"a\"\n"
3508 "\t.long\t%1\n"
3509 "\t.section\t.__arc_profile_counters, \"aw\"\n"
3510 "\t.type\t%o2, @object\n"
3511 "\t.size\t%o2, 4\n"
3512 "%o2:\t.zero 4",
3513 &XVECEXP (src, 0, 0));
3514 *srcp = count;
3515 }
3516 else
3517 *srcp = XVECEXP (*slot, 0, 2);
3518 }
3519
3520 /* Hash function for UNSPEC_PROF htab. Use both the caller's name and
3521 the callee's name (if known). */
3522
3523 static hashval_t
3524 unspec_prof_hash (const void *x)
3525 {
3526 const_rtx u = (const_rtx) x;
3527 const_rtx s1 = XVECEXP (u, 0, 1);
3528
3529 return (htab_hash_string (XSTR (XVECEXP (u, 0, 0), 0))
3530 ^ (s1->code == SYMBOL_REF ? htab_hash_string (XSTR (s1, 0)) : 0));
3531 }
3532
3533 /* Equality function for UNSPEC_PROF htab. Two pieces of UNSPEC_PROF rtl
3534 shall refer to the same counter if both caller name and callee rtl
3535 are identical. */
3536
3537 static int
3538 unspec_prof_htab_eq (const void *x, const void *y)
3539 {
3540 const_rtx u0 = (const_rtx) x;
3541 const_rtx u1 = (const_rtx) y;
3542 const_rtx s01 = XVECEXP (u0, 0, 1);
3543 const_rtx s11 = XVECEXP (u1, 0, 1);
3544
3545 return (!strcmp (XSTR (XVECEXP (u0, 0, 0), 0),
3546 XSTR (XVECEXP (u1, 0, 0), 0))
3547 && rtx_equal_p (s01, s11));
3548 }
3549
3550 /* Conditional execution support.
3551
3552 This is based on the ARM port but for now is much simpler.
3553
3554 A finite state machine takes care of noticing whether or not instructions
3555 can be conditionally executed, and thus decrease execution time and code
3556 size by deleting branch instructions. The fsm is controlled by
3557 arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the
3558 actions of PRINT_OPERAND. The patterns in the .md file for the branch
3559 insns also have a hand in this. */
3560 /* The way we leave dealing with non-annulled or annul-false delay slot
3561    insns to the consumer is awkward. */
3562
3563 /* The state of the fsm controlling condition codes are:
3564 0: normal, do nothing special
3565 1: don't output this insn
3566 2: don't output this insn
3567 3: make insns conditional
3568 4: make insns conditional
3569    5: make insn conditional (only for outputting annulled delay slot insns)
3570
3571 special value for cfun->machine->uid_ccfsm_state:
3572 6: return with but one insn before it since function start / call
3573
3574 State transitions (state->state by whom, under what condition):
3575 0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over
3576 some instructions.
3577 0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed
3578 by zero or more non-jump insns and an unconditional branch with
3579 the same target label as the condbranch.
3580 1 -> 3 branch patterns, after having not output the conditional branch
3581 2 -> 4 branch patterns, after having not output the conditional branch
3582    0 -> 5 branch patterns, for annulled delay slot insn.
3583 3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached
3584 (the target label has CODE_LABEL_NUMBER equal to
3585 arc_ccfsm_target_label).
3586 4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached
3587 3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns.
3588 5 -> 0 when outputting the delay slot insn
3589
3590 If the jump clobbers the conditions then we use states 2 and 4.
3591
3592 A similar thing can be done with conditional return insns.
3593
3594 We also handle separating branches from sets of the condition code.
3595    This is done here because knowledge of the ccfsm state is required;
3596    we may not be outputting the branch. */
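/* Illustrative sketch (not in the original sources): with conditional
   execution, a sequence along the lines of

	cmp	r0,0
	bne	.L1
	mov	r1,1
   .L1:

   can instead be emitted as

	cmp	r0,0
	mov.eq	r1,1

   The fsm below notices the conditional branch over the single insn,
   suppresses the branch and the label, and PRINT_OPERAND adds the ".eq"
   predicate to the skipped insn.  */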
3597
3598 /* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current,
3599 before letting final output INSN. */
3600
3601 static void
3602 arc_ccfsm_advance (rtx_insn *insn, struct arc_ccfsm *state)
3603 {
3604 /* BODY will hold the body of INSN. */
3605 register rtx body;
3606
3607 /* This will be 1 if trying to repeat the trick (ie: do the `else' part of
3608 an if/then/else), and things need to be reversed. */
3609 int reverse = 0;
3610
3611 /* If we start with a return insn, we only succeed if we find another one. */
3612 int seeking_return = 0;
3613
3614 /* START_INSN will hold the insn from where we start looking. This is the
3615 first insn after the following code_label if REVERSE is true. */
3616 rtx_insn *start_insn = insn;
3617
3618 /* Type of the jump_insn. Brcc insns don't affect ccfsm changes,
3619      since they don't rely on a cmp preceding them. */
3620 enum attr_type jump_insn_type;
3621
3622 /* Allow -mdebug-ccfsm to turn this off so we can see how well it does.
3623      We can't do this in macro FINAL_PRESCAN_INSN because it's called from
3624 final_scan_insn which has `optimize' as a local. */
3625 if (optimize < 2 || TARGET_NO_COND_EXEC)
3626 return;
3627
3628 /* Ignore notes and labels. */
3629 if (!INSN_P (insn))
3630 return;
3631 body = PATTERN (insn);
3632 /* If in state 4, check if the target branch is reached, in order to
3633 change back to state 0. */
3634 if (state->state == 4)
3635 {
3636 if (insn == state->target_insn)
3637 {
3638 state->target_insn = NULL;
3639 state->state = 0;
3640 }
3641 return;
3642 }
3643
3644 /* If in state 3, it is possible to repeat the trick, if this insn is an
3645 unconditional branch to a label, and immediately following this branch
3646 is the previous target label which is only used once, and the label this
3647 branch jumps to is not too far off. Or in other words "we've done the
3648 `then' part, see if we can do the `else' part." */
3649 if (state->state == 3)
3650 {
3651 if (simplejump_p (insn))
3652 {
3653 start_insn = next_nonnote_insn (start_insn);
3654 if (GET_CODE (start_insn) == BARRIER)
3655 {
3656 /* ??? Isn't this always a barrier? */
3657 start_insn = next_nonnote_insn (start_insn);
3658 }
3659 if (GET_CODE (start_insn) == CODE_LABEL
3660 && CODE_LABEL_NUMBER (start_insn) == state->target_label
3661 && LABEL_NUSES (start_insn) == 1)
3662 reverse = TRUE;
3663 else
3664 return;
3665 }
3666 else if (GET_CODE (body) == SIMPLE_RETURN)
3667 {
3668 start_insn = next_nonnote_insn (start_insn);
3669 if (GET_CODE (start_insn) == BARRIER)
3670 start_insn = next_nonnote_insn (start_insn);
3671 if (GET_CODE (start_insn) == CODE_LABEL
3672 && CODE_LABEL_NUMBER (start_insn) == state->target_label
3673 && LABEL_NUSES (start_insn) == 1)
3674 {
3675 reverse = TRUE;
3676 seeking_return = 1;
3677 }
3678 else
3679 return;
3680 }
3681 else
3682 return;
3683 }
3684
3685 if (GET_CODE (insn) != JUMP_INSN
3686 || GET_CODE (PATTERN (insn)) == ADDR_VEC
3687 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
3688 return;
3689
3690 /* We can't predicate BRCC or loop ends.
3691 Also, when generating PIC code, and considering a medium range call,
3692 we can't predicate the call. */
3693 jump_insn_type = get_attr_type (insn);
3694 if (jump_insn_type == TYPE_BRCC
3695 || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT
3696 || jump_insn_type == TYPE_LOOP_END
3697 || (jump_insn_type == TYPE_CALL && !get_attr_predicable (insn)))
3698 return;
3699
3700   /* This jump might be paralleled with a clobber of the condition codes;
3701      the jump should always come first. */
3702 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
3703 body = XVECEXP (body, 0, 0);
3704
3705 if (reverse
3706 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
3707 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
3708 {
3709 int insns_skipped = 0, fail = FALSE, succeed = FALSE;
3710 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
3711 int then_not_else = TRUE;
3712 /* Nonzero if next insn must be the target label. */
3713 int next_must_be_target_label_p;
3714 rtx_insn *this_insn = start_insn;
3715 rtx label = 0;
3716
3717 /* Register the insn jumped to. */
3718 if (reverse)
3719 {
3720 if (!seeking_return)
3721 label = XEXP (SET_SRC (body), 0);
3722 }
3723 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
3724 label = XEXP (XEXP (SET_SRC (body), 1), 0);
3725 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
3726 {
3727 label = XEXP (XEXP (SET_SRC (body), 2), 0);
3728 then_not_else = FALSE;
3729 }
3730 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == SIMPLE_RETURN)
3731 seeking_return = 1;
3732 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == SIMPLE_RETURN)
3733 {
3734 seeking_return = 1;
3735 then_not_else = FALSE;
3736 }
3737 else
3738 gcc_unreachable ();
3739
3740 /* If this is a non-annulled branch with a delay slot, there is
3741 no need to conditionalize the delay slot. */
3742 if (NEXT_INSN (PREV_INSN (insn)) != insn
3743 && state->state == 0 && !INSN_ANNULLED_BRANCH_P (insn))
3744 {
3745 this_insn = NEXT_INSN (this_insn);
3746 gcc_assert (NEXT_INSN (NEXT_INSN (PREV_INSN (start_insn)))
3747 == NEXT_INSN (this_insn));
3748 }
3749 /* See how many insns this branch skips, and what kind of insns. If all
3750 insns are okay, and the label or unconditional branch to the same
3751 label is not too far away, succeed. */
3752 for (insns_skipped = 0, next_must_be_target_label_p = FALSE;
3753 !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED;
3754 insns_skipped++)
3755 {
3756 rtx scanbody;
3757
3758 this_insn = next_nonnote_insn (this_insn);
3759 if (!this_insn)
3760 break;
3761
3762 if (next_must_be_target_label_p)
3763 {
3764 if (GET_CODE (this_insn) == BARRIER)
3765 continue;
3766 if (GET_CODE (this_insn) == CODE_LABEL
3767 && this_insn == label)
3768 {
3769 state->state = 1;
3770 succeed = TRUE;
3771 }
3772 else
3773 fail = TRUE;
3774 break;
3775 }
3776
3777 scanbody = PATTERN (this_insn);
3778
3779 switch (GET_CODE (this_insn))
3780 {
3781 case CODE_LABEL:
3782 /* Succeed if it is the target label, otherwise fail since
3783 control falls in from somewhere else. */
3784 if (this_insn == label)
3785 {
3786 state->state = 1;
3787 succeed = TRUE;
3788 }
3789 else
3790 fail = TRUE;
3791 break;
3792
3793 case BARRIER:
3794 /* Succeed if the following insn is the target label.
3795 Otherwise fail.
3796 If return insns are used then the last insn in a function
3797 will be a barrier. */
3798 next_must_be_target_label_p = TRUE;
3799 break;
3800
3801 case CALL_INSN:
3802 /* Can handle a call insn if there are no insns after it.
3803 IE: The next "insn" is the target label. We don't have to
3804 worry about delay slots as such insns are SEQUENCE's inside
3805 INSN's. ??? It is possible to handle such insns though. */
3806 if (get_attr_cond (this_insn) == COND_CANUSE)
3807 next_must_be_target_label_p = TRUE;
3808 else
3809 fail = TRUE;
3810 break;
3811
3812 case JUMP_INSN:
3813 /* If this is an unconditional branch to the same label, succeed.
3814 If it is to another label, do nothing. If it is conditional,
3815 fail. */
3816 /* ??? Probably, the test for the SET and the PC are
3817 unnecessary. */
3818
3819 if (GET_CODE (scanbody) == SET
3820 && GET_CODE (SET_DEST (scanbody)) == PC)
3821 {
3822 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
3823 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
3824 {
3825 state->state = 2;
3826 succeed = TRUE;
3827 }
3828 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
3829 fail = TRUE;
3830 else if (get_attr_cond (this_insn) != COND_CANUSE)
3831 fail = TRUE;
3832 }
3833 else if (GET_CODE (scanbody) == SIMPLE_RETURN
3834 && seeking_return)
3835 {
3836 state->state = 2;
3837 succeed = TRUE;
3838 }
3839 else if (GET_CODE (scanbody) == PARALLEL)
3840 {
3841 if (get_attr_cond (this_insn) != COND_CANUSE)
3842 fail = TRUE;
3843 }
3844 break;
3845
3846 case INSN:
3847 /* We can only do this with insns that can use the condition
3848 codes (and don't set them). */
3849 if (GET_CODE (scanbody) == SET
3850 || GET_CODE (scanbody) == PARALLEL)
3851 {
3852 if (get_attr_cond (this_insn) != COND_CANUSE)
3853 fail = TRUE;
3854 }
3855 /* We can't handle other insns like sequences. */
3856 else
3857 fail = TRUE;
3858 break;
3859
3860 default:
3861 break;
3862 }
3863 }
3864
3865 if (succeed)
3866 {
3867 if ((!seeking_return) && (state->state == 1 || reverse))
3868 state->target_label = CODE_LABEL_NUMBER (label);
3869 else if (seeking_return || state->state == 2)
3870 {
3871 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
3872 {
3873 this_insn = next_nonnote_insn (this_insn);
3874
3875 gcc_assert (!this_insn ||
3876 (GET_CODE (this_insn) != BARRIER
3877 && GET_CODE (this_insn) != CODE_LABEL));
3878 }
3879 if (!this_insn)
3880 {
3881 /* Oh dear! we ran off the end, give up. */
3882 extract_insn_cached (insn);
3883 state->state = 0;
3884 state->target_insn = NULL;
3885 return;
3886 }
3887 state->target_insn = this_insn;
3888 }
3889 else
3890 gcc_unreachable ();
3891
3892       /* If REVERSE is true, the condition we record in state->cc needs to
3893 	 be inverted from what it was. */
3894 if (!reverse)
3895 {
3896 state->cond = XEXP (SET_SRC (body), 0);
3897 state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0));
3898 }
3899
3900 if (reverse || then_not_else)
3901 state->cc = ARC_INVERSE_CONDITION_CODE (state->cc);
3902 }
3903
3904 /* Restore recog_operand. Getting the attributes of other insns can
3905 destroy this array, but final.c assumes that it remains intact
3906 across this call; since the insn has been recognized already we
3907 	 call insn_extract directly. */
3908 extract_insn_cached (insn);
3909 }
3910 }
3911
3912 /* Record that we are currently outputting label NUM with prefix PREFIX.
3913    If it's the label we're looking for, reset the ccfsm machinery.
3914
3915 Called from ASM_OUTPUT_INTERNAL_LABEL. */
3916
3917 static void
3918 arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state)
3919 {
3920 if (state->state == 3 && state->target_label == num
3921 && !strcmp (prefix, "L"))
3922 {
3923 state->state = 0;
3924 state->target_insn = NULL;
3925 }
3926 }
3927
3928 /* We are considering a conditional branch with the condition COND.
3929 Check if we want to conditionalize a delay slot insn, and if so modify
3930 the ccfsm state accordingly.
3931 REVERSE says branch will branch when the condition is false. */
3932 void
3933 arc_ccfsm_record_condition (rtx cond, bool reverse, rtx_insn *jump,
3934 struct arc_ccfsm *state)
3935 {
3936 rtx_insn *seq_insn = NEXT_INSN (PREV_INSN (jump));
3937 if (!state)
3938 state = &arc_ccfsm_current;
3939
3940 gcc_assert (state->state == 0);
3941 if (seq_insn != jump)
3942 {
3943 rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1);
3944
3945 if (!as_a<rtx_insn *> (insn)->deleted ()
3946 && INSN_ANNULLED_BRANCH_P (jump)
3947 && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn)))
3948 {
3949 state->cond = cond;
3950 state->cc = get_arc_condition_code (cond);
3951 if (!reverse)
3952 arc_ccfsm_current.cc
3953 = ARC_INVERSE_CONDITION_CODE (state->cc);
3954 rtx pat = PATTERN (insn);
3955 if (GET_CODE (pat) == COND_EXEC)
3956 gcc_assert ((INSN_FROM_TARGET_P (insn)
3957 ? ARC_INVERSE_CONDITION_CODE (state->cc) : state->cc)
3958 == get_arc_condition_code (XEXP (pat, 0)));
3959 else
3960 state->state = 5;
3961 }
3962 }
3963 }
3964
3965 /* Update *STATE as we would when we emit INSN. */
3966
3967 static void
3968 arc_ccfsm_post_advance (rtx_insn *insn, struct arc_ccfsm *state)
3969 {
3970 enum attr_type type;
3971
3972 if (LABEL_P (insn))
3973 arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state);
3974 else if (JUMP_P (insn)
3975 && GET_CODE (PATTERN (insn)) != ADDR_VEC
3976 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
3977 && ((type = get_attr_type (insn)) == TYPE_BRANCH
3978 || (type == TYPE_UNCOND_BRANCH
3979 /* ??? Maybe should also handle TYPE_RETURN here,
3980 but we don't have a testcase for that. */
3981 && ARC_CCFSM_BRANCH_DELETED_P (state))))
3982 {
3983 if (ARC_CCFSM_BRANCH_DELETED_P (state))
3984 ARC_CCFSM_RECORD_BRANCH_DELETED (state);
3985 else
3986 {
3987 rtx src = SET_SRC (PATTERN (insn));
3988 arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx,
3989 insn, state);
3990 }
3991 }
3992 else if (arc_ccfsm_current.state == 5)
3993 arc_ccfsm_current.state = 0;
3994 }
3995
3996 /* Return true if the current insn, which is a conditional branch, is to be
3997 deleted. */
3998
3999 bool
4000 arc_ccfsm_branch_deleted_p (void)
4001 {
4002 return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current);
4003 }
4004
4005 /* Record that a branch is not being output because subsequent insns can be
4006    conditionalized. */
4007
4008 void
4009 arc_ccfsm_record_branch_deleted (void)
4010 {
4011 ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current);
4012 }
4013
4014 /* During insn output, indicate if the current insn is predicated. */
4015
4016 bool
4017 arc_ccfsm_cond_exec_p (void)
4018 {
4019 return (cfun->machine->prescan_initialized
4020 && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current));
4021 }
4022
4023 /* Like next_active_insn, but return NULL if we find an ADDR_(DIFF_)VEC,
4024 and look inside SEQUENCEs. */
4025
4026 static rtx_insn *
4027 arc_next_active_insn (rtx_insn *insn, struct arc_ccfsm *statep)
4028 {
4029 rtx pat;
4030
4031 do
4032 {
4033 if (statep)
4034 arc_ccfsm_post_advance (insn, statep);
4035 insn = NEXT_INSN (insn);
4036 if (!insn || BARRIER_P (insn))
4037 return NULL;
4038 if (statep)
4039 arc_ccfsm_advance (insn, statep);
4040 }
4041 while (NOTE_P (insn)
4042 || (cfun->machine->arc_reorg_started
4043 && LABEL_P (insn) && !label_to_alignment (insn))
4044 || (NONJUMP_INSN_P (insn)
4045 && (GET_CODE (PATTERN (insn)) == USE
4046 || GET_CODE (PATTERN (insn)) == CLOBBER)));
4047 if (!LABEL_P (insn))
4048 {
4049 gcc_assert (INSN_P (insn));
4050 pat = PATTERN (insn);
4051 if (GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC)
4052 return NULL;
4053 if (GET_CODE (pat) == SEQUENCE)
4054 return as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
4055 }
4056 return insn;
4057 }
4058
4059 /* When deciding if an insn should be output short, we want to know something
4060 about the following insns:
4061 - if another insn follows which we know we can output as a short insn
4062 before an alignment-sensitive point, we can output this insn short:
4063 the decision about the eventual alignment can be postponed.
4064 - if a to-be-aligned label comes next, we should output this insn such
4065 as to get / preserve 4-byte alignment.
4066 - if a likely branch without delay slot insn, or a call with an immediately
4067      following short insn comes next, we should output this insn such as to
4068 get / preserve 2 mod 4 unalignment.
4069 - do the same for a not completely unlikely branch with a short insn
4070 following before any other branch / label.
4071 - in order to decide if we are actually looking at a branch, we need to
4072 call arc_ccfsm_advance.
4073 - in order to decide if we are looking at a short insn, we should know
4074 if it is conditionalized. To a first order of approximation this is
4075 the case if the state from arc_ccfsm_advance from before this insn
4076 indicates the insn is conditionalized. However, a further refinement
4077 could be to not conditionalize an insn if the destination register(s)
4078 is/are dead in the non-executed case. */
4079 /* Return non-zero if INSN should be output as a short insn. UNALIGN is
4080 zero if the current insn is aligned to a 4-byte-boundary, two otherwise.
4081 If CHECK_ATTR is greater than 0, check the iscompact attribute first. */
4082
4083 int
4084 arc_verify_short (rtx_insn *insn, int, int check_attr)
4085 {
4086 enum attr_iscompact iscompact;
4087 struct machine_function *machine;
4088
4089 if (check_attr > 0)
4090 {
4091 iscompact = get_attr_iscompact (insn);
4092 if (iscompact == ISCOMPACT_FALSE)
4093 return 0;
4094 }
4095 machine = cfun->machine;
4096
4097 if (machine->force_short_suffix >= 0)
4098 return machine->force_short_suffix;
4099
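  /* Added note (descriptive, not in the original sources): otherwise the
     insn is considered short iff branch shortening assigned it a length of
     2 mod 4 bytes (i.e. a 2-byte, or 2-byte-plus-limm, encoding).  */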
4100 return (get_attr_length (insn) & 2) != 0;
4101 }
4102
4103 /* When outputting an instruction (alternative) that can potentially be short,
4104 output the short suffix if the insn is in fact short, and update
4105 cfun->machine->unalign accordingly. */
4106
4107 static void
4108 output_short_suffix (FILE *file)
4109 {
4110 rtx_insn *insn = current_output_insn;
4111
4112 if (arc_verify_short (insn, cfun->machine->unalign, 1))
4113 {
4114 fprintf (file, "_s");
4115 cfun->machine->unalign ^= 2;
4116 }
4117 /* Restore recog_operand. */
4118 extract_insn_cached (insn);
4119 }
4120
4121 /* Implement FINAL_PRESCAN_INSN. */
4122
4123 void
4124 arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
4125 int noperands ATTRIBUTE_UNUSED)
4126 {
4127 if (TARGET_DUMPISIZE)
4128 fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4129
4130 /* Output a nop if necessary to prevent a hazard.
4131 Don't do this for delay slots: inserting a nop would
4132 alter semantics, and the only time we would find a hazard is for a
4133 call function result - and in that case, the hazard is spurious to
4134 start with. */
4135 if (PREV_INSN (insn)
4136 && PREV_INSN (NEXT_INSN (insn)) == insn
4137 && arc_hazard (prev_real_insn (insn), insn))
4138 {
4139 current_output_insn =
4140 emit_insn_before (gen_nop (), NEXT_INSN (PREV_INSN (insn)));
4141 final_scan_insn (current_output_insn, asm_out_file, optimize, 1, NULL);
4142 current_output_insn = insn;
4143 }
4144 /* Restore extraction data which might have been clobbered by arc_hazard. */
4145 extract_constrain_insn_cached (insn);
4146
4147 if (!cfun->machine->prescan_initialized)
4148 {
4149 /* Clear lingering state from branch shortening. */
4150 memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current);
4151 cfun->machine->prescan_initialized = 1;
4152 }
4153 arc_ccfsm_advance (insn, &arc_ccfsm_current);
4154
4155 cfun->machine->size_reason = 0;
4156 }
4157
4158 /* Given FROM and TO register numbers, say whether this elimination is allowed.
4159 Frame pointer elimination is automatically handled.
4160
4161 All eliminations are permissible. If we need a frame
4162 pointer, we must eliminate ARG_POINTER_REGNUM into
4163 FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4164
4165 static bool
4166 arc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
4167 {
4168 return to == FRAME_POINTER_REGNUM || !arc_frame_pointer_required ();
4169 }
4170
4171 /* Define the offset between two registers, one to be eliminated, and
4172 the other its replacement, at the start of a routine. */
4173
4174 int
4175 arc_initial_elimination_offset (int from, int to)
4176 {
4177 if (! cfun->machine->frame_info.initialized)
4178 arc_compute_frame_size (get_frame_size ());
4179
4180 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
4181 {
4182 return (cfun->machine->frame_info.extra_size
4183 + cfun->machine->frame_info.reg_size);
4184 }
4185
4186 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4187 {
4188 return (cfun->machine->frame_info.total_size
4189 - cfun->machine->frame_info.pretend_size);
4190 }
4191
4192 if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM))
4193 {
4194 return (cfun->machine->frame_info.total_size
4195 - (cfun->machine->frame_info.pretend_size
4196 + cfun->machine->frame_info.extra_size
4197 + cfun->machine->frame_info.reg_size));
4198 }
4199
4200 gcc_unreachable ();
4201 }
4202
4203 static bool
4204 arc_frame_pointer_required (void)
4205 {
4206 return cfun->calls_alloca;
4207 }
4208
4209
4210 /* Return the destination address of a branch. */
4211
4212 int
4213 branch_dest (rtx branch)
4214 {
4215 rtx pat = PATTERN (branch);
4216 rtx dest = (GET_CODE (pat) == PARALLEL
4217 ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat));
4218 int dest_uid;
4219
4220 if (GET_CODE (dest) == IF_THEN_ELSE)
4221 dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1);
4222
4223 dest = XEXP (dest, 0);
4224 dest_uid = INSN_UID (dest);
4225
4226 return INSN_ADDRESSES (dest_uid);
4227 }
4228
4229
4230 /* Implement TARGET_ENCODE_SECTION_INFO hook. */
4231
4232 static void
4233 arc_encode_section_info (tree decl, rtx rtl, int first)
4234 {
4235 /* For sdata, SYMBOL_FLAG_LOCAL and SYMBOL_FLAG_FUNCTION.
4236 This clears machine specific flags, so has to come first. */
4237 default_encode_section_info (decl, rtl, first);
4238
4239 /* Check if it is a function, and whether it has the
4240 [long/medium/short]_call attribute specified. */
4241 if (TREE_CODE (decl) == FUNCTION_DECL)
4242 {
4243 rtx symbol = XEXP (rtl, 0);
4244 int flags = SYMBOL_REF_FLAGS (symbol);
4245
4246 tree attr = (TREE_TYPE (decl) != error_mark_node
4247 ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE);
4248 tree long_call_attr = lookup_attribute ("long_call", attr);
4249 tree medium_call_attr = lookup_attribute ("medium_call", attr);
4250 tree short_call_attr = lookup_attribute ("short_call", attr);
4251
4252 if (long_call_attr != NULL_TREE)
4253 flags |= SYMBOL_FLAG_LONG_CALL;
4254 else if (medium_call_attr != NULL_TREE)
4255 flags |= SYMBOL_FLAG_MEDIUM_CALL;
4256 else if (short_call_attr != NULL_TREE)
4257 flags |= SYMBOL_FLAG_SHORT_CALL;
4258
4259 SYMBOL_REF_FLAGS (symbol) = flags;
4260 }
4261 }
4262
4263 /* This is how to output a definition of an internal numbered label where
4264 PREFIX is the class of label and NUM is the number within the class. */
4265
4266 static void arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
4267 {
4268 if (cfun)
4269 arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current);
4270 default_internal_label (stream, prefix, labelno);
4271 }
4272
4273 /* Set the cpu type and print out other fancy things,
4274 at the top of the file. */
4275
4276 static void arc_file_start (void)
4277 {
4278 default_file_start ();
4279 fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string);
4280 }
4281
4282 /* Cost functions. */
4283
4284 /* Compute a (partial) cost for rtx X. Return true if the complete
4285 cost has been computed, and false if subexpressions should be
4286 scanned. In either case, *TOTAL contains the cost result. */
4287
4288 static bool
4289 arc_rtx_costs (rtx x, machine_mode mode, int outer_code,
4290 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
4291 {
4292 int code = GET_CODE (x);
4293
4294 switch (code)
4295 {
4296 /* Small integers are as cheap as registers. */
4297 case CONST_INT:
4298 {
4299 bool nolimm = false; /* Can we do without long immediate? */
4300 bool fast = false; /* Is the result available immediately? */
4301 	bool condexec = false; /* Does this allow conditional execution? */
4302 bool compact = false; /* Is a 16 bit opcode available? */
4303 /* CONDEXEC also implies that we can have an unconditional
4304 3-address operation. */
4305
4306 nolimm = compact = condexec = false;
4307 if (UNSIGNED_INT6 (INTVAL (x)))
4308 nolimm = condexec = compact = true;
4309 else
4310 {
4311 if (SMALL_INT (INTVAL (x)))
4312 nolimm = fast = true;
4313 switch (outer_code)
4314 {
4315 case AND: /* bclr, bmsk, ext[bw] */
4316 if (satisfies_constraint_Ccp (x) /* bclr */
4317 || satisfies_constraint_C1p (x) /* bmsk */)
4318 nolimm = fast = condexec = compact = true;
4319 break;
4320 case IOR: /* bset */
4321 if (satisfies_constraint_C0p (x)) /* bset */
4322 nolimm = fast = condexec = compact = true;
4323 break;
4324 case XOR:
4325 if (satisfies_constraint_C0p (x)) /* bxor */
4326 nolimm = fast = condexec = true;
4327 break;
4328 case SET:
4329 if (satisfies_constraint_Crr (x)) /* ror b,u6 */
4330 nolimm = true;
4331 default:
4332 break;
4333 }
4334 }
4335 /* FIXME: Add target options to attach a small cost if
4336 condexec / compact is not true. */
4337 if (nolimm)
4338 {
4339 *total = 0;
4340 return true;
4341 }
4342 }
4343 /* FALLTHRU */
4344
4345 /* 4 byte values can be fetched as immediate constants -
4346 let's give that the cost of an extra insn. */
4347 case CONST:
4348 case LABEL_REF:
4349 case SYMBOL_REF:
4350 *total = COSTS_N_INSNS (1);
4351 return true;
4352
4353 case CONST_DOUBLE:
4354 {
4355 rtx high, low;
4356
4357 if (TARGET_DPFP)
4358 {
4359 *total = COSTS_N_INSNS (1);
4360 return true;
4361 }
4362 /* FIXME: correct the order of high,low */
4363 split_double (x, &high, &low);
4364 *total = COSTS_N_INSNS (!SMALL_INT (INTVAL (high))
4365 + !SMALL_INT (INTVAL (low)));
4366 return true;
4367 }
4368
4369 /* Encourage synth_mult to find a synthetic multiply when reasonable.
4370 If we need more than 12 insns to do a multiply, then go out-of-line,
4371 since the call overhead will be < 10% of the cost of the multiply. */
4372 case ASHIFT:
4373 case ASHIFTRT:
4374 case LSHIFTRT:
4375 if (TARGET_BARREL_SHIFTER)
4376 {
4377 /* If we want to shift a constant, we need a LIMM. */
4378 /* ??? when the optimizers want to know if a constant should be
4379 hoisted, they ask for the cost of the constant. OUTER_CODE is
4380 insufficient context for shifts since we don't know which operand
4381 we are looking at. */
4382 if (CONSTANT_P (XEXP (x, 0)))
4383 {
4384 *total += (COSTS_N_INSNS (2)
4385 + rtx_cost (XEXP (x, 1), mode, (enum rtx_code) code,
4386 0, speed));
4387 return true;
4388 }
4389 *total = COSTS_N_INSNS (1);
4390 }
4391 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4392 *total = COSTS_N_INSNS (16);
4393 else
4394 {
4395 *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1)));
4396 /* ??? want_to_gcse_p can throw negative shift counts at us,
4397 and then panics when it gets a negative cost as result.
4398 Seen for gcc.c-torture/compile/20020710-1.c -Os . */
4399 if (*total < 0)
4400 *total = 0;
4401 }
4402 return false;
4403
4404 case DIV:
4405 case UDIV:
4406 if (speed)
4407 *total = COSTS_N_INSNS(30);
4408 else
4409 *total = COSTS_N_INSNS(1);
4410 return false;
4411
4412 case MULT:
4413 if ((TARGET_DPFP && GET_MODE (x) == DFmode))
4414 *total = COSTS_N_INSNS (1);
4415 else if (speed)
4416       *total = arc_multcost;
4417 /* We do not want synth_mult sequences when optimizing
4418 for size. */
4419 else if (TARGET_MUL64_SET || TARGET_ARC700_MPY)
4420 *total = COSTS_N_INSNS (1);
4421 else
4422 *total = COSTS_N_INSNS (2);
4423 return false;
4424 case PLUS:
4425 if (GET_CODE (XEXP (x, 0)) == MULT
4426 && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
4427 {
4428 *total += (rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed)
4429 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, PLUS, 1, speed));
4430 return true;
4431 }
4432 return false;
4433 case MINUS:
4434 if (GET_CODE (XEXP (x, 1)) == MULT
4435 && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode))
4436 {
4437 *total += (rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed)
4438 + rtx_cost (XEXP (XEXP (x, 1), 0), mode, PLUS, 1, speed));
4439 return true;
4440 }
4441 return false;
4442 case COMPARE:
4443 {
4444 rtx op0 = XEXP (x, 0);
4445 rtx op1 = XEXP (x, 1);
4446
4447 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
4448 && XEXP (op0, 1) == const1_rtx)
4449 {
4450 /* btst / bbit0 / bbit1:
4451 Small integers and registers are free; everything else can
4452 be put in a register. */
4453 mode = GET_MODE (XEXP (op0, 0));
4454 *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed)
4455 + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed));
4456 return true;
4457 }
4458 if (GET_CODE (op0) == AND && op1 == const0_rtx
4459 && satisfies_constraint_C1p (XEXP (op0, 1)))
4460 {
4461 /* bmsk.f */
4462 *total = rtx_cost (XEXP (op0, 0), VOIDmode, SET, 1, speed);
4463 return true;
4464 }
4465 /* add.f */
4466 if (GET_CODE (op1) == NEG)
4467 {
4468 /* op0 might be constant, the inside of op1 is rather
4469 unlikely to be so. So swapping the operands might lower
4470 the cost. */
4471 mode = GET_MODE (op0);
4472 *total = (rtx_cost (op0, mode, PLUS, 1, speed)
4473 + rtx_cost (XEXP (op1, 0), mode, PLUS, 0, speed));
4474 }
4475 return false;
4476 }
4477 case EQ: case NE:
4478 if (outer_code == IF_THEN_ELSE
4479 && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
4480 && XEXP (x, 1) == const0_rtx
4481 && XEXP (XEXP (x, 0), 1) == const1_rtx)
4482 {
4483 /* btst / bbit0 / bbit1:
4484 Small integers and registers are free; everything else can
4485 be put in a register. */
4486 rtx op0 = XEXP (x, 0);
4487
4488 mode = GET_MODE (XEXP (op0, 0));
4489 *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed)
4490 + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed));
4491 return true;
4492 }
4493 /* Fall through. */
4494 /* scc_insn expands into two insns. */
4495 case GTU: case GEU: case LEU:
4496 if (mode == SImode)
4497 *total += COSTS_N_INSNS (1);
4498 return false;
4499 case LTU: /* might use adc. */
4500 if (mode == SImode)
4501 *total += COSTS_N_INSNS (1) - 1;
4502 return false;
4503 default:
4504 return false;
4505 }
4506 }
4507
4508 /* Return true if ADDR is an address that needs to be expressed as an
4509 explicit sum of pcl + offset. */
4510
4511 bool
4512 arc_legitimate_pc_offset_p (rtx addr)
4513 {
4514 if (GET_CODE (addr) != CONST)
4515 return false;
4516 addr = XEXP (addr, 0);
4517 if (GET_CODE (addr) == PLUS)
4518 {
4519 if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
4520 return false;
4521 addr = XEXP (addr, 0);
4522 }
4523 return (GET_CODE (addr) == UNSPEC
4524 && XVECLEN (addr, 0) == 1
4525 && XINT (addr, 1) == ARC_UNSPEC_GOT
4526 && GET_CODE (XVECEXP (addr, 0, 0)) == SYMBOL_REF);
4527 }
4528
4529 /* Return true if ADDR is a valid pic address.
4530 A valid pic address on arc should look like
4531 const (unspec (SYMBOL_REF/LABEL) (ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT)) */
4532
4533 bool
4534 arc_legitimate_pic_addr_p (rtx addr)
4535 {
4536 if (GET_CODE (addr) == LABEL_REF)
4537 return true;
4538 if (GET_CODE (addr) != CONST)
4539 return false;
4540
4541 addr = XEXP (addr, 0);
4542
4543
4544 if (GET_CODE (addr) == PLUS)
4545 {
4546 if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
4547 return false;
4548 addr = XEXP (addr, 0);
4549 }
4550
4551 if (GET_CODE (addr) != UNSPEC
4552 || XVECLEN (addr, 0) != 1)
4553 return false;
4554
4555 /* Must be @GOT or @GOTOFF. */
4556 if (XINT (addr, 1) != ARC_UNSPEC_GOT
4557 && XINT (addr, 1) != ARC_UNSPEC_GOTOFF)
4558 return false;
4559
4560 if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF
4561 && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF)
4562 return false;
4563
4564 return true;
4565 }
4566
4567
4568
4569 /* Return true if OP contains a symbol reference. */
4570
4571 static bool
4572 symbolic_reference_mentioned_p (rtx op)
4573 {
4574 register const char *fmt;
4575 register int i;
4576
4577 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4578 return true;
4579
4580 fmt = GET_RTX_FORMAT (GET_CODE (op));
4581 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4582 {
4583 if (fmt[i] == 'E')
4584 {
4585 register int j;
4586
4587 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4588 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4589 return true;
4590 }
4591
4592 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4593 return true;
4594 }
4595
4596 return false;
4597 }
4598
4599 /* Return true if OP contains a SYMBOL_REF that is not wrapped in an unspec.
4600 If SKIP_LOCAL is true, skip symbols that bind locally.
4601 This is used further down in this file, and, without SKIP_LOCAL,
4602 in the addsi3 / subsi3 expanders when generating PIC code. */
4603
4604 bool
4605 arc_raw_symbolic_reference_mentioned_p (rtx op, bool skip_local)
4606 {
4607 register const char *fmt;
4608 register int i;
4609
4610 if (GET_CODE(op) == UNSPEC)
4611 return false;
4612
4613 if (GET_CODE (op) == SYMBOL_REF)
4614 {
4615 tree decl = SYMBOL_REF_DECL (op);
4616 return !skip_local || !decl || !default_binds_local_p (decl);
4617 }
4618
4619 fmt = GET_RTX_FORMAT (GET_CODE (op));
4620 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4621 {
4622 if (fmt[i] == 'E')
4623 {
4624 register int j;
4625
4626 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4627 if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j),
4628 skip_local))
4629 return true;
4630 }
4631
4632 else if (fmt[i] == 'e'
4633 && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i),
4634 skip_local))
4635 return true;
4636 }
4637
4638 return false;
4639 }
4640
4641 /* Legitimize a pic address reference in ORIG.
4642 The return value is the legitimated address.
4643 If OLDX is non-zero, it is the target to assign the address to first. */
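/* Illustrative sketch (not in the original sources): for a local symbol "x"
   the legitimized address is roughly

	(plus (reg pic_offset_table)
	      (const (unspec [(symbol_ref "x")] ARC_UNSPEC_GOTOFF)))

   whereas a preemptible global symbol is instead loaded from its GOT slot:

	(mem (const (unspec [(symbol_ref "x")] ARC_UNSPEC_GOT)))  */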
4644
4645 rtx
4646 arc_legitimize_pic_address (rtx orig, rtx oldx)
4647 {
4648 rtx addr = orig;
4649 rtx pat = orig;
4650 rtx base;
4651
4652 if (oldx == orig)
4653 oldx = NULL;
4654
4655 if (GET_CODE (addr) == LABEL_REF)
4656 ; /* Do nothing. */
4657 else if (GET_CODE (addr) == SYMBOL_REF
4658 && (CONSTANT_POOL_ADDRESS_P (addr)
4659 || SYMBOL_REF_LOCAL_P (addr)))
4660 {
4661 /* This symbol may be referenced via a displacement from the PIC
4662 base address (@GOTOFF). */
4663
4664 /* FIXME: if we had a way to emit pc-relative adds that don't
4665 create a GOT entry, we could do without the use of the gp register. */
4666 crtl->uses_pic_offset_table = 1;
4667 pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOTOFF);
4668 pat = gen_rtx_CONST (Pmode, pat);
4669 pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat);
4670
4671 if (oldx == NULL)
4672 oldx = gen_reg_rtx (Pmode);
4673
4674 if (oldx != 0)
4675 {
4676 emit_move_insn (oldx, pat);
4677 pat = oldx;
4678 }
4679
4680 }
4681 else if (GET_CODE (addr) == SYMBOL_REF)
4682 {
4683 /* This symbol must be referenced via a load from the
4684 Global Offset Table (@GOTPC). */
4685
4686 pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOT);
4687 pat = gen_rtx_CONST (Pmode, pat);
4688 pat = gen_const_mem (Pmode, pat);
4689
4690 if (oldx == 0)
4691 oldx = gen_reg_rtx (Pmode);
4692
4693 emit_move_insn (oldx, pat);
4694 pat = oldx;
4695 }
4696 else
4697 {
4698 if (GET_CODE (addr) == CONST)
4699 {
4700 addr = XEXP (addr, 0);
4701 if (GET_CODE (addr) == UNSPEC)
4702 {
4703 /* Check that the unspec is one of the ones we generate? */
4704 }
4705 else
4706 gcc_assert (GET_CODE (addr) == PLUS);
4707 }
4708
4709 if (GET_CODE (addr) == PLUS)
4710 {
4711 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4712
4713 /* Check first to see if this is a constant offset from a @GOTOFF
4714 symbol reference. */
4715 if ((GET_CODE (op0) == LABEL_REF
4716 || (GET_CODE (op0) == SYMBOL_REF
4717 && (CONSTANT_POOL_ADDRESS_P (op0)
4718 || SYMBOL_REF_LOCAL_P (op0))))
4719 && GET_CODE (op1) == CONST_INT)
4720 {
4721 /* FIXME: like above, could do without gp reference. */
4722 crtl->uses_pic_offset_table = 1;
4723 pat
4724 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), ARC_UNSPEC_GOTOFF);
4725 pat = gen_rtx_PLUS (Pmode, pat, op1);
4726 pat = gen_rtx_CONST (Pmode, pat);
4727 pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat);
4728
4729 if (oldx != 0)
4730 {
4731 emit_move_insn (oldx, pat);
4732 pat = oldx;
4733 }
4734 }
4735 else
4736 {
4737 base = arc_legitimize_pic_address (XEXP (addr, 0), oldx);
4738 pat = arc_legitimize_pic_address (XEXP (addr, 1),
4739 base == oldx ? NULL_RTX : oldx);
4740
4741 if (GET_CODE (pat) == CONST_INT)
4742 pat = plus_constant (Pmode, base, INTVAL (pat));
4743 else
4744 {
4745 if (GET_CODE (pat) == PLUS && CONSTANT_P (XEXP (pat, 1)))
4746 {
4747 base = gen_rtx_PLUS (Pmode, base, XEXP (pat, 0));
4748 pat = XEXP (pat, 1);
4749 }
4750 pat = gen_rtx_PLUS (Pmode, base, pat);
4751 }
4752 }
4753 }
4754 }
4755
4756 return pat;
4757 }
4758
4759 /* Output address constant X to FILE, taking PIC into account. */
4760
4761 void
4762 arc_output_pic_addr_const (FILE *file, rtx x, int code)
4763 {
4764 char buf[256];
4765
4766 restart:
4767 switch (GET_CODE (x))
4768 {
4769 case PC:
4770 if (flag_pic)
4771 putc ('.', file);
4772 else
4773 gcc_unreachable ();
4774 break;
4775
4776 case SYMBOL_REF:
4777 output_addr_const (file, x);
4778
4779 /* Local functions do not get references through the PLT. */
4780 if (code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
4781 fputs ("@plt", file);
4782 break;
4783
4784 case LABEL_REF:
4785 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0)));
4786 assemble_name (file, buf);
4787 break;
4788
4789 case CODE_LABEL:
4790 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
4791 assemble_name (file, buf);
4792 break;
4793
4794 case CONST_INT:
4795 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4796 break;
4797
4798 case CONST:
4799 arc_output_pic_addr_const (file, XEXP (x, 0), code);
4800 break;
4801
4802 case CONST_DOUBLE:
4803 if (GET_MODE (x) == VOIDmode)
4804 {
4805 /* We can use %d if the number is one word and positive. */
4806 if (CONST_DOUBLE_HIGH (x))
4807 fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
4808 CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x));
4809 else if (CONST_DOUBLE_LOW (x) < 0)
4810 fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x));
4811 else
4812 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
4813 }
4814 else
4815 /* We can't handle floating point constants;
4816 PRINT_OPERAND must handle them. */
4817 output_operand_lossage ("floating constant misused");
4818 break;
4819
4820 case PLUS:
4821 /* FIXME: Not needed here. */
4822 /* Some assemblers need integer constants to appear last (eg masm). */
4823 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
4824 {
4825 arc_output_pic_addr_const (file, XEXP (x, 1), code);
4826 fprintf (file, "+");
4827 arc_output_pic_addr_const (file, XEXP (x, 0), code);
4828 }
4829 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4830 {
4831 arc_output_pic_addr_const (file, XEXP (x, 0), code);
4832 if (INTVAL (XEXP (x, 1)) >= 0)
4833 fprintf (file, "+");
4834 arc_output_pic_addr_const (file, XEXP (x, 1), code);
4835 }
4836 else
4837 gcc_unreachable();
4838 break;
4839
4840 case MINUS:
4841 /* Avoid outputting things like x-x or x+5-x,
4842 since some assemblers can't handle that. */
4843 x = simplify_subtraction (x);
4844 if (GET_CODE (x) != MINUS)
4845 goto restart;
4846
4847 arc_output_pic_addr_const (file, XEXP (x, 0), code);
4848 fprintf (file, "-");
4849 if (GET_CODE (XEXP (x, 1)) == CONST_INT
4850 && INTVAL (XEXP (x, 1)) < 0)
4851 {
4852 fprintf (file, "(");
4853 arc_output_pic_addr_const (file, XEXP (x, 1), code);
4854 fprintf (file, ")");
4855 }
4856 else
4857 arc_output_pic_addr_const (file, XEXP (x, 1), code);
4858 break;
4859
4860 case ZERO_EXTEND:
4861 case SIGN_EXTEND:
4862 arc_output_pic_addr_const (file, XEXP (x, 0), code);
4863 break;
4864
4865
4866 case UNSPEC:
4867 gcc_assert (XVECLEN (x, 0) == 1);
4868 if (XINT (x, 1) == ARC_UNSPEC_GOT)
4869 fputs ("pcl,", file);
4870 arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
4871 switch (XINT (x, 1))
4872 {
4873 case ARC_UNSPEC_GOT:
4874 fputs ("@gotpc", file);
4875 break;
4876 case ARC_UNSPEC_GOTOFF:
4877 fputs ("@gotoff", file);
4878 break;
4879 case ARC_UNSPEC_PLT:
4880 fputs ("@plt", file);
4881 break;
4882 default:
4883 output_operand_lossage ("invalid UNSPEC as operand: %d", XINT (x,1));
4884 break;
4885 }
4886 break;
4887
4888 default:
4889 output_operand_lossage ("invalid expression as operand");
4890 }
4891 }
4892
4893 #define SYMBOLIC_CONST(X) \
4894 (GET_CODE (X) == SYMBOL_REF \
4895 || GET_CODE (X) == LABEL_REF \
4896 || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
4897
4898 /* Emit insns to move operands[1] into operands[0]. */
4899
4900 void
4901 emit_pic_move (rtx *operands, machine_mode)
4902 {
4903 rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
4904
4905 if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
4906 operands[1] = force_reg (Pmode, operands[1]);
4907 else
4908 operands[1] = arc_legitimize_pic_address (operands[1], temp);
4909 }
4910
4911
4912 /* This function returns the number of words, at the beginning of an
4913    argument, that must be put in registers. The returned value must be
4914 zero for arguments that are passed entirely in registers or that
4915 are entirely pushed on the stack.
4916
4917 On some machines, certain arguments must be passed partially in
4918 registers and partially in memory. On these machines, typically
4919 the first N words of arguments are passed in registers, and the
4920 rest on the stack. If a multi-word argument (a `double' or a
4921 structure) crosses that boundary, its first few words must be
4922 passed in registers and the rest must be pushed. This function
4923 tells the compiler when this occurs, and how many of the words
4924 should go in registers.
4925
4926 `FUNCTION_ARG' for these arguments should return the first register
4927 to be used by the caller for this argument; likewise
4928 `FUNCTION_INCOMING_ARG', for the called function.
4929
4930 The function is used to implement macro FUNCTION_ARG_PARTIAL_NREGS. */
4931
4932 /* If REGNO is the least arg reg available then what is the total number of arg
4933 regs available. */
4934 #define GPR_REST_ARG_REGS(REGNO) \
4935 ((REGNO) <= MAX_ARC_PARM_REGS ? MAX_ARC_PARM_REGS - (REGNO) : 0 )
4936
4937 /* Since arc parm regs are contiguous. */
4938 #define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 )
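/* Worked example (illustrative, assuming MAX_ARC_PARM_REGS is 8 so that
   r0-r7 hold arguments): an argument whose first candidate register is r6
   has GPR_REST_ARG_REGS (6) == 2 registers left, so a 4-word argument
   starting there gets 2 words in r6-r7 and the remaining 2 words on the
   stack; arc_arg_partial_bytes below returns 2 * UNITS_PER_WORD for it.  */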
4939
4940 /* Implement TARGET_ARG_PARTIAL_BYTES. */
4941
4942 static int
4943 arc_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4944 tree type, bool named ATTRIBUTE_UNUSED)
4945 {
4946 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4947 int bytes = (mode == BLKmode
4948 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
4949 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4950 int arg_num = *cum;
4951 int ret;
4952
4953 arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
4954 ret = GPR_REST_ARG_REGS (arg_num);
4955
4956 /* ICEd at function.c:2361, and ret is copied to data->partial */
4957 ret = (ret >= words ? 0 : ret * UNITS_PER_WORD);
4958
4959 return ret;
4960 }
4961
4962 /* This function is used to decide whether a function argument is passed
4963    in a register, and which register.
4964
4965 The arguments are CUM, of type CUMULATIVE_ARGS, which summarizes
4966 (in a way defined by INIT_CUMULATIVE_ARGS and FUNCTION_ARG_ADVANCE)
4967 all of the previous arguments so far passed in registers; MODE, the
4968 machine mode of the argument; TYPE, the data type of the argument
4969 as a tree node or 0 if that is not known (which happens for C
4970 support library functions); and NAMED, which is 1 for an ordinary
4971 argument and 0 for nameless arguments that correspond to `...' in
4972 the called function's prototype.
4973
4974 The returned value should either be a `reg' RTX for the hard
4975 register in which to pass the argument, or zero to pass the
4976 argument on the stack.
4977
4978 For machines like the Vax and 68000, where normally all arguments
4979 are pushed, zero suffices as a definition.
4980
4981 The usual way to make the ANSI library `stdarg.h' work on a machine
4982 where some arguments are usually passed in registers, is to cause
4983 nameless arguments to be passed on the stack instead. This is done
4984 by making the function return 0 whenever NAMED is 0.
4985
4986 You may use the macro `MUST_PASS_IN_STACK (MODE, TYPE)' in the
4987 definition of this function to determine if this argument is of a
4988 type that must be passed in the stack. If `REG_PARM_STACK_SPACE'
4989 is not defined and the function returns non-zero for such an
4990 argument, the compiler will abort. If `REG_PARM_STACK_SPACE' is
4991 defined, the argument will be computed in the stack and then loaded
4992 into a register.
4993
4994 The function is used to implement macro FUNCTION_ARG. */
4995 /* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers
4996 and the rest are pushed. */
4997
4998 static rtx
4999 arc_function_arg (cumulative_args_t cum_v,
5000 machine_mode mode,
5001 const_tree type ATTRIBUTE_UNUSED,
5002 bool named ATTRIBUTE_UNUSED)
5003 {
5004 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5005 int arg_num = *cum;
5006 rtx ret;
5007 const char *debstr ATTRIBUTE_UNUSED;
5008
5009 arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
5010 /* Return a marker for use in the call instruction. */
5011 if (mode == VOIDmode)
5012 {
5013 ret = const0_rtx;
5014 debstr = "<0>";
5015 }
5016 else if (GPR_REST_ARG_REGS (arg_num) > 0)
5017 {
5018 ret = gen_rtx_REG (mode, arg_num);
5019 debstr = reg_names [arg_num];
5020 }
5021 else
5022 {
5023 ret = NULL_RTX;
5024 debstr = "memory";
5025 }
5026 return ret;
5027 }
5028
5029 /* The function to update the summarizer variable *CUM to advance past
5030 an argument in the argument list. The values MODE, TYPE and NAMED
5031 describe that argument. Once this is done, the variable *CUM is
5032 suitable for analyzing the *following* argument with
5033 `FUNCTION_ARG', etc.
5034
5035 This function need not do anything if the argument in question was
5036 passed on the stack. The compiler knows how to track the amount of
5037 stack space used for arguments without any special help.
5038
5039 The function is used to implement macro FUNCTION_ARG_ADVANCE. */
5040 /* For the ARC: the cum set here is passed on to function_arg where we
5041 look at its value and say which reg to use. Strategy: advance the
5042 regnumber here till we run out of arg regs, then set *cum to last
5043 reg. In function_arg, since *cum > last arg reg we would return 0
5044 and thus the arg will end up on the stack. For straddling args of
5045 course function_arg_partial_nregs will come into play. */
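/* Worked example (illustrative): for f (int a, int b, long long c), *cum
   advances 0 -> 1 -> 2 as a and b are assigned r0 and r1; c then takes
   r2-r3 (ROUND_ADVANCE_CUM may first round *cum up for doubleword
   alignment, a no-op here since 2 is already even), leaving *cum == 4
   for any further arguments.  */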
5046
5047 static void
5048 arc_function_arg_advance (cumulative_args_t cum_v,
5049 machine_mode mode,
5050 const_tree type,
5051 bool named ATTRIBUTE_UNUSED)
5052 {
5053 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5054 int bytes = (mode == BLKmode
5055 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
5056 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5057 int i;
5058
5059 if (words)
5060 *cum = ROUND_ADVANCE_CUM (*cum, mode, type);
5061 for (i = 0; i < words; i++)
5062 *cum = ARC_NEXT_ARG_REG (*cum);
5063
5064 }
5065
5066 /* Define how to find the value returned by a function.
5067 VALTYPE is the data type of the value (as a tree).
5068 If the precise function being called is known, FN_DECL_OR_TYPE is its
5069 FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type. */
5070
5071 static rtx
5072 arc_function_value (const_tree valtype,
5073 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
5074 bool outgoing ATTRIBUTE_UNUSED)
5075 {
5076 machine_mode mode = TYPE_MODE (valtype);
5077 int unsignedp ATTRIBUTE_UNUSED;
5078
5079 unsignedp = TYPE_UNSIGNED (valtype);
5080 if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE)
5081 PROMOTE_MODE (mode, unsignedp, valtype);
5082 return gen_rtx_REG (mode, 0);
5083 }
5084
5085 /* Returns the return address that is used by builtin_return_address. */
5086
5087 rtx
5088 arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame)
5089 {
5090 if (count != 0)
5091 return const0_rtx;
5092
5093 return get_hard_reg_initial_val (Pmode , RETURN_ADDR_REGNUM);
5094 }
5095
5096 /* Nonzero if the constant value X is a legitimate general operand
5097 when generating PIC code. It is given that flag_pic is on and
5098 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5099
5100 bool
5101 arc_legitimate_pic_operand_p (rtx x)
5102 {
5103 return !arc_raw_symbolic_reference_mentioned_p (x, true);
5104 }
5105
5106 /* Determine if a given RTX is a valid constant. We already know this
5107 satisfies CONSTANT_P. */
5108
5109 bool
5110 arc_legitimate_constant_p (machine_mode, rtx x)
5111 {
5112 if (!flag_pic)
5113 return true;
5114
5115 switch (GET_CODE (x))
5116 {
5117 case CONST:
5118 x = XEXP (x, 0);
5119
5120 if (GET_CODE (x) == PLUS)
5121 {
5122 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5123 return false;
5124 x = XEXP (x, 0);
5125 }
5126
5127 /* Only some unspecs are valid as "constants". */
5128 if (GET_CODE (x) == UNSPEC)
5129 switch (XINT (x, 1))
5130 {
5131 case ARC_UNSPEC_PLT:
5132 case ARC_UNSPEC_GOTOFF:
5133 case ARC_UNSPEC_GOT:
5134 case UNSPEC_PROF:
5135 return true;
5136
5137 default:
5138 gcc_unreachable ();
5139 }
5140
5141 /* We must have drilled down to a symbol. */
5142 if (arc_raw_symbolic_reference_mentioned_p (x, false))
5143 return false;
5144
5145 /* Return true. */
5146 break;
5147
5148 case LABEL_REF:
5149 case SYMBOL_REF:
5150 return false;
5151
5152 default:
5153 break;
5154 }
5155
5156 /* Otherwise we handle everything else in the move patterns. */
5157 return true;
5158 }
5159
5160 static bool
5161 arc_legitimate_address_p (machine_mode mode, rtx x, bool strict)
5162 {
5163 if (RTX_OK_FOR_BASE_P (x, strict))
5164 return true;
5165 if (LEGITIMATE_OFFSET_ADDRESS_P (mode, x, TARGET_INDEXED_LOADS, strict))
5166 return true;
5167 if (LEGITIMATE_SCALED_ADDRESS_P (mode, x, strict))
5168 return true;
5169 if (LEGITIMATE_SMALL_DATA_ADDRESS_P (x))
5170 return true;
5171 if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x)))
5172 return true;
5173 if ((GET_MODE_SIZE (mode) != 16)
5174 && (GET_CODE (x) == SYMBOL_REF
5175 || GET_CODE (x) == LABEL_REF
5176 || GET_CODE (x) == CONST))
5177 {
5178 if (!flag_pic || arc_legitimate_pic_addr_p (x))
5179 return true;
5180 }
5181 if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC
5182 || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC)
5183 && RTX_OK_FOR_BASE_P (XEXP (x, 0), strict))
5184 return true;
5185 /* We're restricted here by the `st' insn. */
5186 if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY)
5187 && GET_CODE (XEXP ((x), 1)) == PLUS
5188 && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP (x, 1), 0))
5189 && LEGITIMATE_OFFSET_ADDRESS_P (QImode, XEXP (x, 1),
5190 TARGET_AUTO_MODIFY_REG, strict))
5191 return true;
5192 return false;
5193 }
5194
5195 /* Return true iff ADDR (a legitimate address expression)
5196 has an effect that depends on the machine mode it is used for. */
5197
5198 static bool
5199 arc_mode_dependent_address_p (const_rtx addr, addr_space_t)
5200 {
5201 /* SYMBOL_REF is not mode dependent: it is either a small data reference,
5202 which is valid for loads and stores, or a limm offset, which is valid for
5203 loads. */
5204 /* Scaled indices are scaled by the access mode; likewise for scaled
5205 offsets, which are needed for maximum offset stores. */
5206 if (GET_CODE (addr) == PLUS
5207 && (GET_CODE (XEXP ((addr), 0)) == MULT
5208 || (CONST_INT_P (XEXP ((addr), 1))
5209 && !SMALL_INT (INTVAL (XEXP ((addr), 1))))))
5210 return true;
5211 return false;
5212 }
5213
5214 /* Determine if it's legal to put X into the constant pool. */
5215
5216 static bool
5217 arc_cannot_force_const_mem (machine_mode mode, rtx x)
5218 {
5219 return !arc_legitimate_constant_p (mode, x);
5220 }
5221
5222 /* IDs for all the ARC builtins. */
5223
5224 enum arc_builtin_id
5225 {
5226 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \
5227 ARC_BUILTIN_ ## NAME,
5228 #include "builtins.def"
5229 #undef DEF_BUILTIN
5230
5231 ARC_BUILTIN_COUNT
5232 };
5233
5234 struct GTY(()) arc_builtin_description
5235 {
5236 enum insn_code icode;
5237 int n_args;
5238 tree fndecl;
5239 };
5240
5241 static GTY(()) struct arc_builtin_description
5242 arc_bdesc[ARC_BUILTIN_COUNT] =
5243 {
5244 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \
5245 { (enum insn_code) CODE_FOR_ ## ICODE, N_ARGS, NULL_TREE },
5246 #include "builtins.def"
5247 #undef DEF_BUILTIN
5248 };
5249
5250 /* Transform UP into lowercase and write the result to LO.
5251 You must provide enough space for LO. Return LO. */
5252
5253 static char*
5254 arc_tolower (char *lo, const char *up)
5255 {
5256 char *lo0 = lo;
5257
5258 for (; *up; up++, lo++)
5259 *lo = TOLOWER (*up);
5260
5261 *lo = '\0';
5262
5263 return lo0;
5264 }
5265
5266 /* Implement `TARGET_BUILTIN_DECL'. */
5267
5268 static tree
5269 arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
5270 {
5271 if (id < ARC_BUILTIN_COUNT)
5272 return arc_bdesc[id].fndecl;
5273
5274 return error_mark_node;
5275 }
5276
5277 static void
5278 arc_init_builtins (void)
5279 {
5280 tree pcvoid_type_node
5281 = build_pointer_type (build_qualified_type (void_type_node,
5282 TYPE_QUAL_CONST));
5283 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node,
5284 V8HImode);
5285
5286 tree void_ftype_void
5287 = build_function_type_list (void_type_node, NULL_TREE);
5288 tree int_ftype_int
5289 = build_function_type_list (integer_type_node, integer_type_node,
5290 NULL_TREE);
5291 tree int_ftype_pcvoid_int
5292 = build_function_type_list (integer_type_node, pcvoid_type_node,
5293 integer_type_node, NULL_TREE);
5294 tree void_ftype_usint_usint
5295 = build_function_type_list (void_type_node, long_unsigned_type_node,
5296 long_unsigned_type_node, NULL_TREE);
5297 tree int_ftype_int_int
5298 = build_function_type_list (integer_type_node, integer_type_node,
5299 integer_type_node, NULL_TREE);
5300 tree usint_ftype_usint
5301 = build_function_type_list (long_unsigned_type_node,
5302 long_unsigned_type_node, NULL_TREE);
5303 tree void_ftype_usint
5304 = build_function_type_list (void_type_node, long_unsigned_type_node,
5305 NULL_TREE);
5306 tree int_ftype_void
5307 = build_function_type_list (integer_type_node, void_type_node,
5308 NULL_TREE);
5309 tree void_ftype_int
5310 = build_function_type_list (void_type_node, integer_type_node,
5311 NULL_TREE);
5312 tree int_ftype_short
5313 = build_function_type_list (integer_type_node, short_integer_type_node,
5314 NULL_TREE);
5315
5316 /* Old ARC SIMD types. */
5317 tree v8hi_ftype_v8hi_v8hi
5318 = build_function_type_list (V8HI_type_node, V8HI_type_node,
5319 V8HI_type_node, NULL_TREE);
5320 tree v8hi_ftype_v8hi_int
5321 = build_function_type_list (V8HI_type_node, V8HI_type_node,
5322 integer_type_node, NULL_TREE);
5323 tree v8hi_ftype_v8hi_int_int
5324 = build_function_type_list (V8HI_type_node, V8HI_type_node,
5325 integer_type_node, integer_type_node,
5326 NULL_TREE);
5327 tree void_ftype_v8hi_int_int
5328 = build_function_type_list (void_type_node, V8HI_type_node,
5329 integer_type_node, integer_type_node,
5330 NULL_TREE);
5331 tree void_ftype_v8hi_int_int_int
5332 = build_function_type_list (void_type_node, V8HI_type_node,
5333 integer_type_node, integer_type_node,
5334 integer_type_node, NULL_TREE);
5335 tree v8hi_ftype_int_int
5336 = build_function_type_list (V8HI_type_node, integer_type_node,
5337 integer_type_node, NULL_TREE);
5338 tree void_ftype_int_int
5339 = build_function_type_list (void_type_node, integer_type_node,
5340 integer_type_node, NULL_TREE);
5341 tree v8hi_ftype_v8hi
5342 = build_function_type_list (V8HI_type_node, V8HI_type_node,
5343 NULL_TREE);
5344
5345 /* Add the builtins. */
5346 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \
5347 { \
5348 int id = ARC_BUILTIN_ ## NAME; \
5349 const char *Name = "__builtin_arc_" #NAME; \
5350 char *name = (char*) alloca (1 + strlen (Name)); \
5351 \
5352 gcc_assert (id < ARC_BUILTIN_COUNT); \
5353 if (MASK) \
5354 arc_bdesc[id].fndecl \
5355 = add_builtin_function (arc_tolower(name, Name), TYPE, id, \
5356 BUILT_IN_MD, NULL, NULL_TREE); \
5357 }
5358 #include "builtins.def"
5359 #undef DEF_BUILTIN
5360 }
5361
5362 /* Helper to expand __builtin_arc_aligned (void* val, int
5363 alignval). */
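/* A usage sketch (illustrative, not taken from the GCC sources): for
     char buf[8] __attribute__ ((aligned (4)));
     int ok = __builtin_arc_aligned (buf, 4);
   the call expands to 1 when the 4-byte alignment of BUF can be proved and
   to 0 otherwise; the alignment argument must fold to a positive power of
   two at compile time.  */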
5364
5365 static rtx
5366 arc_expand_builtin_aligned (tree exp)
5367 {
5368 tree arg0 = CALL_EXPR_ARG (exp, 0);
5369 tree arg1 = CALL_EXPR_ARG (exp, 1);
5370 fold (arg1);
5371 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5372 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5373
5374 if (!CONST_INT_P (op1))
5375 {
5376 /* If we can't fold the alignment to a constant integer
5377 whilst optimizing, this is probably a user error. */
5378 if (optimize)
5379 warning (0, "__builtin_arc_aligned with non-constant alignment");
5380 }
5381 else
5382 {
5383 HOST_WIDE_INT alignTest = INTVAL (op1);
5384 /* Check alignTest is positive, and a power of two. */
5385 if (alignTest <= 0 || alignTest != (alignTest & -alignTest))
5386 {
5387 error ("invalid alignment value for __builtin_arc_aligned");
5388 return NULL_RTX;
5389 }
5390
5391 if (CONST_INT_P (op0))
5392 {
5393 HOST_WIDE_INT pnt = INTVAL (op0);
5394
5395 if ((pnt & (alignTest - 1)) == 0)
5396 return const1_rtx;
5397 }
5398 else
5399 {
5400 unsigned align = get_pointer_alignment (arg0);
5401 unsigned numBits = alignTest * BITS_PER_UNIT;
5402
5403 if (align && align >= numBits)
5404 return const1_rtx;
5405 /* Another attempt to ascertain alignment. Check the type
5406 we are pointing to. */
5407 if (POINTER_TYPE_P (TREE_TYPE (arg0))
5408 && TYPE_ALIGN (TREE_TYPE (TREE_TYPE (arg0))) >= numBits)
5409 return const1_rtx;
5410 }
5411 }
5412
5413 /* Default to false. */
5414 return const0_rtx;
5415 }
5416
5417 /* Helper arc_expand_builtin, generates a pattern for the given icode
5418 and arguments. */
5419
5420 static rtx_insn *
5421 apply_GEN_FCN (enum insn_code icode, rtx *arg)
5422 {
5423 switch (insn_data[icode].n_generator_args)
5424 {
5425 case 0:
5426 return GEN_FCN (icode) ();
5427 case 1:
5428 return GEN_FCN (icode) (arg[0]);
5429 case 2:
5430 return GEN_FCN (icode) (arg[0], arg[1]);
5431 case 3:
5432 return GEN_FCN (icode) (arg[0], arg[1], arg[2]);
5433 case 4:
5434 return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3]);
5435 case 5:
5436 return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3], arg[4]);
5437 default:
5438 gcc_unreachable ();
5439 }
5440 }
5441
5442 /* Expand an expression EXP that calls a built-in function,
5443 with result going to TARGET if that's convenient
5444 (and in mode MODE if that's convenient).
5445 SUBTARGET may be used as the target for computing one of EXP's operands.
5446 IGNORE is nonzero if the value is to be ignored. */
5447
5448 static rtx
5449 arc_expand_builtin (tree exp,
5450 rtx target,
5451 rtx subtarget ATTRIBUTE_UNUSED,
5452 machine_mode mode ATTRIBUTE_UNUSED,
5453 int ignore ATTRIBUTE_UNUSED)
5454 {
5455 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
5456 unsigned int id = DECL_FUNCTION_CODE (fndecl);
5457 const struct arc_builtin_description *d = &arc_bdesc[id];
5458 int i, j, n_args = call_expr_nargs (exp);
5459 rtx pat = NULL_RTX;
5460 rtx xop[5];
5461 enum insn_code icode = d->icode;
5462 machine_mode tmode = insn_data[icode].operand[0].mode;
5463 int nonvoid;
5464 tree arg0;
5465 tree arg1;
5466 tree arg2;
5467 tree arg3;
5468 rtx op0;
5469 rtx op1;
5470 rtx op2;
5471 rtx op3;
5472 rtx op4;
5473 machine_mode mode0;
5474 machine_mode mode1;
5475 machine_mode mode2;
5476 machine_mode mode3;
5477 machine_mode mode4;
5478
5479 if (id >= ARC_BUILTIN_COUNT)
5480 internal_error ("bad builtin fcode");
5481
5482 /* 1st part: Expand special builtins. */
5483 switch (id)
5484 {
5485 case ARC_BUILTIN_NOP:
5486 emit_insn (gen_nopv ());
5487 return NULL_RTX;
5488
5489 case ARC_BUILTIN_RTIE:
5490 case ARC_BUILTIN_SYNC:
5491 case ARC_BUILTIN_BRK:
5492 case ARC_BUILTIN_SWI:
5493 case ARC_BUILTIN_UNIMP_S:
5494 gcc_assert (icode != 0);
5495 emit_insn (GEN_FCN (icode) (const1_rtx));
5496 return NULL_RTX;
5497
5498 case ARC_BUILTIN_ALIGNED:
5499 return arc_expand_builtin_aligned (exp);
5500
5501 case ARC_BUILTIN_CLRI:
5502 target = gen_reg_rtx (SImode);
5503 emit_insn (gen_clri (target, const1_rtx));
5504 return target;
5505
5506 case ARC_BUILTIN_TRAP_S:
5507 case ARC_BUILTIN_SLEEP:
5508 arg0 = CALL_EXPR_ARG (exp, 0);
5509 fold (arg0);
5510 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5511
5512 if (!CONST_INT_P (op0) || !satisfies_constraint_L (op0))
5513 {
5514 error ("builtin operand should be an unsigned 6-bit value");
5515 return NULL_RTX;
5516 }
5517 gcc_assert (icode != 0);
5518 emit_insn (GEN_FCN (icode) (op0));
5519 return NULL_RTX;
5520
5521 case ARC_BUILTIN_VDORUN:
5522 case ARC_BUILTIN_VDIRUN:
5523 arg0 = CALL_EXPR_ARG (exp, 0);
5524 arg1 = CALL_EXPR_ARG (exp, 1);
5525 op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
5526 op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5527
5528 target = gen_rtx_REG (SImode, (id == ARC_BUILTIN_VDIRUN) ? 131 : 139);
5529
5530 mode0 = insn_data[icode].operand[1].mode;
5531 mode1 = insn_data[icode].operand[2].mode;
5532
5533 if (!insn_data[icode].operand[1].predicate (op0, mode0))
5534 op0 = copy_to_mode_reg (mode0, op0);
5535
5536 if (!insn_data[icode].operand[2].predicate (op1, mode1))
5537 op1 = copy_to_mode_reg (mode1, op1);
5538
5539 pat = GEN_FCN (icode) (target, op0, op1);
5540 if (!pat)
5541 return NULL_RTX;
5542
5543 emit_insn (pat);
5544 return NULL_RTX;
5545
5546 case ARC_BUILTIN_VDIWR:
5547 case ARC_BUILTIN_VDOWR:
5548 arg0 = CALL_EXPR_ARG (exp, 0);
5549 arg1 = CALL_EXPR_ARG (exp, 1);
5550 op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
5551 op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5552
5553 if (!CONST_INT_P (op0)
5554 || !(UNSIGNED_INT3 (INTVAL (op0))))
5555 error ("operand 1 should be an unsigned 3-bit immediate");
5556
5557 mode1 = insn_data[icode].operand[1].mode;
5558
5559 if (icode == CODE_FOR_vdiwr_insn)
5560 target = gen_rtx_REG (SImode,
5561 ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0));
5562 else if (icode == CODE_FOR_vdowr_insn)
5563 target = gen_rtx_REG (SImode,
5564 ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0));
5565 else
5566 gcc_unreachable ();
5567
5568 if (!insn_data[icode].operand[2].predicate (op1, mode1))
5569 op1 = copy_to_mode_reg (mode1, op1);
5570
5571 pat = GEN_FCN (icode) (target, op1);
5572 if (!pat)
5573 return NULL_RTX;
5574
5575 emit_insn (pat);
5576 return NULL_RTX;
5577
5578 case ARC_BUILTIN_VASRW:
5579 case ARC_BUILTIN_VSR8:
5580 case ARC_BUILTIN_VSR8AW:
5581 arg0 = CALL_EXPR_ARG (exp, 0);
5582 arg1 = CALL_EXPR_ARG (exp, 1);
5583 op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
5584 op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5585 op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5586
5587 target = gen_reg_rtx (V8HImode);
5588 mode0 = insn_data[icode].operand[1].mode;
5589 mode1 = insn_data[icode].operand[2].mode;
5590
5591 if (!insn_data[icode].operand[1].predicate (op0, mode0))
5592 op0 = copy_to_mode_reg (mode0, op0);
5593
5594 if ((!insn_data[icode].operand[2].predicate (op1, mode1))
5595 || !(UNSIGNED_INT3 (INTVAL (op1))))
5596 error ("operand 2 should be an unsigned 3-bit value (I0-I7)");
5597
5598 pat = GEN_FCN (icode) (target, op0, op1, op2);
5599 if (!pat)
5600 return NULL_RTX;
5601
5602 emit_insn (pat);
5603 return target;
5604
5605 case ARC_BUILTIN_VLD32WH:
5606 case ARC_BUILTIN_VLD32WL:
5607 case ARC_BUILTIN_VLD64:
5608 case ARC_BUILTIN_VLD32:
5609 rtx src_vreg;
5610 icode = d->icode;
5611 arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg. */
5612 arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7. */
5613 arg2 = CALL_EXPR_ARG (exp, 2); /* u8. */
5614
5615 src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
5616 op0 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5617 op1 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
5618 op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5619
5620 /* target <- src vreg. */
5621 emit_insn (gen_move_insn (target, src_vreg));
5622
5623 /* target <- vec_concat: target, mem (Ib, u8). */
5624 mode0 = insn_data[icode].operand[3].mode;
5625 mode1 = insn_data[icode].operand[1].mode;
5626
5627 if ((!insn_data[icode].operand[3].predicate (op0, mode0))
5628 || !(UNSIGNED_INT3 (INTVAL (op0))))
5629 error ("operand 1 should be an unsigned 3-bit value (I0-I7)");
5630
5631 if ((!insn_data[icode].operand[1].predicate (op1, mode1))
5632 || !(UNSIGNED_INT8 (INTVAL (op1))))
5633 error ("operand 2 should be an unsigned 8-bit value");
5634
5635 pat = GEN_FCN (icode) (target, op1, op2, op0);
5636 if (!pat)
5637 return NULL_RTX;
5638
5639 emit_insn (pat);
5640 return target;
5641
5642 case ARC_BUILTIN_VLD64W:
5643 case ARC_BUILTIN_VLD128:
5644 arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg. */
5645 arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7. */
5646
5647 op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5648 op1 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
5649 op2 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5650
5651 /* target <- src vreg. */
5652 target = gen_reg_rtx (V8HImode);
5653
5654 /* target <- vec_concat: target, mem (Ib, u8). */
5655 mode0 = insn_data[icode].operand[1].mode;
5656 mode1 = insn_data[icode].operand[2].mode;
5657 mode2 = insn_data[icode].operand[3].mode;
5658
5659 if ((!insn_data[icode].operand[2].predicate (op1, mode1))
5660 || !(UNSIGNED_INT3 (INTVAL (op1))))
5661 error ("operand 1 should be an unsigned 3-bit value (I0-I7)");
5662
5663 if ((!insn_data[icode].operand[3].predicate (op2, mode2))
5664 || !(UNSIGNED_INT8 (INTVAL (op2))))
5665 error ("operand 2 should be an unsigned 8-bit value");
5666
5667 pat = GEN_FCN (icode) (target, op0, op1, op2);
5668
5669 if (!pat)
5670 return NULL_RTX;
5671
5672 emit_insn (pat);
5673 return target;
5674
5675 case ARC_BUILTIN_VST128:
5676 case ARC_BUILTIN_VST64:
5677 arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg. */
5678 arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7. */
5679 arg2 = CALL_EXPR_ARG (exp, 2); /* u8. */
5680
5681 op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5682 op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5683 op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
5684 op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
5685
5686 mode0 = insn_data[icode].operand[0].mode;
5687 mode1 = insn_data[icode].operand[1].mode;
5688 mode2 = insn_data[icode].operand[2].mode;
5689 mode3 = insn_data[icode].operand[3].mode;
5690
5691 if ((!insn_data[icode].operand[1].predicate (op1, mode1))
5692 || !(UNSIGNED_INT3 (INTVAL (op1))))
5693 error ("operand 2 should be an unsigned 3-bit value (I0-I7)");
5694
5695 if ((!insn_data[icode].operand[2].predicate (op2, mode2))
5696 || !(UNSIGNED_INT8 (INTVAL (op2))))
5697 error ("operand 3 should be an unsigned 8-bit value");
5698
5699 if (!insn_data[icode].operand[3].predicate (op3, mode3))
5700 op3 = copy_to_mode_reg (mode3, op3);
5701
5702 pat = GEN_FCN (icode) (op0, op1, op2, op3);
5703 if (!pat)
5704 return NULL_RTX;
5705
5706 emit_insn (pat);
5707 return NULL_RTX;
5708
5709 case ARC_BUILTIN_VST16_N:
5710 case ARC_BUILTIN_VST32_N:
5711 arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg. */
5712 arg1 = CALL_EXPR_ARG (exp, 1); /* u3. */
5713 arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7. */
5714 arg3 = CALL_EXPR_ARG (exp, 3); /* u8. */
5715
5716 op0 = expand_expr (arg3, NULL_RTX, SImode, EXPAND_NORMAL);
5717 op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5718 op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
5719 op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
5720 op4 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5721
5722 mode0 = insn_data[icode].operand[0].mode;
5723 mode2 = insn_data[icode].operand[2].mode;
5724 mode3 = insn_data[icode].operand[3].mode;
5725 mode4 = insn_data[icode].operand[4].mode;
5726
5727 /* Do some correctness checks for the operands. */
5728 if ((!insn_data[icode].operand[0].predicate (op0, mode0))
5729 || !(UNSIGNED_INT8 (INTVAL (op0))))
5730 error ("operand 4 should be an unsigned 8-bit value (0-255)");
5731
5732 if ((!insn_data[icode].operand[2].predicate (op2, mode2))
5733 || !(UNSIGNED_INT3 (INTVAL (op2))))
5734 error ("operand 3 should be an unsigned 3-bit value (I0-I7)");
5735
5736 if (!insn_data[icode].operand[3].predicate (op3, mode3))
5737 op3 = copy_to_mode_reg (mode3, op3);
5738
5739 if ((!insn_data[icode].operand[4].predicate (op4, mode4))
5740 || !(UNSIGNED_INT3 (INTVAL (op4))))
5741 error ("operand 2 should be an unsigned 3-bit value (subreg 0-7)");
5742 else if (icode == CODE_FOR_vst32_n_insn
5743 && ((INTVAL (op4) % 2) != 0))
5744 error ("operand 2 should be an even 3-bit value (subreg 0,2,4,6)");
5745
5746 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
5747 if (!pat)
5748 return NULL_RTX;
5749
5750 emit_insn (pat);
5751 return NULL_RTX;
5752
5753 default:
5754 break;
5755 }
5756
5757 /* 2nd part: Expand regular builtins. */
5758 if (icode == 0)
5759 internal_error ("bad builtin fcode");
5760
5761 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
5762 j = 0;
5763
5764 if (nonvoid)
5765 {
5766 if (target == NULL_RTX
5767 || GET_MODE (target) != tmode
5768 || !insn_data[icode].operand[0].predicate (target, tmode))
5769 {
5770 target = gen_reg_rtx (tmode);
5771 }
5772 xop[j++] = target;
5773 }
5774
5775 gcc_assert (n_args <= 4);
5776 for (i = 0; i < n_args; i++, j++)
5777 {
5778 tree arg = CALL_EXPR_ARG (exp, i);
5779 machine_mode mode = insn_data[icode].operand[j].mode;
5780 rtx op = expand_expr (arg, NULL_RTX, mode, EXPAND_NORMAL);
5781 machine_mode opmode = GET_MODE (op);
5782 char c = insn_data[icode].operand[j].constraint[0];
5783
5784 /* SIMD extension requires exact immediate operand match. */
5785 if ((id > ARC_BUILTIN_SIMD_BEGIN)
5786 && (id < ARC_BUILTIN_SIMD_END)
5787 && (c != 'v')
5788 && (c != 'r'))
5789 {
5790 if (!CONST_INT_P (op))
5791 error ("builtin requires an immediate for operand %d", j);
5792 switch (c)
5793 {
5794 case 'L':
5795 if (!satisfies_constraint_L (op))
5796 error ("operand %d should be a 6 bit unsigned immediate", j);
5797 break;
5798 case 'P':
5799 if (!satisfies_constraint_P (op))
5800 error ("operand %d should be a 8 bit unsigned immediate", j);
5801 break;
5802 case 'K':
5803 if (!satisfies_constraint_K (op))
5804 error ("operand %d should be a 3 bit unsigned immediate", j);
5805 break;
5806 default:
5807 error ("unknown builtin immediate operand type for operand %d",
5808 j);
5809 }
5810 }
5811
5812 if (CONST_INT_P (op))
5813 opmode = mode;
5814
5815 if ((opmode == SImode) && (mode == HImode))
5816 {
5817 opmode = HImode;
5818 op = gen_lowpart (HImode, op);
5819 }
5820
5821 /* In case the insn wants input operands in modes different from
5822 the result, abort. */
5823 gcc_assert (opmode == mode || opmode == VOIDmode);
5824
5825 if (!insn_data[icode].operand[i + nonvoid].predicate (op, mode))
5826 op = copy_to_mode_reg (mode, op);
5827
5828 xop[j] = op;
5829 }
5830
5831 pat = apply_GEN_FCN (icode, xop);
5832 if (pat == NULL_RTX)
5833 return NULL_RTX;
5834
5835 emit_insn (pat);
5836
5837 if (nonvoid)
5838 return target;
5839 else
5840 return const0_rtx;
5841 }
5842
5843 /* Return true if operands[opno] is a valid compile-time constant to be
5844 used as a register number in the code for builtins.  Otherwise flag an
5845 error and return false. */
5846
5847 bool
5848 check_if_valid_regno_const (rtx *operands, int opno)
5849 {
5850
5851 switch (GET_CODE (operands[opno]))
5852 {
5853 case SYMBOL_REF :
5854 case CONST :
5855 case CONST_INT :
5856 return true;
5857 default:
5858 error ("register number must be a compile-time constant. Try giving higher optimization levels");
5859 break;
5860 }
5861 return false;
5862 }
5863
5864 /* Check whether, after all the constant folding, the operand to
5865 __builtin_arc_sleep is an unsigned 6-bit integer.  If not, flag an error. */
5866
5867 bool
5868 check_if_valid_sleep_operand (rtx *operands, int opno)
5869 {
5870 switch (GET_CODE (operands[opno]))
5871 {
5872 case CONST :
5873 case CONST_INT :
5874 if (UNSIGNED_INT6 (INTVAL (operands[opno])))
5875 return true;
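/* FALLTHRU: a value outside the u6 range is diagnosed by the fatal error
   in the default case below.  */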
5876 default:
5877 fatal_error (input_location,
5878 "operand for sleep instruction must be an unsigned 6 bit compile-time constant");
5879 break;
5880 }
5881 return false;
5882 }
5883
5884 /* Return true if it is ok to make a tail-call to DECL. */
5885
5886 static bool
5887 arc_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
5888 tree exp ATTRIBUTE_UNUSED)
5889 {
5890 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5891 if (ARC_INTERRUPT_P (arc_compute_function_type (cfun)))
5892 return false;
5893
5894 /* Everything else is ok. */
5895 return true;
5896 }
5897
5898 /* Output code to add DELTA to the first argument, and then jump
5899 to FUNCTION. Used for C++ multiple inheritance. */
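/* Roughly (an illustrative sketch; the register used for "this" depends on
   the return convention), for DELTA == 4 and no vcall offset this emits:

	add	r0, r0, 4
	b	@function

   i.e. the hidden "this" argument is adjusted and control tail-jumps to
   FUNCTION; long-call targets use "j" instead of "b".  */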
5900
5901 static void
5902 arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
5903 HOST_WIDE_INT delta,
5904 HOST_WIDE_INT vcall_offset,
5905 tree function)
5906 {
5907 int mi_delta = delta;
5908 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
5909 int shift = 0;
5910 int this_regno
5911 = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0;
5912 rtx fnaddr;
5913
5914 if (mi_delta < 0)
5915 mi_delta = - mi_delta;
5916
5917 /* Add DELTA. When possible use a plain add, otherwise load it into
5918 a register first. */
5919
5920 while (mi_delta != 0)
5921 {
5922 if ((mi_delta & (3 << shift)) == 0)
5923 shift += 2;
5924 else
5925 {
5926 asm_fprintf (file, "\t%s\t%s, %s, %d\n",
5927 mi_op, reg_names[this_regno], reg_names[this_regno],
5928 mi_delta & (0xff << shift));
5929 mi_delta &= ~(0xff << shift);
5930 shift += 8;
5931 }
5932 }
5933
5934 /* If needed, add *(*THIS + VCALL_OFFSET) to THIS. */
5935 if (vcall_offset != 0)
5936 {
5937 /* ld r12,[this] --> temp = *this
5938 add r12,r12,vcall_offset --> temp = *(*this + vcall_offset)
5939 ld r12,[r12]
5940 add this,this,r12 --> this += *(*this + vcall_offset) */
5941 asm_fprintf (file, "\tld\t%s, [%s]\n",
5942 ARC_TEMP_SCRATCH_REG, reg_names[this_regno]);
5943 asm_fprintf (file, "\tadd\t%s, %s, " HOST_WIDE_INT_PRINT_DEC "\n",
5944 ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset);
5945 asm_fprintf (file, "\tld\t%s, [%s]\n",
5946 ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG);
5947 asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno],
5948 reg_names[this_regno], ARC_TEMP_SCRATCH_REG);
5949 }
5950
5951 fnaddr = XEXP (DECL_RTL (function), 0);
5952
5953 if (arc_is_longcall_p (fnaddr))
5954 fputs ("\tj\t", file);
5955 else
5956 fputs ("\tb\t", file);
5957 assemble_name (file, XSTR (fnaddr, 0));
5958 fputc ('\n', file);
5959 }
5960
5961 /* Return true if a 32 bit "long_call" should be generated for
5962 this calling SYM_REF. We generate a long_call if the function:
5963
5964 a. has an __attribute__((long_call))
5965 or b. the -mlong-calls command line switch has been specified
5966
5967 However we do not generate a long call if the function has an
5968 __attribute__ ((short_call)) or __attribute__ ((medium_call))
5969
5970 This function will be called by C fragments contained in the machine
5971 description file. */
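/* For example (illustrative): declaring
     void f (void) __attribute__ ((short_call));
   keeps calls to f as short pc-relative branches even under -mlong-calls,
   while __attribute__ ((long_call)) forces the 32-bit-address call sequence
   for a single function without enabling the global switch.  */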
5972
5973 bool
5974 arc_is_longcall_p (rtx sym_ref)
5975 {
5976 if (GET_CODE (sym_ref) != SYMBOL_REF)
5977 return false;
5978
5979 return (SYMBOL_REF_LONG_CALL_P (sym_ref)
5980 || (TARGET_LONG_CALLS_SET
5981 && !SYMBOL_REF_SHORT_CALL_P (sym_ref)
5982 && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
5983
5984 }
5985
5986 /* Likewise for short calls. */
5987
5988 bool
5989 arc_is_shortcall_p (rtx sym_ref)
5990 {
5991 if (GET_CODE (sym_ref) != SYMBOL_REF)
5992 return false;
5993
5994 return (SYMBOL_REF_SHORT_CALL_P (sym_ref)
5995 || (!TARGET_LONG_CALLS_SET && !TARGET_MEDIUM_CALLS
5996 && !SYMBOL_REF_LONG_CALL_P (sym_ref)
5997 && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
5998
5999 }
6000
6001 /* Emit profiling code for calling CALLEE. Return true if a special
6002 call pattern needs to be generated. */
6003
6004 bool
6005 arc_profile_call (rtx callee)
6006 {
6007 rtx from = XEXP (DECL_RTL (current_function_decl), 0);
6008
6009 if (TARGET_UCB_MCOUNT)
6010 /* Profiling is done by instrumenting the callee. */
6011 return false;
6012
6013 if (CONSTANT_P (callee))
6014 {
6015 rtx count_ptr
6016 = gen_rtx_CONST (Pmode,
6017 gen_rtx_UNSPEC (Pmode,
6018 gen_rtvec (3, from, callee,
6019 CONST0_RTX (Pmode)),
6020 UNSPEC_PROF));
6021 rtx counter = gen_rtx_MEM (SImode, count_ptr);
6022 /* ??? The increment would better be done atomically, but as there is
6023 no proper hardware support, that would be too expensive. */
6024 emit_move_insn (counter,
6025 force_reg (SImode, plus_constant (SImode, counter, 1)));
6026 return false;
6027 }
6028 else
6029 {
6030 rtx count_list_ptr
6031 = gen_rtx_CONST (Pmode,
6032 gen_rtx_UNSPEC (Pmode,
6033 gen_rtvec (3, from, CONST0_RTX (Pmode),
6034 CONST0_RTX (Pmode)),
6035 UNSPEC_PROF));
6036 emit_move_insn (gen_rtx_REG (Pmode, 8), count_list_ptr);
6037 emit_move_insn (gen_rtx_REG (Pmode, 9), callee);
6038 return true;
6039 }
6040 }
6041
6042 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6043
6044 static bool
6045 arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6046 {
6047 if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type))
6048 return true;
6049 else
6050 {
6051 HOST_WIDE_INT size = int_size_in_bytes (type);
6052 return (size == -1 || size > (TARGET_V2 ? 16 : 8));
6053 }
6054 }
6055
6056
6057 /* This was in rtlanal.c, and can go in there when we decide we want
6058 to submit the change for inclusion in the GCC tree. */
6059 /* Like note_stores, but allow the callback to have side effects on the rtl
6060 (like the note_stores of yore):
6061 Call FUN on each register or MEM that is stored into or clobbered by X.
6062 (X would be the pattern of an insn). DATA is an arbitrary pointer,
6063 ignored by note_stores, but passed to FUN.
6064 FUN may alter parts of the RTL.
6065
6066 FUN receives three arguments:
6067 1. the REG, MEM, CC0 or PC being stored in or clobbered,
6068 2. the SET or CLOBBER rtx that does the store,
6069 3. the pointer DATA provided to note_stores.
6070
6071 If the item being stored in or clobbered is a SUBREG of a hard register,
6072 the SUBREG will be passed. */
6073
6074 /* For now. */ static
6075 void
6076 walk_stores (rtx x, void (*fun) (rtx, rtx, void *), void *data)
6077 {
6078 int i;
6079
6080 if (GET_CODE (x) == COND_EXEC)
6081 x = COND_EXEC_CODE (x);
6082
6083 if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
6084 {
6085 rtx dest = SET_DEST (x);
6086
6087 while ((GET_CODE (dest) == SUBREG
6088 && (!REG_P (SUBREG_REG (dest))
6089 || REGNO (SUBREG_REG (dest)) >= FIRST_PSEUDO_REGISTER))
6090 || GET_CODE (dest) == ZERO_EXTRACT
6091 || GET_CODE (dest) == STRICT_LOW_PART)
6092 dest = XEXP (dest, 0);
6093
6094 /* If we have a PARALLEL, SET_DEST is a list of EXPR_LIST expressions,
6095 each of whose first operand is a register. */
6096 if (GET_CODE (dest) == PARALLEL)
6097 {
6098 for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
6099 if (XEXP (XVECEXP (dest, 0, i), 0) != 0)
6100 (*fun) (XEXP (XVECEXP (dest, 0, i), 0), x, data);
6101 }
6102 else
6103 (*fun) (dest, x, data);
6104 }
6105
6106 else if (GET_CODE (x) == PARALLEL)
6107 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
6108 walk_stores (XVECEXP (x, 0, i), fun, data);
6109 }
6110
6111 static bool
6112 arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED,
6113 machine_mode mode ATTRIBUTE_UNUSED,
6114 const_tree type,
6115 bool named ATTRIBUTE_UNUSED)
6116 {
6117 return (type != 0
6118 && (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
6119 || TREE_ADDRESSABLE (type)));
6120 }
6121
6122 /* Implement TARGET_CAN_USE_DOLOOP_P. */
6123
6124 static bool
6125 arc_can_use_doloop_p (const widest_int &iterations, const widest_int &,
6126 unsigned int loop_depth, bool entered_at_top)
6127 {
6128 if (loop_depth > 1)
6129 return false;
6130 /* Setting up the loop with two sr instructions costs 6 cycles. */
6131 if (TARGET_ARC700
6132 && !entered_at_top
6133 && wi::gtu_p (iterations, 0)
6134 && wi::leu_p (iterations, flag_pic ? 6 : 3))
6135 return false;
6136 return true;
6137 }
6138
6139 /* Return NULL if INSN is valid within a low-overhead loop.
6140 Otherwise return why doloop cannot be applied. */
6141
6142 static const char *
6143 arc_invalid_within_doloop (const rtx_insn *insn)
6144 {
6145 if (CALL_P (insn))
6146 return "Function call in the loop.";
6147 return NULL;
6148 }
6149
6150 /* The same functionality as arc_hazard. It is called in machine
6151 reorg before any other optimization. Hence, the NOP size is taken
6152 into account when doing branch shortening. */
6153
6154 static void
6155 workaround_arc_anomaly (void)
6156 {
6157 rtx_insn *insn, *succ0;
6158
6159 /* For any architecture: call arc_hazard here. */
6160 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6161 {
6162 succ0 = next_real_insn (insn);
6163 if (arc_hazard (insn, succ0))
6164 {
6165 emit_insn_before (gen_nopv (), succ0);
6166 }
6167 }
6168 }
6169
6170 static int arc_reorg_in_progress = 0;
6171
6172 /* ARC's machine-specific reorg function. */
6173
6174 static void
6175 arc_reorg (void)
6176 {
6177 rtx_insn *insn;
6178 rtx pattern;
6179 rtx pc_target;
6180 long offset;
6181 int changed;
6182
6183 workaround_arc_anomaly ();
6184
6185 cfun->machine->arc_reorg_started = 1;
6186 arc_reorg_in_progress = 1;
6187
6188 /* Emit special sections for profiling. */
6189 if (crtl->profile)
6190 {
6191 section *save_text_section;
6192 rtx_insn *insn;
6193 int size = get_max_uid () >> 4;
6194 htab_t htab = htab_create (size, unspec_prof_hash, unspec_prof_htab_eq,
6195 NULL);
6196
6197 save_text_section = in_section;
6198 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6199 if (NONJUMP_INSN_P (insn))
6200 walk_stores (PATTERN (insn), write_profile_sections, htab);
6201 if (htab_elements (htab))
6202 in_section = 0;
6203 switch_to_section (save_text_section);
6204 htab_delete (htab);
6205 }
6206
6207 /* Link up loop ends with their loop start. */
6208 {
6209 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6210 if (GET_CODE (insn) == JUMP_INSN
6211 && recog_memoized (insn) == CODE_FOR_doloop_end_i)
6212 {
6213 rtx_insn *top_label
6214 = as_a <rtx_insn *> (XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0));
6215 rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label));
6216 rtx_insn *lp, *prev = prev_nonnote_insn (top_label);
6217 rtx_insn *lp_simple = NULL;
6218 rtx_insn *next = NULL;
6219 rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0);
6220 HOST_WIDE_INT loop_end_id
6221 = -INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0));
6222 int seen_label = 0;
6223
6224 for (lp = prev;
6225 (lp && NONJUMP_INSN_P (lp)
6226 && recog_memoized (lp) != CODE_FOR_doloop_begin_i);
6227 lp = prev_nonnote_insn (lp))
6228 ;
6229 if (!lp || !NONJUMP_INSN_P (lp)
6230 || dead_or_set_regno_p (lp, LP_COUNT))
6231 {
6232 for (prev = next = insn, lp = NULL ; prev || next;)
6233 {
6234 if (prev)
6235 {
6236 if (NONJUMP_INSN_P (prev)
6237 && recog_memoized (prev) == CODE_FOR_doloop_begin_i
6238 && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0))
6239 == loop_end_id))
6240 {
6241 lp = prev;
6242 break;
6243 }
6244 else if (LABEL_P (prev))
6245 seen_label = 1;
6246 prev = prev_nonnote_insn (prev);
6247 }
6248 if (next)
6249 {
6250 if (NONJUMP_INSN_P (next)
6251 && recog_memoized (next) == CODE_FOR_doloop_begin_i
6252 && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0))
6253 == loop_end_id))
6254 {
6255 lp = next;
6256 break;
6257 }
6258 next = next_nonnote_insn (next);
6259 }
6260 }
6261 prev = NULL;
6262 }
6263 else
6264 lp_simple = lp;
6265 if (lp && !dead_or_set_regno_p (lp, LP_COUNT))
6266 {
6267 rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0);
6268 if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0)))
6269 /* The loop end insn has been duplicated. That can happen
6270 when there is a conditional block at the very end of
6271 the loop. */
6272 goto failure;
6273 /* If register allocation failed to allocate to the right
6274 register, there is no point in teaching reload to
6275 fix this up with reloads, as that would cost more
6276 than using an ordinary core register with the
6277 doloop_fallback pattern. */
6278 if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt))
6279 /* Likewise, if the loop setup is evidently inside the loop,
6280 we lose. */
6281 || (!lp_simple && lp != next && !seen_label))
6282 {
6283 remove_insn (lp);
6284 goto failure;
6285 }
6286 /* It is common that the optimizers copy the loop count from
6287 another register, and doloop_begin_i is stuck with the
6288 source of the move. Making doloop_begin_i only accept "l"
6289 is nonsensical, as this then makes reload evict the pseudo
6290 used for the loop end. The underlying cause is that the
6291 optimizers don't understand that the register allocation for
6292 doloop_begin_i should be treated as part of the loop.
6293 Try to work around this problem by verifying the previous
6294 move exists. */
6295 if (true_regnum (begin_cnt) != LP_COUNT)
6296 {
6297 rtx_insn *mov;
6298 rtx set, note;
6299
6300 for (mov = prev_nonnote_insn (lp); mov;
6301 mov = prev_nonnote_insn (mov))
6302 {
6303 if (!NONJUMP_INSN_P (mov))
6304 mov = 0;
6305 else if ((set = single_set (mov))
6306 && rtx_equal_p (SET_SRC (set), begin_cnt)
6307 && rtx_equal_p (SET_DEST (set), op0))
6308 break;
6309 }
6310 if (mov)
6311 {
6312 XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0) = op0;
6313 note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt));
6314 if (note)
6315 remove_note (lp, note);
6316 }
6317 else
6318 {
6319 remove_insn (lp);
6320 goto failure;
6321 }
6322 }
6323 XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num;
6324 XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num;
6325 if (next == lp)
6326 XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx;
6327 else if (!lp_simple)
6328 XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx;
6329 else if (prev != lp)
6330 {
6331 remove_insn (lp);
6332 add_insn_after (lp, prev, NULL);
6333 }
6334 if (!lp_simple)
6335 {
6336 XEXP (XVECEXP (PATTERN (lp), 0, 7), 0)
6337 = gen_rtx_LABEL_REF (Pmode, top_label);
6338 add_reg_note (lp, REG_LABEL_OPERAND, top_label);
6339 LABEL_NUSES (top_label)++;
6340 }
6341 /* We can avoid tedious loop start / end setting for empty loops
6342 by merely setting the loop count to its final value. */
6343 if (next_active_insn (top_label) == insn)
6344 {
6345 rtx lc_set
6346 = gen_rtx_SET (XEXP (XVECEXP (PATTERN (lp), 0, 3), 0),
6347 const0_rtx);
6348
6349 rtx_insn *lc_set_insn = emit_insn_before (lc_set, insn);
6350 delete_insn (lp);
6351 delete_insn (insn);
6352 insn = lc_set_insn;
6353 }
6354 /* If the loop is non-empty with zero length, we can't make it
6355 a zero-overhead loop. That can happen for empty asms. */
6356 else
6357 {
6358 rtx_insn *scan;
6359
6360 for (scan = top_label;
6361 (scan && scan != insn
6362 && (!NONJUMP_INSN_P (scan) || !get_attr_length (scan)));
6363 scan = NEXT_INSN (scan));
6364 if (scan == insn)
6365 {
6366 remove_insn (lp);
6367 goto failure;
6368 }
6369 }
6370 }
6371 else
6372 {
6373 /* Sometimes the loop optimizer makes a complete hash of the
6374 loop. If it were only that the loop is not entered at the
6375 top, we could fix this up by setting LP_START with SR.
6376 However, if we can't find the loop begin where it should be,
6377 chances are that it does not even dominate the loop, but is
6378 inside the loop instead. Using SR there would kill
6379 performance.
6380 We use the doloop_fallback pattern here, which executes
6381 in two cycles on the ARC700 when predicted correctly. */
6382 failure:
6383 if (!REG_P (op0))
6384 {
6385 rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0);
6386
6387 emit_insn_before (gen_move_insn (op3, op0), insn);
6388 PATTERN (insn)
6389 = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0);
6390 }
6391 else
6392 XVEC (PATTERN (insn), 0)
6393 = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0),
6394 XVECEXP (PATTERN (insn), 0, 1));
6395 INSN_CODE (insn) = -1;
6396 }
6397 }
6398 }
6399
6400 /* FIXME: should anticipate ccfsm action, generate special patterns for
6401 to-be-deleted branches that have no delay slot and have at least the
6402 length of the size increase forced on other insns that are conditionalized.
6403 This can also have an insn_list inside that enumerates insns which are
6404 not actually conditionalized because the destinations are dead in the
6405 not-execute case.
6406 Could also tag branches that we want to be unaligned if they get no delay
6407 slot, or even ones that we don't want to do delay slot scheduling for
6408 because we can unalign them.
6409
6410 However, there are cases when conditional execution is only possible after
6411 delay slot scheduling:
6412
6413 - If a delay slot is filled with a nocond/set insn from above, the previous
6414 basic block can become eligible for conditional execution.
6415 - If a delay slot is filled with a nocond insn from the fall-through path,
6416 the branch with that delay slot can become eligible for conditional
6417 execution (however, with the same sort of data flow analysis that dbr
6418 does, we could have figured out before that we don't need to
6419 conditionalize this insn.)
6420 - If a delay slot insn is filled with an insn from the target, the
6421 target label gets its uses decremented (even deleted if falling to zero),
6422 thus possibly creating more condexec opportunities there.
6423 Therefore, we should still be prepared to apply condexec optimization on
6424 non-prepared branches if the size increase of conditionalized insns is no
6425 more than the size saved from eliminating the branch. An invocation option
6426 could also be used to reserve a bit of extra size for condbranches so that
6427 this'll work more often (could also test in arc_reorg if the block is
6428 'close enough' to be eligible for condexec to make this likely, and
6429 estimate required size increase). */
6430 /* Generate BRcc insns, by combining cmp and Bcc insns wherever possible. */
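/* Illustrative sketch of the transformation (not literal compiler output):
     cmp r0,r1 ; bne @label   ==>   brne r0,r1,@label
   i.e. the compare and the conditional branch are fused into a single
   compare-and-branch instruction.  */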
6431 if (TARGET_NO_BRCC_SET)
6432 return;
6433
6434 do
6435 {
6436 init_insn_lengths();
6437 changed = 0;
6438
6439 if (optimize > 1 && !TARGET_NO_COND_EXEC)
6440 {
6441 arc_ifcvt ();
6442 unsigned int flags = pass_data_arc_ifcvt.todo_flags_finish;
6443 df_finish_pass ((flags & TODO_df_verify) != 0);
6444 }
6445
6446 /* Call shorten_branches to calculate the insn lengths. */
6447 shorten_branches (get_insns());
6448 cfun->machine->ccfsm_current_insn = NULL_RTX;
6449
6450 if (!INSN_ADDRESSES_SET_P())
6451 fatal_error (input_location, "Insn addresses not set after shorten_branches");
6452
6453 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6454 {
6455 rtx label;
6456 enum attr_type insn_type;
6457
6458 /* If a non-jump insn (or a casesi jump table), continue. */
6459 if (GET_CODE (insn) != JUMP_INSN ||
6460 GET_CODE (PATTERN (insn)) == ADDR_VEC
6461 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
6462 continue;
6463
6464 /* If we already have a brcc, note if it is suitable for brcc_s.
6465 Be a bit generous with the brcc_s range so that we can take
6466 advantage of any code shortening from delay slot scheduling. */
6467 if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch)
6468 {
6469 rtx pat = PATTERN (insn);
6470 rtx op = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0);
6471 rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0);
6472
6473 offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
6474 if ((offset >= -140 && offset < 140)
6475 && rtx_equal_p (XEXP (op, 1), const0_rtx)
6476 && compact_register_operand (XEXP (op, 0), VOIDmode)
6477 && equality_comparison_operator (op, VOIDmode))
6478 PUT_MODE (*ccp, CC_Zmode);
6479 else if (GET_MODE (*ccp) == CC_Zmode)
6480 PUT_MODE (*ccp, CC_ZNmode);
6481 continue;
6482 }
6483 if ((insn_type = get_attr_type (insn)) == TYPE_BRCC
6484 || insn_type == TYPE_BRCC_NO_DELAY_SLOT)
6485 continue;
6486
6487 /* OK. so we have a jump insn. */
6488 /* We need to check that it is a bcc. */
6489 /* Bcc => set (pc) (if_then_else ) */
6490 pattern = PATTERN (insn);
6491 if (GET_CODE (pattern) != SET
6492 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
6493 || ANY_RETURN_P (XEXP (SET_SRC (pattern), 1)))
6494 continue;
6495
6496 /* Now check if the jump is beyond the s9 range. */
6497 if (CROSSING_JUMP_P (insn))
6498 continue;
6499 offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
6500
6501 if (offset > 253 || offset < -254)
6502 continue;
6503
6504 pc_target = SET_SRC (pattern);
6505
6506 /* Avoid FPU instructions. */
6507 if ((GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPUmode)
6508 || (GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPU_UNEQmode))
6509 continue;
6510
6511 /* Now go back and search for the set cc insn. */
6512
6513 label = XEXP (pc_target, 1);
6514
6515 {
6516 rtx pat;
6517 rtx_insn *scan, *link_insn = NULL;
6518
6519 for (scan = PREV_INSN (insn);
6520 scan && GET_CODE (scan) != CODE_LABEL;
6521 scan = PREV_INSN (scan))
6522 {
6523 if (! INSN_P (scan))
6524 continue;
6525 pat = PATTERN (scan);
6526 if (GET_CODE (pat) == SET
6527 && cc_register (SET_DEST (pat), VOIDmode))
6528 {
6529 link_insn = scan;
6530 break;
6531 }
6532 }
6533 if (!link_insn)
6534 continue;
6535 else
6536 /* Check if this is a data dependency. */
6537 {
6538 rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
6539 rtx cmp0, cmp1;
6540
6541 /* OK, this is the set cc; copy args here. */
6542 op = XEXP (pc_target, 0);
6543
6544 op0 = cmp0 = XEXP (SET_SRC (pat), 0);
6545 op1 = cmp1 = XEXP (SET_SRC (pat), 1);
6546 if (GET_CODE (op0) == ZERO_EXTRACT
6547 && XEXP (op0, 1) == const1_rtx
6548 && (GET_CODE (op) == EQ
6549 || GET_CODE (op) == NE))
6550 {
6551 /* btst / b{eq,ne} -> bbit{0,1} */
6552 op0 = XEXP (cmp0, 0);
6553 op1 = XEXP (cmp0, 2);
6554 }
6555 else if (!register_operand (op0, VOIDmode)
6556 || !general_operand (op1, VOIDmode))
6557 continue;
6558 /* Be careful not to break what cmpsfpx_raw is
6559 trying to create for checking equality of
6560 single-precision floats. */
6561 else if (TARGET_SPFP
6562 && GET_MODE (op0) == SFmode
6563 && GET_MODE (op1) == SFmode)
6564 continue;
6565
6566 /* None of the two cmp operands should be set between the
6567 cmp and the branch. */
6568 if (reg_set_between_p (op0, link_insn, insn))
6569 continue;
6570
6571 if (reg_set_between_p (op1, link_insn, insn))
6572 continue;
6573
6574 /* Since the MODE check does not work, check that this is
6575 CC reg's last set location before insn, and also no
6576 instruction between the cmp and branch uses the
6577 condition codes. */
6578 if ((reg_set_between_p (SET_DEST (pat), link_insn, insn))
6579 || (reg_used_between_p (SET_DEST (pat), link_insn, insn)))
6580 continue;
6581
6582 /* CC reg should be dead after insn. */
6583 if (!find_regno_note (insn, REG_DEAD, CC_REG))
6584 continue;
6585
6586 op = gen_rtx_fmt_ee (GET_CODE (op),
6587 GET_MODE (op), cmp0, cmp1);
6588 /* If we create a LIMM where there was none before,
6589 we only benefit if we can avoid a scheduling bubble
6590 for the ARC600. Otherwise, we'd only forgo chances
6591 at short insn generation, and risk out-of-range
6592 branches. */
6593 if (!brcc_nolimm_operator (op, VOIDmode)
6594 && !long_immediate_operand (op1, VOIDmode)
6595 && (TARGET_ARC700
6596 || next_active_insn (link_insn) != insn))
6597 continue;
6598
6599 /* Emit bbit / brcc (or brcc_s if possible).
6600 CC_Zmode indicates that brcc_s is possible. */
6601
6602 if (op0 != cmp0)
6603 cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG);
6604 else if ((offset >= -140 && offset < 140)
6605 && rtx_equal_p (op1, const0_rtx)
6606 && compact_register_operand (op0, VOIDmode)
6607 && (GET_CODE (op) == EQ
6608 || GET_CODE (op) == NE))
6609 cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG);
6610 else
6611 cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG);
6612
6613 brcc_insn
6614 = gen_rtx_IF_THEN_ELSE (VOIDmode, op, label, pc_rtx);
6615 brcc_insn = gen_rtx_SET (pc_rtx, brcc_insn);
6616 cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx);
6617 brcc_insn
6618 = gen_rtx_PARALLEL
6619 (VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx));
6620 brcc_insn = emit_jump_insn_before (brcc_insn, insn);
6621
6622 JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn);
6623 note = find_reg_note (insn, REG_BR_PROB, 0);
6624 if (note)
6625 {
6626 XEXP (note, 1) = REG_NOTES (brcc_insn);
6627 REG_NOTES (brcc_insn) = note;
6628 }
6629 note = find_reg_note (link_insn, REG_DEAD, op0);
6630 if (note)
6631 {
6632 remove_note (link_insn, note);
6633 XEXP (note, 1) = REG_NOTES (brcc_insn);
6634 REG_NOTES (brcc_insn) = note;
6635 }
6636 note = find_reg_note (link_insn, REG_DEAD, op1);
6637 if (note)
6638 {
6639 XEXP (note, 1) = REG_NOTES (brcc_insn);
6640 REG_NOTES (brcc_insn) = note;
6641 }
6642
6643 changed = 1;
6644
6645 /* Delete the bcc insn. */
6646 set_insn_deleted (insn);
6647
6648 /* Delete the cmp insn. */
6649 set_insn_deleted (link_insn);
6650
6651 }
6652 }
6653 }
6654 /* Clear out insn_addresses. */
6655 INSN_ADDRESSES_FREE ();
6656
6657 } while (changed);
6658
6659 if (INSN_ADDRESSES_SET_P())
6660 fatal_error (input_location, "insn addresses not freed");
6661
6662 arc_reorg_in_progress = 0;
6663 }
6664
6665 /* Check if the operands are valid for BRcc.d generation
6666 Valid Brcc.d patterns are
6667 Brcc.d b, c, s9
6668 Brcc.d b, u6, s9
6669
6670 For cc={GT, LE, GTU, LEU}, u6=63 cannot be allowed,
6671 since they are encoded by the assembler as {GE, LT, HS, LS} 64, which
6672 does not have a delay slot
6673
6674 Assumed precondition: Second operand is either a register or a u6 value. */
6675
6676 bool
6677 valid_brcc_with_delay_p (rtx *operands)
6678 {
6679 if (optimize_size && GET_MODE (operands[4]) == CC_Zmode)
6680 return false;
6681 return brcc_nolimm_operator (operands[0], VOIDmode);
6682 }
6683
6684 /* ??? Hack. This should not really be here. See PR32143. */
6685 static bool
6686 arc_decl_anon_ns_mem_p (const_tree decl)
6687 {
6688 while (1)
6689 {
6690 if (decl == NULL_TREE || decl == error_mark_node)
6691 return false;
6692 if (TREE_CODE (decl) == NAMESPACE_DECL
6693 && DECL_NAME (decl) == NULL_TREE)
6694 return true;
6695 /* Classes and namespaces inside anonymous namespaces have
6696 TREE_PUBLIC == 0, so we can shortcut the search. */
6697 else if (TYPE_P (decl))
6698 return (TREE_PUBLIC (TYPE_NAME (decl)) == 0);
6699 else if (TREE_CODE (decl) == NAMESPACE_DECL)
6700 return (TREE_PUBLIC (decl) == 0);
6701 else
6702 decl = DECL_CONTEXT (decl);
6703 }
6704 }
6705
6706 /* Implement TARGET_IN_SMALL_DATA_P. Return true if it would be safe to
6707 access DECL using %gp_rel(...)($gp). */
6708
6709 static bool
6710 arc_in_small_data_p (const_tree decl)
6711 {
6712 HOST_WIDE_INT size;
6713
6714 if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
6715 return false;
6716
6717
6718 /* We don't yet generate small-data references for -mabicalls. See related
6719 -G handling in override_options. */
6720 if (TARGET_NO_SDATA_SET)
6721 return false;
6722
6723 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0)
6724 {
6725 const char *name;
6726
6727 /* Reject anything that isn't in a known small-data section. */
6728 name = DECL_SECTION_NAME (decl);
6729 if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
6730 return false;
6731
6732 /* If a symbol is defined externally, the assembler will use the
6733 usual -G rules when deciding how to implement macros. */
6734 if (!DECL_EXTERNAL (decl))
6735 return true;
6736 }
6737 /* Only global variables go into sdata section for now. */
6738 else if (1)
6739 {
6740 /* Don't put constants into the small data section: we want them
6741 to be in ROM rather than RAM. */
6742 if (TREE_CODE (decl) != VAR_DECL)
6743 return false;
6744
6745 if (TREE_READONLY (decl)
6746 && !TREE_SIDE_EFFECTS (decl)
6747 && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl))))
6748 return false;
6749
6750 /* TREE_PUBLIC might change after the first call, because of the patch
6751 for PR19238. */
6752 if (default_binds_local_p_1 (decl, 1)
6753 || arc_decl_anon_ns_mem_p (decl))
6754 return false;
6755
6756 /* To ensure -mvolatile-cache works, keep volatile declarations out of
6757 sdata: ld.di does not have a gp-relative variant. */
6758 if (TREE_THIS_VOLATILE (decl))
6759 return false;
6760 }
6761
6762 /* Disable sdata references to weak variables. */
6763 if (DECL_WEAK (decl))
6764 return false;
6765
6766 size = int_size_in_bytes (TREE_TYPE (decl));
6767
6768 /* if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) */
6769 /* return false; */
6770
6771 /* Allow only <=4B long data types into sdata. */
6772 return (size > 0 && size <= 4);
6773 }
6774
6775 /* Return true if X is a small data address that can be rewritten
6776 as a gp+symref. */
6777
6778 static bool
6779 arc_rewrite_small_data_p (const_rtx x)
6780 {
6781 if (GET_CODE (x) == CONST)
6782 x = XEXP (x, 0);
6783
6784 if (GET_CODE (x) == PLUS)
6785 {
6786 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6787 x = XEXP (x, 0);
6788 }
6789
6790 return (GET_CODE (x) == SYMBOL_REF
6791 && SYMBOL_REF_SMALL_P(x));
6792 }
6793
6794 /* If possible, rewrite OP so that it refers to small data using
6795 explicit relocations. */
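/* Illustrative sketch: a small-data reference such as (mem (symbol_ref foo))
   is rewritten to (mem (plus (reg gp) (symbol_ref foo))), which the assembler
   materializes as a gp-relative (@sda) access.  */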
6796
6797 rtx
6798 arc_rewrite_small_data (rtx op)
6799 {
6800 op = copy_insn (op);
6801 subrtx_ptr_iterator::array_type array;
6802 FOR_EACH_SUBRTX_PTR (iter, array, &op, ALL)
6803 {
6804 rtx *loc = *iter;
6805 if (arc_rewrite_small_data_p (*loc))
6806 {
6807 gcc_assert (SDATA_BASE_REGNUM == PIC_OFFSET_TABLE_REGNUM);
6808 *loc = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, *loc);
6809 if (loc != &op)
6810 {
6811 if (GET_CODE (op) == MEM && &XEXP (op, 0) == loc)
6812 ; /* OK. */
6813 else if (GET_CODE (op) == MEM
6814 && GET_CODE (XEXP (op, 0)) == PLUS
6815 && GET_CODE (XEXP (XEXP (op, 0), 0)) == MULT)
6816 *loc = force_reg (Pmode, *loc);
6817 else
6818 gcc_unreachable ();
6819 }
6820 iter.skip_subrtxes ();
6821 }
6822 else if (GET_CODE (*loc) == PLUS
6823 && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx))
6824 iter.skip_subrtxes ();
6825 }
6826 return op;
6827 }
6828
6829 /* Return true if OP refers to small data symbols directly, not through
6830 a PLUS. */
6831
6832 bool
6833 small_data_pattern (rtx op, machine_mode)
6834 {
6835 if (GET_CODE (op) == SEQUENCE)
6836 return false;
6837 subrtx_iterator::array_type array;
6838 FOR_EACH_SUBRTX (iter, array, op, ALL)
6839 {
6840 const_rtx x = *iter;
6841 if (GET_CODE (x) == PLUS
6842 && rtx_equal_p (XEXP (x, 0), pic_offset_table_rtx))
6843 iter.skip_subrtxes ();
6844 else if (arc_rewrite_small_data_p (x))
6845 return true;
6846 }
6847 return false;
6848 }
6849
6850 /* Return true if OP is an acceptable memory operand for ARCompact
6851 16-bit gp-relative load instructions.
6852 OP should look like: [r26, symref@sda],
6853 i.e. (mem (plus (reg 26) (symref with smalldata flag set))).
6854 */
6855 /* volatile cache option still to be handled. */
6856
6857 bool
6858 compact_sda_memory_operand (rtx op, machine_mode mode)
6859 {
6860 rtx addr;
6861 int size;
6862
6863 /* Eliminate non-memory operations. */
6864 if (GET_CODE (op) != MEM)
6865 return false;
6866
6867 if (mode == VOIDmode)
6868 mode = GET_MODE (op);
6869
6870 size = GET_MODE_SIZE (mode);
6871
6872 /* dword operations really put out 2 instructions, so eliminate them. */
6873 if (size > UNITS_PER_WORD)
6874 return false;
6875
6876 /* Decode the address now. */
6877 addr = XEXP (op, 0);
6878
6879 return LEGITIMATE_SMALL_DATA_ADDRESS_P (addr);
6880 }
6881
6882 /* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL. */
6883
6884 void
6885 arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name,
6886 unsigned HOST_WIDE_INT size,
6887 unsigned HOST_WIDE_INT align,
6888 unsigned HOST_WIDE_INT globalize_p)
6889 {
6890 int in_small_data = arc_in_small_data_p (decl);
6891
6892 if (in_small_data)
6893 switch_to_section (get_named_section (NULL, ".sbss", 0));
6894 /* named_section (0,".sbss",0); */
6895 else
6896 switch_to_section (bss_section);
6897
6898 if (globalize_p)
6899 (*targetm.asm_out.globalize_label) (stream, name);
6900
6901 ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT));
6902 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
6903 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
6904 ASM_OUTPUT_LABEL (stream, name);
6905
6906 if (size != 0)
6907 ASM_OUTPUT_SKIP (stream, size);
6908 }
6909
6910 static bool
6911 arc_preserve_reload_p (rtx in)
6912 {
6913 return (GET_CODE (in) == PLUS
6914 && RTX_OK_FOR_BASE_P (XEXP (in, 0), true)
6915 && CONST_INT_P (XEXP (in, 1))
6916 && !((INTVAL (XEXP (in, 1)) & 511)));
6917 }
6918
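/* Cost of moving a value of mode MODE from a register in FROM_CLASS to one
   in TO_CLASS.  */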
6919 int
6920 arc_register_move_cost (machine_mode,
6921 enum reg_class from_class, enum reg_class to_class)
6922 {
6923 /* The ARC600 has no bypass for extension registers, hence a nop may need
6924 to be inserted after a write so that reads are safe. */
6925 if (TARGET_ARC600)
6926 {
6927 if (to_class == MPY_WRITABLE_CORE_REGS)
6928 return 3;
6929 /* Instructions modifying LP_COUNT need 4 additional cycles before
6930 the register will actually contain the value. */
6931 else if (to_class == LPCOUNT_REG)
6932 return 6;
6933 else if (to_class == WRITABLE_CORE_REGS)
6934 return 6;
6935 }
6936
6937 /* The ARC700 stalls for 3 cycles when *reading* from lp_count. */
6938 if (TARGET_ARC700
6939 && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS
6940 || from_class == WRITABLE_CORE_REGS))
6941 return 8;
6942
6943 /* Force an attempt to 'mov Dy,Dx' to spill. */
6944 if (TARGET_ARC700 && TARGET_DPFP
6945 && from_class == DOUBLE_REGS && to_class == DOUBLE_REGS)
6946 return 100;
6947
6948 return 2;
6949 }
6950
6951 /* Emit code for an addsi3 instruction with OPERANDS.
6952 COND_P indicates if this will use conditional execution.
6953 Return the length of the instruction.
6954 If OUTPUT_P is false, don't actually output the instruction, just return
6955 its length. */
6956 int
6957 arc_output_addsi (rtx *operands, bool cond_p, bool output_p)
6958 {
6959 char format[32];
6960
6961 int match = operands_match_p (operands[0], operands[1]);
6962 int match2 = operands_match_p (operands[0], operands[2]);
6963 int intval = (REG_P (operands[2]) ? 1
6964 : CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057);
6965 int neg_intval = -intval;
6966 int short_0 = satisfies_constraint_Rcq (operands[0]);
6967 int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1]));
6968 int ret = 0;
6969
6970 #define ADDSI_OUTPUT1(FORMAT) do {\
6971 if (output_p) \
6972 output_asm_insn (FORMAT, operands);\
6973 return ret; \
6974 } while (0)
6975 #define ADDSI_OUTPUT(LIST) do {\
6976 if (output_p) \
6977 sprintf LIST;\
6978 ADDSI_OUTPUT1 (format);\
6979 return ret; \
6980 } while (0)
6981
6982 /* First try to emit a 16 bit insn. */
6983 ret = 2;
6984 if (!cond_p
6985 /* If we are actually about to output this insn, don't try a 16 bit
6986 variant if we already decided that we don't want that
6987 (I.e. we upsized this insn to align some following insn.)
6988 E.g. add_s r0,sp,70 is 16 bit, but add r0,sp,70 requires a LIMM -
6989 but add1 r0,sp,35 doesn't. */
6990 && (!output_p || (get_attr_length (current_output_insn) & 2)))
6991 {
6992 if (short_p
6993 && (REG_P (operands[2])
6994 ? (match || satisfies_constraint_Rcq (operands[2]))
6995 : (unsigned) intval <= (match ? 127 : 7)))
6996 ADDSI_OUTPUT1 ("add%? %0,%1,%2");
6997 if (short_0 && REG_P (operands[1]) && match2)
6998 ADDSI_OUTPUT1 ("add%? %0,%2,%1");
6999 if ((short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM)
7000 && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124))
7001 ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7002
7003 if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7))
7004 || (REGNO (operands[0]) == STACK_POINTER_REGNUM
7005 && match && !(neg_intval & ~124)))
7006 ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
7007 }
7008
7009 /* Now try to emit a 32 bit insn without long immediate. */
7010 ret = 4;
7011 if (!match && match2 && REG_P (operands[1]))
7012 ADDSI_OUTPUT1 ("add%? %0,%2,%1");
7013 if (match || !cond_p)
7014 {
7015 int limit = (match && !cond_p) ? 0x7ff : 0x3f;
7016 int range_factor = neg_intval & intval;
7017 int shift;
7018
7019 if (intval == (HOST_WIDE_INT) (HOST_WIDE_INT_M1U << 31))
7020 ADDSI_OUTPUT1 ("bxor%? %0,%1,31");
7021
7022 /* If we can use a straight add / sub instead of a {add,sub}[123] of
7023 same size, do so - the insn latency is lower. */
7024 /* -0x800 is a 12-bit constant for add /add3 / sub / sub3, but
7025 0x800 is not. */
7026 if ((intval >= 0 && intval <= limit)
7027 || (intval == -0x800 && limit == 0x7ff))
7028 ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7029 else if ((intval < 0 && neg_intval <= limit)
7030 || (intval == 0x800 && limit == 0x7ff))
7031 ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
7032 shift = range_factor >= 8 ? 3 : (range_factor >> 1);
7033 gcc_assert (shift == 0 || shift == 1 || shift == 2 || shift == 3);
7034 gcc_assert ((((1 << shift) - 1) & intval) == 0);
7035 if (((intval < 0 && intval != -0x4000)
7036 /* sub[123] is slower than add_s / sub, only use it if it
7037 avoids a long immediate. */
7038 && neg_intval <= limit << shift)
7039 || (intval == 0x4000 && limit == 0x7ff))
7040 ADDSI_OUTPUT ((format, "sub%d%%? %%0,%%1,%d",
7041 shift, neg_intval >> shift));
7042 else if ((intval >= 0 && intval <= limit << shift)
7043 || (intval == -0x4000 && limit == 0x7ff))
7044 ADDSI_OUTPUT ((format, "add%d%%? %%0,%%1,%d", shift, intval >> shift));
7045 }
7046 /* Try to emit a 16 bit opcode with long immediate. */
7047 ret = 6;
7048 if (short_p && match)
7049 ADDSI_OUTPUT1 ("add%? %0,%1,%S2");
7050
7051 /* We have to use a 32 bit opcode, and with a long immediate. */
7052 ret = 8;
7053 ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%S2");
7054 }
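
/* A rough illustration of the length selection above (assumed register
   classes and examples only; not exercised by the code itself, and only
   valid when the 16-bit variant has not been ruled out by alignment):
     - r0 = r1 + 7, with r0/r1 both ARCompact16 (Rcq) registers and no
       condition: the 16-bit "add_s" form, length 2.
     - r0 = r0 + 1024: fits the signed 12-bit immediate of a plain
       32-bit "add", length 4.
     - r0 = r1 + 0x12345: needs a long immediate, so the final fallback
       emits a 32-bit opcode plus LIMM, length 8.  */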
7055
7056 /* Emit code for a commutative_cond_exec instruction with OPERANDS.
7057 Return the length of the instruction.
7058 If OUTPUT_P is false, don't actually output the instruction, just return
7059 its length. */
7060 int
7061 arc_output_commutative_cond_exec (rtx *operands, bool output_p)
7062 {
7063 enum rtx_code commutative_op = GET_CODE (operands[3]);
7064 const char *pat = NULL;
7065
7066 /* Canonical rtl should not have a constant in the first operand position. */
7067 gcc_assert (!CONSTANT_P (operands[1]));
7068
7069 switch (commutative_op)
7070 {
7071 case AND:
7072 if (satisfies_constraint_C1p (operands[2]))
7073 pat = "bmsk%? %0,%1,%Z2";
7074 else if (satisfies_constraint_Ccp (operands[2]))
7075 pat = "bclr%? %0,%1,%M2";
7076 else if (satisfies_constraint_CnL (operands[2]))
7077 pat = "bic%? %0,%1,%n2-1";
7078 break;
7079 case IOR:
7080 if (satisfies_constraint_C0p (operands[2]))
7081 pat = "bset%? %0,%1,%z2";
7082 break;
7083 case XOR:
7084 if (satisfies_constraint_C0p (operands[2]))
7085 pat = "bxor%? %0,%1,%z2";
7086 break;
7087 case PLUS:
7088 return arc_output_addsi (operands, true, output_p);
7089 default: break;
7090 }
7091 if (output_p)
7092 output_asm_insn (pat ? pat : "%O3.%d5 %0,%1,%2", operands);
7093 if (pat || REG_P (operands[2]) || satisfies_constraint_L (operands[2]))
7094 return 4;
7095 return 8;
7096 }
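
/* An illustrative sketch of the special cases above (assuming the usual
   meaning of the C1p/Ccp/C0p constraints; not authoritative):
     (and ... 0x0000ffff)  ->  bmsk  (mask of low bits, 2^n - 1)
     (and ... 0xfffffffe)  ->  bclr  (all bits but one, ~(2^n))
     (ior ... 0x00000100)  ->  bset  (a single bit, 2^n)
     (xor ... 0x00000004)  ->  bxor  (a single bit, 2^n)
   Anything else falls back to the generic "%O3.%d5 %0,%1,%2" form, which
   is 4 bytes for register or small-immediate operands and 8 with a LIMM.  */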
7097
7098 /* Helper function of arc_expand_movmem. ADDR points to a chunk of memory.
7099 Emit code and return a potentially modified address such that offsets
7100 up to SIZE can be added to yield a legitimate address.
7101 If REUSE is set, ADDR is a register that may be modified. */
7102
7103 static rtx
7104 force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse)
7105 {
7106 rtx base = addr;
7107 rtx offs = const0_rtx;
7108
7109 if (GET_CODE (base) == PLUS)
7110 {
7111 offs = XEXP (base, 1);
7112 base = XEXP (base, 0);
7113 }
7114 if (!REG_P (base)
7115 || (REGNO (base) != STACK_POINTER_REGNUM
7116 && REGNO_PTR_FRAME_P (REGNO (addr)))
7117 || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs))
7118 || !SMALL_INT (INTVAL (offs) + size))
7119 {
7120 if (reuse)
7121 emit_insn (gen_add2_insn (addr, offs));
7122 else
7123 addr = copy_to_mode_reg (Pmode, addr);
7124 }
7125 return addr;
7126 }
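
/* For illustration only (an assumed example): if ADDR is
   (plus (reg r0) (const_int 100000)) and SIZE is 8, the offset is outside
   the range SMALL_INT accepts, so with REUSE clear the whole address is
   copied into a fresh pseudo; the caller can then add offsets 0..SIZE-1
   to the returned register and still have a legitimate address.  */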
7127
7128 /* Like move_by_pieces, but take account of load latency, and actual
7129 offset ranges. Return true on success. */
7130
7131 bool
7132 arc_expand_movmem (rtx *operands)
7133 {
7134 rtx dst = operands[0];
7135 rtx src = operands[1];
7136 rtx dst_addr, src_addr;
7137 HOST_WIDE_INT size;
7138 int align = INTVAL (operands[3]);
7139 unsigned n_pieces;
7140 int piece = align;
7141 rtx store[2];
7142 rtx tmpx[2];
7143 int i;
7144
7145 if (!CONST_INT_P (operands[2]))
7146 return false;
7147 size = INTVAL (operands[2]);
7148 /* move_by_pieces_ninsns is static, so we can't use it. */
7149 if (align >= 4)
7150 {
7151 if (TARGET_LL64)
7152 n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1);
7153 else
7154 n_pieces = (size + 2) / 4U + (size & 1);
7155 }
7156 else if (align == 2)
7157 n_pieces = (size + 1) / 2U;
7158 else
7159 n_pieces = size;
7160 if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
7161 return false;
7162 /* Force 32 bit aligned and larger datum to use 64 bit transfers, if
7163 possible. */
7164 if (TARGET_LL64 && (piece >= 4) && (size >= 8))
7165 piece = 8;
7166 else if (piece > 4)
7167 piece = 4;
7168 dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
7169 src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
7170 store[0] = store[1] = NULL_RTX;
7171 tmpx[0] = tmpx[1] = NULL_RTX;
7172 for (i = 0; size > 0; i ^= 1, size -= piece)
7173 {
7174 rtx tmp;
7175 machine_mode mode;
7176
7177 while (piece > size)
7178 piece >>= 1;
7179 mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT);
7180 /* If we don't re-use temporaries, the scheduler gets carried away,
7181 and the register pressure gets unnecessarily high. */
7182 if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode)
7183 tmp = tmpx[i];
7184 else
7185 tmpx[i] = tmp = gen_reg_rtx (mode);
7186 dst_addr = force_offsettable (dst_addr, piece, 1);
7187 src_addr = force_offsettable (src_addr, piece, 1);
7188 if (store[i])
7189 emit_insn (store[i]);
7190 emit_move_insn (tmp, change_address (src, mode, src_addr));
7191 store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp);
7192 dst_addr = plus_constant (Pmode, dst_addr, piece);
7193 src_addr = plus_constant (Pmode, src_addr, piece);
7194 }
7195 if (store[i])
7196 emit_insn (store[i]);
7197 if (store[i^1])
7198 emit_insn (store[i^1]);
7199 return true;
7200 }
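
/* A worked example of the piece selection above (illustrative only):
   copying 7 bytes with 4-byte alignment and without -mll64 gives
   n_pieces = (7 + 2) / 4 + (7 & 1) = 3, below the speed-optimizing limit
   of 15, and the loop then emits SImode, HImode and QImode load/store
   pairs (pieces 4, 2 and 1).  The stores lag the loads by one iteration
   via store[0]/store[1], so a load is not immediately followed by the
   store that depends on it.  */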
7201
7202 /* Prepare operands for move in MODE. Return true iff the move has
7203 been emitted. */
7204
7205 bool
7206 prepare_move_operands (rtx *operands, machine_mode mode)
7207 {
7208 /* We used to do this only for MODE_INT Modes, but addresses to floating
7209 point variables may well be in the small data section. */
7210 if (1)
7211 {
7212 if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[0], Pmode))
7213 operands[0] = arc_rewrite_small_data (operands[0]);
7214 else if (mode == SImode && flag_pic && SYMBOLIC_CONST (operands[1]))
7215 {
7216 emit_pic_move (operands, SImode);
7217
7218 /* Disable any REG_EQUALs associated with the symref
7219 otherwise the optimization pass undoes the work done
7220 here and references the variable directly. */
7221 }
7222 else if (GET_CODE (operands[0]) != MEM
7223 && !TARGET_NO_SDATA_SET
7224 && small_data_pattern (operands[1], Pmode))
7225 {
7226 /* This is to take care of address calculations involving sdata
7227 variables. */
7228 operands[1] = arc_rewrite_small_data (operands[1]);
7229
7230 emit_insn (gen_rtx_SET (operands[0],operands[1]));
7231 /* ??? This note is useless, since it only restates the set itself.
7232 We should rather use the original SYMBOL_REF. However, there is
7233 the problem that we are lying to the compiler about these
7234 SYMBOL_REFs to start with. symbol@sda should be encoded specially
7235 so that we can tell it apart from an actual symbol. */
7236 set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
7237
7238 /* Take care of the REG_EQUAL note that will be attached to mark the
7239 output reg equal to the initial symbol_ref after this code is
7240 executed. */
7241 emit_move_insn (operands[0], operands[0]);
7242 return true;
7243 }
7244 }
7245
7246 if (MEM_P (operands[0])
7247 && !(reload_in_progress || reload_completed))
7248 {
7249 operands[1] = force_reg (mode, operands[1]);
7250 if (!move_dest_operand (operands[0], mode))
7251 {
7252 rtx addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
7253 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
7254 except that we can't use that function because it is static. */
7255 rtx pat = change_address (operands[0], mode, addr);
7256 MEM_COPY_ATTRIBUTES (pat, operands[0]);
7257 operands[0] = pat;
7258 }
7259 if (!cse_not_expected)
7260 {
7261 rtx pat = XEXP (operands[0], 0);
7262
7263 pat = arc_legitimize_address_0 (pat, pat, mode);
7264 if (pat)
7265 {
7266 pat = change_address (operands[0], mode, pat);
7267 MEM_COPY_ATTRIBUTES (pat, operands[0]);
7268 operands[0] = pat;
7269 }
7270 }
7271 }
7272
7273 if (MEM_P (operands[1]) && !cse_not_expected)
7274 {
7275 rtx pat = XEXP (operands[1], 0);
7276
7277 pat = arc_legitimize_address_0 (pat, pat, mode);
7278 if (pat)
7279 {
7280 pat = change_address (operands[1], mode, pat);
7281 MEM_COPY_ATTRIBUTES (pat, operands[1]);
7282 operands[1] = pat;
7283 }
7284 }
7285
7286 return false;
7287 }
7288
7289 /* Prepare OPERANDS for an extension using CODE to OMODE.
7290 Return true iff the move has been emitted. */
7291
7292 bool
7293 prepare_extend_operands (rtx *operands, enum rtx_code code,
7294 machine_mode omode)
7295 {
7296 if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode))
7297 {
7298 /* This is to take care of address calculations involving sdata
7299 variables. */
7300 operands[1]
7301 = gen_rtx_fmt_e (code, omode, arc_rewrite_small_data (operands[1]));
7302 emit_insn (gen_rtx_SET (operands[0], operands[1]));
7303 set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
7304
7305 /* Take care of the REG_EQUAL note that will be attached to mark the
7306 output reg equal to the initial extension after this code is
7307 executed. */
7308 emit_move_insn (operands[0], operands[0]);
7309 return true;
7310 }
7311 return false;
7312 }
7313
7314 /* Output a library call to a function called FNAME that has been arranged
7315 to be local to any dso. */
7316
7317 const char *
7318 arc_output_libcall (const char *fname)
7319 {
7320 unsigned len = strlen (fname);
7321 static char buf[64];
7322
7323 gcc_assert (len < sizeof buf - 35);
7324 if (TARGET_LONG_CALLS_SET
7325 || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ()))
7326 {
7327 if (flag_pic)
7328 sprintf (buf, "add r12,pcl,@%s-(.&-4)\n\tjl%%!%%* [r12]", fname);
7329 else
7330 sprintf (buf, "jl%%! @%s", fname);
7331 }
7332 else
7333 sprintf (buf, "bl%%!%%* @%s", fname);
7334 return buf;
7335 }
7336
7337 /* Return the SImode highpart of the DImode value IN. */
7338
7339 rtx
7340 disi_highpart (rtx in)
7341 {
7342 return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4);
7343 }
7344
7345 /* Return length adjustment for INSN.
7346 For ARC600:
7347 A write to a core reg greater or equal to 32 must not be immediately
7348 followed by a use. Anticipate the length requirement to insert a nop
7349 between PRED and SUCC to prevent a hazard. */
7350
7351 static int
7352 arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
7353 {
7354 if (!TARGET_ARC600)
7355 return 0;
7356 /* If SUCC is a doloop_end_i with a preceding label, we must output a nop
7357 in front of SUCC anyway, so there will be separation between PRED and
7358 SUCC. */
7359 if (recog_memoized (succ) == CODE_FOR_doloop_end_i
7360 && LABEL_P (prev_nonnote_insn (succ)))
7361 return 0;
7362 if (recog_memoized (succ) == CODE_FOR_doloop_begin_i)
7363 return 0;
7364 if (GET_CODE (PATTERN (pred)) == SEQUENCE)
7365 pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
7366 if (GET_CODE (PATTERN (succ)) == SEQUENCE)
7367 succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
7368 if (recog_memoized (pred) == CODE_FOR_mulsi_600
7369 || recog_memoized (pred) == CODE_FOR_umul_600
7370 || recog_memoized (pred) == CODE_FOR_mac_600
7371 || recog_memoized (pred) == CODE_FOR_mul64_600
7372 || recog_memoized (pred) == CODE_FOR_mac64_600
7373 || recog_memoized (pred) == CODE_FOR_umul64_600
7374 || recog_memoized (pred) == CODE_FOR_umac64_600)
7375 return 0;
7376 subrtx_iterator::array_type array;
7377 FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
7378 {
7379 const_rtx x = *iter;
7380 switch (GET_CODE (x))
7381 {
7382 case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
7383 break;
7384 default:
7385 /* This is also fine for PRE/POST_MODIFY, because they
7386 contain a SET. */
7387 continue;
7388 }
7389 rtx dest = XEXP (x, 0);
7390 /* Check if this sets an extension register. N.B. we use 61 for the
7391 condition codes, which is definitely not an extension register. */
7392 if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
7393 /* Check if the same register is used by the PAT. */
7394 && (refers_to_regno_p
7395 (REGNO (dest),
7396 REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
7397 PATTERN (succ), 0)))
7398 return 4;
7399 }
7400 return 0;
7401 }
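
/* For illustration (an assumed instruction pair): if PRED is
     (set (reg:SI 32) (reg:SI 0))
   i.e. a write to extension core register r32, and PATTERN (succ) reads
   r32, the function returns 4 so that the length calculation leaves room
   for the nop that separates the write from the read.  */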
7402
7403 /* Given a rtx, check if it is an assembly instruction or not. */
7404
7405 static int
7406 arc_asm_insn_p (rtx x)
7407 {
7408 int i, j;
7409
7410 if (x == 0)
7411 return 0;
7412
7413 switch (GET_CODE (x))
7414 {
7415 case ASM_OPERANDS:
7416 case ASM_INPUT:
7417 return 1;
7418
7419 case SET:
7420 return arc_asm_insn_p (SET_SRC (x));
7421
7422 case PARALLEL:
7423 j = 0;
7424 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
7425 j += arc_asm_insn_p (XVECEXP (x, 0, i));
7426 if ( j > 0)
7427 return 1;
7428 break;
7429
7430 default:
7431 break;
7432 }
7433
7434 return 0;
7435 }
7436
7437 /* We might have a CALL to a non-returning function before a loop end.
7438 ??? Although the manual says that's OK (the target is outside the
7439 loop, and the loop counter unused there), the assembler barfs on
7440 this for ARC600, so we must insert a nop before such a call too.
7441 For ARC700 and ARCv2, the last ZOL instruction must not be a jump
7442 to a location where lp_count is modified. */
7443
7444 static bool
7445 arc_loop_hazard (rtx_insn *pred, rtx_insn *succ)
7446 {
7447 rtx_insn *jump = NULL;
7448 rtx label_rtx = NULL_RTX;
7449 rtx_insn *label = NULL;
7450 basic_block succ_bb;
7451
7452 if (recog_memoized (succ) != CODE_FOR_doloop_end_i)
7453 return false;
7454
7455 /* Phase 1: ARC600 and ARCv2HS don't allow any control instruction
7456 (i.e., jump/call) as the last instruction of a ZOL. */
7457 if (TARGET_ARC600 || TARGET_HS)
7458 if (JUMP_P (pred) || CALL_P (pred)
7459 || arc_asm_insn_p (PATTERN (pred))
7460 || GET_CODE (PATTERN (pred)) == SEQUENCE)
7461 return true;
7462
7463 /* Phase 2: Any architecture, it is not allowed to have the last ZOL
7464 instruction a jump to a location where lp_count is modified. */
7465
7466 /* Phase 2a: Dig for the jump instruction. */
7467 if (JUMP_P (pred))
7468 jump = pred;
7469 else if (GET_CODE (PATTERN (pred)) == SEQUENCE
7470 && JUMP_P (XVECEXP (PATTERN (pred), 0, 0)))
7471 jump = as_a <rtx_insn *> XVECEXP (PATTERN (pred), 0, 0);
7472 else
7473 return false;
7474
7475 /* Phase 2b: Make sure it is not a millicode jump. */
7476 if ((GET_CODE (PATTERN (jump)) == PARALLEL)
7477 && (XVECEXP (PATTERN (jump), 0, 0) == ret_rtx))
7478 return false;
7479
7480 label_rtx = JUMP_LABEL (jump);
7481 if (!label_rtx)
7482 return false;
7483
7484 /* Phase 2c: Make sure it is not a return. */
7485 if (ANY_RETURN_P (label_rtx))
7486 return false;
7487
7488 /* Phase 2d: Go to the target of the jump and check whether the
7489 LP_COUNT register is live there. */
7490 label = safe_as_a <rtx_insn *> (label_rtx);
7491 succ_bb = BLOCK_FOR_INSN (label);
7492 if (!succ_bb)
7493 {
7494 gcc_assert (NEXT_INSN (label));
7495 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (label)))
7496 succ_bb = NOTE_BASIC_BLOCK (NEXT_INSN (label));
7497 else
7498 succ_bb = BLOCK_FOR_INSN (NEXT_INSN (label));
7499 }
7500
7501 if (succ_bb && REGNO_REG_SET_P (df_get_live_out (succ_bb), LP_COUNT))
7502 return true;
7503
7504 return false;
7505 }
7506
7507 /* For ARC600:
7508 A write to a core reg greater or equal to 32 must not be immediately
7509 followed by a use. Anticipate the length requirement to insert a nop
7510 between PRED and SUCC to prevent a hazard. */
7511
7512 int
7513 arc_hazard (rtx_insn *pred, rtx_insn *succ)
7514 {
7515 if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
7516 return 0;
7517
7518 if (arc_loop_hazard (pred, succ))
7519 return 4;
7520
7521 if (TARGET_ARC600)
7522 return arc600_corereg_hazard (pred, succ);
7523
7524 return 0;
7525 }
7526
7527 /* Return length adjustment for INSN. */
7528
7529 int
7530 arc_adjust_insn_length (rtx_insn *insn, int len, bool)
7531 {
7532 if (!INSN_P (insn))
7533 return len;
7534 /* We already handle sequences by ignoring the delay sequence flag. */
7535 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
7536 return len;
7537
7538 /* It is impossible to jump to the very end of a Zero-Overhead Loop, as
7539 the ZOL mechanism only triggers when advancing to the end address,
7540 so if there's a label at the end of a ZOL, we need to insert a nop.
7541 The ARC600 ZOL also has extra restrictions on jumps at the end of a
7542 loop. */
7543 if (recog_memoized (insn) == CODE_FOR_doloop_end_i)
7544 {
7545 rtx_insn *prev = prev_nonnote_insn (insn);
7546
7547 return ((LABEL_P (prev)
7548 || (TARGET_ARC600
7549 && (JUMP_P (prev)
7550 || CALL_P (prev) /* Could be a noreturn call. */
7551 || (NONJUMP_INSN_P (prev)
7552 && GET_CODE (PATTERN (prev)) == SEQUENCE))))
7553 ? len + 4 : len);
7554 }
7555
7556 /* Check for return with but one preceding insn since function
7557 start / call. */
7558 if (TARGET_PAD_RETURN
7559 && JUMP_P (insn)
7560 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7561 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7562 && get_attr_type (insn) == TYPE_RETURN)
7563 {
7564 rtx_insn *prev = prev_active_insn (insn);
7565
7566 if (!prev || !(prev = prev_active_insn (prev))
7567 || ((NONJUMP_INSN_P (prev)
7568 && GET_CODE (PATTERN (prev)) == SEQUENCE)
7569 ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
7570 NON_SIBCALL)
7571 : CALL_ATTR (prev, NON_SIBCALL)))
7572 return len + 4;
7573 }
7574 if (TARGET_ARC600)
7575 {
7576 rtx_insn *succ = next_real_insn (insn);
7577
7578 /* On the ARC600, a write to an extension register must be separated
7579 from a read. */
7580 if (succ && INSN_P (succ))
7581 len += arc600_corereg_hazard (insn, succ);
7582 }
7583
7584 /* Restore extracted operands - otherwise splitters like the addsi3_mixed one
7585 can go awry. */
7586 extract_constrain_insn_cached (insn);
7587
7588 return len;
7589 }
7590
7591 /* Values for length_sensitive. */
7592 enum
7593 {
7594 ARC_LS_NONE,// Jcc
7595 ARC_LS_25, // 25 bit offset, B
7596 ARC_LS_21, // 21 bit offset, Bcc
7597 ARC_LS_U13,// 13 bit unsigned offset, LP
7598 ARC_LS_10, // 10 bit offset, B_s, Beq_s, Bne_s
7599 ARC_LS_9, // 9 bit offset, BRcc
7600 ARC_LS_8, // 8 bit offset, BRcc_s
7601 ARC_LS_U7, // 7 bit unsigned offset, LPcc
7602 ARC_LS_7 // 7 bit offset, Bcc_s
7603 };
7604
7605 /* While the infrastructure patch is waiting for review, duplicate the
7606 struct definitions, to allow this file to compile. */
7607 #if 1
7608 typedef struct
7609 {
7610 unsigned align_set;
7611 /* Cost as a branch / call target or call return address. */
7612 int target_cost;
7613 int fallthrough_cost;
7614 int branch_cost;
7615 int length;
7616 /* 0 for not length sensitive, 1 for largest offset range,
7617 * 2 for next smaller etc. */
7618 unsigned length_sensitive : 8;
7619 bool enabled;
7620 } insn_length_variant_t;
7621
7622 typedef struct insn_length_parameters_s
7623 {
7624 int align_unit_log;
7625 int align_base_log;
7626 int max_variants;
7627 int (*get_variants) (rtx_insn *, int, bool, bool, insn_length_variant_t *);
7628 } insn_length_parameters_t;
7629
7630 static void
7631 arc_insn_length_parameters (insn_length_parameters_t *ilp) ATTRIBUTE_UNUSED;
7632 #endif
7633
7634 static int
7635 arc_get_insn_variants (rtx_insn *insn, int len, bool, bool target_p,
7636 insn_length_variant_t *ilv)
7637 {
7638 if (!NONDEBUG_INSN_P (insn))
7639 return 0;
7640 enum attr_type type;
7641 /* shorten_branches doesn't take optimize_size into account yet for the
7642 get_variants mechanism, so turn this off for now. */
7643 if (optimize_size)
7644 return 0;
7645 if (rtx_sequence *pat = dyn_cast <rtx_sequence *> (PATTERN (insn)))
7646 {
7647 /* The interaction of a short delay slot insn with a short branch is
7648 too weird for shorten_branches to piece together, so describe the
7649 entire SEQUENCE. */
7650 rtx_insn *inner;
7651 if (TARGET_UPSIZE_DBR
7652 && get_attr_length (pat->insn (1)) <= 2
7653 && (((type = get_attr_type (inner = pat->insn (0)))
7654 == TYPE_UNCOND_BRANCH)
7655 || type == TYPE_BRANCH)
7656 && get_attr_delay_slot_filled (inner) == DELAY_SLOT_FILLED_YES)
7657 {
7658 int n_variants
7659 = arc_get_insn_variants (inner, get_attr_length (inner), true,
7660 target_p, ilv+1);
7661 /* The short variant gets split into a higher-cost aligned
7662 and a lower cost unaligned variant. */
7663 gcc_assert (n_variants);
7664 gcc_assert (ilv[1].length_sensitive == ARC_LS_7
7665 || ilv[1].length_sensitive == ARC_LS_10);
7666 gcc_assert (ilv[1].align_set == 3);
7667 ilv[0] = ilv[1];
7668 ilv[0].align_set = 1;
7669 ilv[0].branch_cost += 1;
7670 ilv[1].align_set = 2;
7671 n_variants++;
7672 for (int i = 0; i < n_variants; i++)
7673 ilv[i].length += 2;
7674 /* In case an instruction with aligned size is wanted, and
7675 the short variants are unavailable / too expensive, add
7676 versions of long branch + long delay slot. */
7677 for (int i = 2, end = n_variants; i < end; i++, n_variants++)
7678 {
7679 ilv[n_variants] = ilv[i];
7680 ilv[n_variants].length += 2;
7681 }
7682 return n_variants;
7683 }
7684 return 0;
7685 }
7686 insn_length_variant_t *first_ilv = ilv;
7687 type = get_attr_type (insn);
7688 bool delay_filled
7689 = (get_attr_delay_slot_filled (insn) == DELAY_SLOT_FILLED_YES);
7690 int branch_align_cost = delay_filled ? 0 : 1;
7691 int branch_unalign_cost = delay_filled ? 0 : TARGET_UNALIGN_BRANCH ? 0 : 1;
7692 /* If the previous instruction is an sfunc call, this insn is always
7693 a target, even though the middle-end is unaware of this. */
7694 bool force_target = false;
7695 rtx_insn *prev = prev_active_insn (insn);
7696 if (prev && arc_next_active_insn (prev, 0) == insn
7697 && ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
7698 ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
7699 NON_SIBCALL)
7700 : (CALL_ATTR (prev, NON_SIBCALL)
7701 && NEXT_INSN (PREV_INSN (prev)) == prev)))
7702 force_target = true;
7703
7704 switch (type)
7705 {
7706 case TYPE_BRCC:
7707 /* Short BRCC only comes in no-delay-slot version, and without limm */
7708 if (!delay_filled)
7709 {
7710 ilv->align_set = 3;
7711 ilv->length = 2;
7712 ilv->branch_cost = 1;
7713 ilv->enabled = (len == 2);
7714 ilv->length_sensitive = ARC_LS_8;
7715 ilv++;
7716 }
7717 /* Fall through. */
7718 case TYPE_BRCC_NO_DELAY_SLOT:
7719 /* doloop_fallback* patterns are TYPE_BRCC_NO_DELAY_SLOT for
7720 (delay slot) scheduling purposes, but they are longer. */
7721 if (GET_CODE (PATTERN (insn)) == PARALLEL
7722 && GET_CODE (XVECEXP (PATTERN (insn), 0, 1)) == SET)
7723 return 0;
7724 /* Standard BRCC: 4 bytes, or 8 bytes with limm. */
7725 ilv->length = ((type == TYPE_BRCC) ? 4 : 8);
7726 ilv->align_set = 3;
7727 ilv->branch_cost = branch_align_cost;
7728 ilv->enabled = (len <= ilv->length);
7729 ilv->length_sensitive = ARC_LS_9;
7730 if ((target_p || force_target)
7731 || (!delay_filled && TARGET_UNALIGN_BRANCH))
7732 {
7733 ilv[1] = *ilv;
7734 ilv->align_set = 1;
7735 ilv++;
7736 ilv->align_set = 2;
7737 ilv->target_cost = 1;
7738 ilv->branch_cost = branch_unalign_cost;
7739 }
7740 ilv++;
7741
7742 rtx op, op0;
7743 op = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0);
7744 op0 = XEXP (op, 0);
7745
7746 if (GET_CODE (op0) == ZERO_EXTRACT
7747 && satisfies_constraint_L (XEXP (op0, 2)))
7748 op0 = XEXP (op0, 0);
7749 if (satisfies_constraint_Rcq (op0))
7750 {
7751 ilv->length = ((type == TYPE_BRCC) ? 6 : 10);
7752 ilv->align_set = 3;
7753 ilv->branch_cost = 1 + branch_align_cost;
7754 ilv->fallthrough_cost = 1;
7755 ilv->enabled = true;
7756 ilv->length_sensitive = ARC_LS_21;
7757 if (!delay_filled && TARGET_UNALIGN_BRANCH)
7758 {
7759 ilv[1] = *ilv;
7760 ilv->align_set = 1;
7761 ilv++;
7762 ilv->align_set = 2;
7763 ilv->branch_cost = 1 + branch_unalign_cost;
7764 }
7765 ilv++;
7766 }
7767 ilv->length = ((type == TYPE_BRCC) ? 8 : 12);
7768 ilv->align_set = 3;
7769 ilv->branch_cost = 1 + branch_align_cost;
7770 ilv->fallthrough_cost = 1;
7771 ilv->enabled = true;
7772 ilv->length_sensitive = ARC_LS_21;
7773 if ((target_p || force_target)
7774 || (!delay_filled && TARGET_UNALIGN_BRANCH))
7775 {
7776 ilv[1] = *ilv;
7777 ilv->align_set = 1;
7778 ilv++;
7779 ilv->align_set = 2;
7780 ilv->target_cost = 1;
7781 ilv->branch_cost = 1 + branch_unalign_cost;
7782 }
7783 ilv++;
7784 break;
7785
7786 case TYPE_SFUNC:
7787 ilv->length = 12;
7788 goto do_call;
7789 case TYPE_CALL_NO_DELAY_SLOT:
7790 ilv->length = 8;
7791 goto do_call;
7792 case TYPE_CALL:
7793 ilv->length = 4;
7794 ilv->length_sensitive
7795 = GET_CODE (PATTERN (insn)) == COND_EXEC ? ARC_LS_21 : ARC_LS_25;
7796 do_call:
7797 ilv->align_set = 3;
7798 ilv->fallthrough_cost = branch_align_cost;
7799 ilv->enabled = true;
7800 if ((target_p || force_target)
7801 || (!delay_filled && TARGET_UNALIGN_BRANCH))
7802 {
7803 ilv[1] = *ilv;
7804 ilv->align_set = 1;
7805 ilv++;
7806 ilv->align_set = 2;
7807 ilv->target_cost = 1;
7808 ilv->fallthrough_cost = branch_unalign_cost;
7809 }
7810 ilv++;
7811 break;
7812 case TYPE_UNCOND_BRANCH:
7813 /* Strictly speaking, this should be ARC_LS_10 for equality comparisons,
7814 but that makes no difference at the moment. */
7815 ilv->length_sensitive = ARC_LS_7;
7816 ilv[1].length_sensitive = ARC_LS_25;
7817 goto do_branch;
7818 case TYPE_BRANCH:
7819 ilv->length_sensitive = ARC_LS_10;
7820 ilv[1].length_sensitive = ARC_LS_21;
7821 do_branch:
7822 ilv->align_set = 3;
7823 ilv->length = 2;
7824 ilv->branch_cost = branch_align_cost;
7825 ilv->enabled = (len == ilv->length);
7826 ilv++;
7827 ilv->length = 4;
7828 ilv->align_set = 3;
7829 ilv->branch_cost = branch_align_cost;
7830 ilv->enabled = true;
7831 if ((target_p || force_target)
7832 || (!delay_filled && TARGET_UNALIGN_BRANCH))
7833 {
7834 ilv[1] = *ilv;
7835 ilv->align_set = 1;
7836 ilv++;
7837 ilv->align_set = 2;
7838 ilv->target_cost = 1;
7839 ilv->branch_cost = branch_unalign_cost;
7840 }
7841 ilv++;
7842 break;
7843 case TYPE_JUMP:
7844 return 0;
7845 default:
7846 /* For every short insn, there is generally also a long insn.
7847 trap_s is an exception. */
7848 if ((len & 2) == 0 || recog_memoized (insn) == CODE_FOR_trap_s)
7849 return 0;
7850 ilv->align_set = 3;
7851 ilv->length = len;
7852 ilv->enabled = 1;
7853 ilv++;
7854 ilv->align_set = 3;
7855 ilv->length = len + 2;
7856 ilv->enabled = 1;
7857 if (target_p || force_target)
7858 {
7859 ilv[1] = *ilv;
7860 ilv->align_set = 1;
7861 ilv++;
7862 ilv->align_set = 2;
7863 ilv->target_cost = 1;
7864 }
7865 ilv++;
7866 }
7867 /* If the previous instruction is an sfunc call, this insn is always
7868 a target, even though the middle-end is unaware of this.
7869 Therefore, if we have a call predecessor, transfer the target cost
7870 to the fallthrough and branch costs. */
7871 if (force_target)
7872 {
7873 for (insn_length_variant_t *p = first_ilv; p < ilv; p++)
7874 {
7875 p->fallthrough_cost += p->target_cost;
7876 p->branch_cost += p->target_cost;
7877 p->target_cost = 0;
7878 }
7879 }
7880
7881 return ilv - first_ilv;
7882 }
7883
7884 static void
7885 arc_insn_length_parameters (insn_length_parameters_t *ilp)
7886 {
7887 ilp->align_unit_log = 1;
7888 ilp->align_base_log = 1;
7889 ilp->max_variants = 7;
7890 ilp->get_variants = arc_get_insn_variants;
7891 }
7892
7893 /* Return a copy of COND from *STATEP, inverted if that is indicated by the
7894 CC field of *STATEP. */
7895
7896 static rtx
7897 arc_get_ccfsm_cond (struct arc_ccfsm *statep, bool reverse)
7898 {
7899 rtx cond = statep->cond;
7900 int raw_cc = get_arc_condition_code (cond);
7901 if (reverse)
7902 raw_cc = ARC_INVERSE_CONDITION_CODE (raw_cc);
7903
7904 if (statep->cc == raw_cc)
7905 return copy_rtx (cond);
7906
7907 gcc_assert (ARC_INVERSE_CONDITION_CODE (raw_cc) == statep->cc);
7908
7909 machine_mode ccm = GET_MODE (XEXP (cond, 0));
7910 enum rtx_code code = reverse_condition (GET_CODE (cond));
7911 if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
7912 code = reverse_condition_maybe_unordered (GET_CODE (cond));
7913
7914 return gen_rtx_fmt_ee (code, GET_MODE (cond),
7915 copy_rtx (XEXP (cond, 0)), copy_rtx (XEXP (cond, 1)));
7916 }
7917
7918 /* Return version of PAT conditionalized with COND, which is part of INSN.
7919 ANNULLED indicates if INSN is an annulled delay-slot insn.
7920 Register further changes if necessary. */
7921 static rtx
7922 conditionalize_nonjump (rtx pat, rtx cond, rtx insn, bool annulled)
7923 {
7924 /* For commutative operators, we generally prefer to have
7925 the first source match the destination. */
7926 if (GET_CODE (pat) == SET)
7927 {
7928 rtx src = SET_SRC (pat);
7929
7930 if (COMMUTATIVE_P (src))
7931 {
7932 rtx src0 = XEXP (src, 0);
7933 rtx src1 = XEXP (src, 1);
7934 rtx dst = SET_DEST (pat);
7935
7936 if (rtx_equal_p (src1, dst) && !rtx_equal_p (src0, dst)
7937 /* Leave add_n alone - the canonical form is to
7938 have the complex summand first. */
7939 && REG_P (src0))
7940 pat = gen_rtx_SET (dst,
7941 gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src),
7942 src1, src0));
7943 }
7944 }
7945
7946 /* dwarf2out.c:dwarf2out_frame_debug_expr doesn't know
7947 what to do with COND_EXEC. */
7948 if (RTX_FRAME_RELATED_P (insn))
7949 {
7950 /* If this is the delay slot insn of an annulled branch,
7951 dwarf2out.c:scan_trace understands the annulling semantics
7952 without the COND_EXEC. */
7953 gcc_assert (annulled);
7954 rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat,
7955 REG_NOTES (insn));
7956 validate_change (insn, &REG_NOTES (insn), note, 1);
7957 }
7958 pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
7959 return pat;
7960 }
7961
7962 /* Use the ccfsm machinery to do if conversion. */
7963
7964 static unsigned
7965 arc_ifcvt (void)
7966 {
7967 struct arc_ccfsm *statep = &cfun->machine->ccfsm_current;
7968 basic_block merge_bb = 0;
7969
7970 memset (statep, 0, sizeof *statep);
7971 for (rtx_insn *insn = get_insns (); insn; insn = next_insn (insn))
7972 {
7973 arc_ccfsm_advance (insn, statep);
7974
7975 switch (statep->state)
7976 {
7977 case 0:
7978 if (JUMP_P (insn))
7979 merge_bb = 0;
7980 break;
7981 case 1: case 2:
7982 {
7983 /* Deleted branch. */
7984 gcc_assert (!merge_bb);
7985 merge_bb = BLOCK_FOR_INSN (insn);
7986 basic_block succ_bb
7987 = BLOCK_FOR_INSN (NEXT_INSN (NEXT_INSN (PREV_INSN (insn))));
7988 arc_ccfsm_post_advance (insn, statep);
7989 gcc_assert (!IN_RANGE (statep->state, 1, 2));
7990 rtx_insn *seq = NEXT_INSN (PREV_INSN (insn));
7991 if (seq != insn)
7992 {
7993 rtx slot = XVECEXP (PATTERN (seq), 0, 1);
7994 rtx pat = PATTERN (slot);
7995 if (INSN_ANNULLED_BRANCH_P (insn))
7996 {
7997 rtx cond
7998 = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (slot));
7999 pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
8000 }
8001 if (!validate_change (seq, &PATTERN (seq), pat, 0))
8002 gcc_unreachable ();
8003 PUT_CODE (slot, NOTE);
8004 NOTE_KIND (slot) = NOTE_INSN_DELETED;
8005 if (merge_bb && succ_bb)
8006 merge_blocks (merge_bb, succ_bb);
8007 }
8008 else if (merge_bb && succ_bb)
8009 {
8010 set_insn_deleted (insn);
8011 merge_blocks (merge_bb, succ_bb);
8012 }
8013 else
8014 {
8015 PUT_CODE (insn, NOTE);
8016 NOTE_KIND (insn) = NOTE_INSN_DELETED;
8017 }
8018 continue;
8019 }
8020 case 3:
8021 if (LABEL_P (insn)
8022 && statep->target_label == CODE_LABEL_NUMBER (insn))
8023 {
8024 arc_ccfsm_post_advance (insn, statep);
8025 basic_block succ_bb = BLOCK_FOR_INSN (insn);
8026 if (merge_bb && succ_bb)
8027 merge_blocks (merge_bb, succ_bb);
8028 else if (--LABEL_NUSES (insn) == 0)
8029 {
8030 const char *name = LABEL_NAME (insn);
8031 PUT_CODE (insn, NOTE);
8032 NOTE_KIND (insn) = NOTE_INSN_DELETED_LABEL;
8033 NOTE_DELETED_LABEL_NAME (insn) = name;
8034 }
8035 merge_bb = 0;
8036 continue;
8037 }
8038 /* Fall through. */
8039 case 4: case 5:
8040 if (!NONDEBUG_INSN_P (insn))
8041 break;
8042
8043 /* Conditionalized insn. */
8044
8045 rtx_insn *prev, *pprev;
8046 rtx *patp, pat, cond;
8047 bool annulled; annulled = false;
8048
8049 /* If this is a delay slot insn in a non-annulled branch,
8050 don't conditionalize it. N.B., this should be fine for
8051 conditional return too. However, don't do this for
8052 unconditional branches, as these would be encountered when
8053 processing an 'else' part. */
8054 prev = PREV_INSN (insn);
8055 pprev = PREV_INSN (prev);
8056 if (pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn)
8057 && JUMP_P (prev) && get_attr_cond (prev) == COND_USE)
8058 {
8059 if (!INSN_ANNULLED_BRANCH_P (prev))
8060 break;
8061 annulled = true;
8062 }
8063
8064 patp = &PATTERN (insn);
8065 pat = *patp;
8066 cond = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (insn));
8067 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8068 {
8069 /* ??? don't conditionalize if all side effects are dead
8070 in the not-execute case. */
8071
8072 pat = conditionalize_nonjump (pat, cond, insn, annulled);
8073 }
8074 else if (simplejump_p (insn))
8075 {
8076 patp = &SET_SRC (pat);
8077 pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, *patp, pc_rtx);
8078 }
8079 else if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
8080 {
8081 pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, pat, pc_rtx);
8082 pat = gen_rtx_SET (pc_rtx, pat);
8083 }
8084 else
8085 gcc_unreachable ();
8086 validate_change (insn, patp, pat, 1);
8087 if (!apply_change_group ())
8088 gcc_unreachable ();
8089 if (JUMP_P (insn))
8090 {
8091 rtx_insn *next = next_nonnote_insn (insn);
8092 if (GET_CODE (next) == BARRIER)
8093 delete_insn (next);
8094 if (statep->state == 3)
8095 continue;
8096 }
8097 break;
8098 default:
8099 gcc_unreachable ();
8100 }
8101 arc_ccfsm_post_advance (insn, statep);
8102 }
8103 return 0;
8104 }
8105
8106 /* Find annulled delay insns and convert them to use the appropriate predicate.
8107 This allows branch shortening to size up these insns properly. */
8108
8109 static unsigned
8110 arc_predicate_delay_insns (void)
8111 {
8112 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8113 {
8114 rtx pat, jump, dlay, src, cond, *patp;
8115 int reverse;
8116
8117 if (!NONJUMP_INSN_P (insn)
8118 || GET_CODE (pat = PATTERN (insn)) != SEQUENCE)
8119 continue;
8120 jump = XVECEXP (pat, 0, 0);
8121 dlay = XVECEXP (pat, 0, 1);
8122 if (!JUMP_P (jump) || !INSN_ANNULLED_BRANCH_P (jump))
8123 continue;
8124 /* If the branch insn does the annulling, leave the delay insn alone. */
8125 if (!TARGET_AT_DBR_CONDEXEC && !INSN_FROM_TARGET_P (dlay))
8126 continue;
8127 /* ??? Could also leave DLAY un-conditionalized if its target is dead
8128 on the other path. */
8129 gcc_assert (GET_CODE (PATTERN (jump)) == SET);
8130 gcc_assert (SET_DEST (PATTERN (jump)) == pc_rtx);
8131 src = SET_SRC (PATTERN (jump));
8132 gcc_assert (GET_CODE (src) == IF_THEN_ELSE);
8133 cond = XEXP (src, 0);
8134 if (XEXP (src, 2) == pc_rtx)
8135 reverse = 0;
8136 else if (XEXP (src, 1) == pc_rtx)
8137 reverse = 1;
8138 else
8139 gcc_unreachable ();
8140 if (reverse != !INSN_FROM_TARGET_P (dlay))
8141 {
8142 machine_mode ccm = GET_MODE (XEXP (cond, 0));
8143 enum rtx_code code = reverse_condition (GET_CODE (cond));
8144 if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
8145 code = reverse_condition_maybe_unordered (GET_CODE (cond));
8146
8147 cond = gen_rtx_fmt_ee (code, GET_MODE (cond),
8148 copy_rtx (XEXP (cond, 0)),
8149 copy_rtx (XEXP (cond, 1)));
8150 }
8151 else
8152 cond = copy_rtx (cond);
8153 patp = &PATTERN (dlay);
8154 pat = *patp;
8155 pat = conditionalize_nonjump (pat, cond, dlay, true);
8156 validate_change (dlay, patp, pat, 1);
8157 if (!apply_change_group ())
8158 gcc_unreachable ();
8159 }
8160 return 0;
8161 }
8162
8163 /* For ARC600: If a write to a core reg >=32 appears in a delay slot
8164 (other than of a forward brcc), it creates a hazard when there is a read
8165 of the same register at the branch target. We can't know what is at the
8166 branch target of calls, and for branches, we don't really know before the
8167 end of delay slot scheduling, either. Not only can individual instructions
8168 be hoisted out into a delay slot, a basic block can also be emptied this
8169 way, and branch and/or fall through targets be redirected. Hence we don't
8170 want such writes in a delay slot. */
8171
8172 /* Return nonzero iff INSN writes to an extension core register. */
8173
8174 int
8175 arc_write_ext_corereg (rtx insn)
8176 {
8177 subrtx_iterator::array_type array;
8178 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
8179 {
8180 const_rtx x = *iter;
8181 switch (GET_CODE (x))
8182 {
8183 case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
8184 break;
8185 default:
8186 /* This is also fine for PRE/POST_MODIFY, because they
8187 contain a SET. */
8188 continue;
8189 }
8190 const_rtx dest = XEXP (x, 0);
8191 if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61)
8192 return 1;
8193 }
8194 return 0;
8195 }
8196
8197 /* This is like the hook, but returns NULL when it can't / won't generate
8198 a legitimate address. */
8199
8200 static rtx
8201 arc_legitimize_address_0 (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8202 machine_mode mode)
8203 {
8204 rtx addr, inner;
8205
8206 if (flag_pic && SYMBOLIC_CONST (x))
8207 (x) = arc_legitimize_pic_address (x, 0);
8208 addr = x;
8209 if (GET_CODE (addr) == CONST)
8210 addr = XEXP (addr, 0);
8211 if (GET_CODE (addr) == PLUS
8212 && CONST_INT_P (XEXP (addr, 1))
8213 && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
8214 && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0)))
8215 || (REG_P (XEXP (addr, 0))
8216 && (INTVAL (XEXP (addr, 1)) & 252))))
8217 {
8218 HOST_WIDE_INT offs, upper;
8219 int size = GET_MODE_SIZE (mode);
8220
8221 offs = INTVAL (XEXP (addr, 1));
8222 upper = (offs + 256 * size) & ~511 * size;
8223 inner = plus_constant (Pmode, XEXP (addr, 0), upper);
8224 #if 0 /* ??? this produces worse code for EEMBC idctrn01 */
8225 if (GET_CODE (x) == CONST)
8226 inner = gen_rtx_CONST (Pmode, inner);
8227 #endif
8228 addr = plus_constant (Pmode, force_reg (Pmode, inner), offs - upper);
8229 x = addr;
8230 }
8231 else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr))
8232 x = force_reg (Pmode, x);
8233 if (memory_address_p ((machine_mode) mode, x))
8234 return x;
8235 return NULL_RTX;
8236 }
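
/* A worked example of the offset splitting above (illustrative only):
   for SImode (size 4) and X = (plus (reg r0) (const_int 5000)),
   upper = (5000 + 256*4) & (-512*4) = 6024 & -2048 = 4096, so the result
   is (plus (reg tmp) (const_int 904)) with tmp holding r0 + 4096; the
   residual offset is meant to fall within the short scaled-offset range
   of the load/store instructions.  */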
8237
8238 static rtx
8239 arc_legitimize_address (rtx orig_x, rtx oldx, machine_mode mode)
8240 {
8241 rtx new_x = arc_legitimize_address_0 (orig_x, oldx, mode);
8242
8243 if (new_x)
8244 return new_x;
8245 return orig_x;
8246 }
8247
8248 static rtx
8249 arc_delegitimize_address_0 (rtx x)
8250 {
8251 rtx u, gp;
8252
8253 if (GET_CODE (x) == CONST && GET_CODE (u = XEXP (x, 0)) == UNSPEC)
8254 {
8255 if (XINT (u, 1) == ARC_UNSPEC_GOT)
8256 return XVECEXP (u, 0, 0);
8257 }
8258 else if (GET_CODE (x) == PLUS
8259 && ((REG_P (gp = XEXP (x, 0))
8260 && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
8261 || (GET_CODE (gp) == CONST
8262 && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
8263 && XINT (u, 1) == ARC_UNSPEC_GOT
8264 && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
8265 && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
8266 && GET_CODE (XEXP (x, 1)) == CONST
8267 && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
8268 && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
8269 return XVECEXP (u, 0, 0);
8270 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8271 && ((REG_P (gp = XEXP (XEXP (x, 0), 1))
8272 && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
8273 || (GET_CODE (gp) == CONST
8274 && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
8275 && XINT (u, 1) == ARC_UNSPEC_GOT
8276 && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
8277 && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
8278 && GET_CODE (XEXP (x, 1)) == CONST
8279 && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
8280 && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
8281 return gen_rtx_PLUS (GET_MODE (x), XEXP (XEXP (x, 0), 0),
8282 XVECEXP (u, 0, 0));
8283 else if (GET_CODE (x) == PLUS
8284 && (u = arc_delegitimize_address_0 (XEXP (x, 1))))
8285 return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), u);
8286 return NULL_RTX;
8287 }
8288
8289 static rtx
8290 arc_delegitimize_address (rtx x)
8291 {
8292 rtx orig_x = x = delegitimize_mem_from_attrs (x);
8293 if (GET_CODE (x) == MEM)
8294 x = XEXP (x, 0);
8295 x = arc_delegitimize_address_0 (x);
8296 if (x)
8297 {
8298 if (MEM_P (orig_x))
8299 x = replace_equiv_address_nv (orig_x, x);
8300 return x;
8301 }
8302 return orig_x;
8303 }
8304
8305 /* Return a REG rtx for acc1. N.B. the gcc-internal representation may
8306 differ from the hardware register number in order to allow the generic
8307 code to correctly split the concatenation of acc1 and acc2. */
8308
8309 rtx
8310 gen_acc1 (void)
8311 {
8312 return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57);
8313 }
8314
8315 /* Return a REG rtx for acc2. N.B. the gcc-internal representation may
8316 differ from the hardware register number in order to allow the generic
8317 code to correctly split the concatenation of acc1 and acc2. */
8318
8319 rtx
8320 gen_acc2 (void)
8321 {
8322 return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56);
8323 }
8324
8325 /* Return a REG rtx for mlo. N.B. the gcc-internal representation may
8326 differ from the hardware register number in order to allow the generic
8327 code to correctly split the concatenation of mhi and mlo. */
8328
8329 rtx
8330 gen_mlo (void)
8331 {
8332 return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 59: 58);
8333 }
8334
8335 /* Return a REG rtx for mhi. N.B. the gcc-internal representation may
8336 differ from the hardware register number in order to allow the generic
8337 code to correctly split the concatenation of mhi and mlo. */
8338
8339 rtx
8340 gen_mhi (void)
8341 {
8342 return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 58: 59);
8343 }
8344
8345 /* FIXME: a parameter should be added, and code added to final.c,
8346 to reproduce this functionality in shorten_branches. */
8347 #if 0
8348 /* Return nonzero iff BRANCH should be unaligned if possible by upsizing
8349 a previous instruction. */
8350 int
8351 arc_unalign_branch_p (rtx branch)
8352 {
8353 rtx note;
8354
8355 if (!TARGET_UNALIGN_BRANCH)
8356 return 0;
8357 /* Do not do this if we have a filled delay slot. */
8358 if (get_attr_delay_slot_filled (branch) == DELAY_SLOT_FILLED_YES
8359 && !NEXT_INSN (branch)->deleted ())
8360 return 0;
8361 note = find_reg_note (branch, REG_BR_PROB, 0);
8362 return (!note
8363 || (arc_unalign_prob_threshold && !br_prob_note_reliable_p (note))
8364 || INTVAL (XEXP (note, 0)) < arc_unalign_prob_threshold);
8365 }
8366 #endif
8367
8368 /* When estimating sizes during arc_reorg, when optimizing for speed, there
8369 are three reasons why we need to consider branches to be length 6:
8370 - annul-false delay slot insns are implemented using conditional execution,
8371 thus preventing short insn formation where used.
8372 - for ARC600: annul-true delay slot insns are implemented where possible
8373 using conditional execution, preventing short insn formation where used.
8374 - for ARC700: likely or somewhat likely taken branches are made long and
8375 unaligned if possible to avoid branch penalty. */
8376
8377 bool
8378 arc_branch_size_unknown_p (void)
8379 {
8380 return !optimize_size && arc_reorg_in_progress;
8381 }
8382
8383 /* We are about to output a return insn. Add padding if necessary to avoid
8384 a mispredict. A return could happen immediately after the function
8385 start, but after a call we know that there will be at least a blink
8386 restore. */
8387
8388 void
8389 arc_pad_return (void)
8390 {
8391 rtx_insn *insn = current_output_insn;
8392 rtx_insn *prev = prev_active_insn (insn);
8393 int want_long;
8394
8395 if (!prev)
8396 {
8397 fputs ("\tnop_s\n", asm_out_file);
8398 cfun->machine->unalign ^= 2;
8399 want_long = 1;
8400 }
8401 /* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
8402 because after a call, we'd have to restore blink first. */
8403 else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
8404 return;
8405 else
8406 {
8407 want_long = (get_attr_length (prev) == 2);
8408 prev = prev_active_insn (prev);
8409 }
8410 if (!prev
8411 || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
8412 ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
8413 NON_SIBCALL)
8414 : CALL_ATTR (prev, NON_SIBCALL)))
8415 {
8416 if (want_long)
8417 cfun->machine->size_reason
8418 = "call/return and return/return must be 6 bytes apart to avoid mispredict";
8419 else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
8420 {
8421 cfun->machine->size_reason
8422 = "Long unaligned jump avoids non-delay slot penalty";
8423 want_long = 1;
8424 }
8425 /* Disgorge delay insn, if there is any, and it may be moved. */
8426 if (final_sequence
8427 /* ??? Annulled would be OK if we can and do conditionalize
8428 the delay slot insn accordingly. */
8429 && !INSN_ANNULLED_BRANCH_P (insn)
8430 && (get_attr_cond (insn) != COND_USE
8431 || !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
8432 XVECEXP (final_sequence, 0, 1))))
8433 {
8434 prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1));
8435 gcc_assert (!prev_real_insn (insn)
8436 || !arc_hazard (prev_real_insn (insn), prev));
8437 cfun->machine->force_short_suffix = !want_long;
8438 rtx save_pred = current_insn_predicate;
8439 final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
8440 cfun->machine->force_short_suffix = -1;
8441 prev->set_deleted ();
8442 current_output_insn = insn;
8443 current_insn_predicate = save_pred;
8444 }
8445 else if (want_long)
8446 fputs ("\tnop\n", asm_out_file);
8447 else
8448 {
8449 fputs ("\tnop_s\n", asm_out_file);
8450 cfun->machine->unalign ^= 2;
8451 }
8452 }
8453 return;
8454 }
8455
8456 /* The usual; we set up our machine_function data. */
8457
8458 static struct machine_function *
8459 arc_init_machine_status (void)
8460 {
8461 struct machine_function *machine;
8462 machine = ggc_cleared_alloc<machine_function> ();
8463 machine->fn_type = ARC_FUNCTION_UNKNOWN;
8464 machine->force_short_suffix = -1;
8465
8466 return machine;
8467 }
8468
8469 /* Implements INIT_EXPANDERS. We just set up to call the above
8470 function. */
8471
8472 void
8473 arc_init_expanders (void)
8474 {
8475 init_machine_status = arc_init_machine_status;
8476 }
8477
8478 /* Check if OP is a proper parallel of a millicode call pattern. OFFSET
8479 indicates a number of elements to ignore - that allows having a
8480 sibcall pattern that starts with (return). LOAD_P is zero for store
8481 multiple (for prologues), and one for load multiples (for epilogues),
8482 and two for load multiples where no final clobber of blink is required.
8483 We also skip the first load / store element since this is supposed to
8484 be checked in the instruction pattern. */
8485
8486 int
8487 arc_check_millicode (rtx op, int offset, int load_p)
8488 {
8489 int len = XVECLEN (op, 0) - offset;
8490 int i;
8491
8492 if (load_p == 2)
8493 {
8494 if (len < 2 || len > 13)
8495 return 0;
8496 load_p = 1;
8497 }
8498 else
8499 {
8500 rtx elt = XVECEXP (op, 0, --len);
8501
8502 if (GET_CODE (elt) != CLOBBER
8503 || !REG_P (XEXP (elt, 0))
8504 || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM
8505 || len < 3 || len > 13)
8506 return 0;
8507 }
8508 for (i = 1; i < len; i++)
8509 {
8510 rtx elt = XVECEXP (op, 0, i + offset);
8511 rtx reg, mem, addr;
8512
8513 if (GET_CODE (elt) != SET)
8514 return 0;
8515 mem = XEXP (elt, load_p);
8516 reg = XEXP (elt, 1-load_p);
8517 if (!REG_P (reg) || REGNO (reg) != 13U+i || !MEM_P (mem))
8518 return 0;
8519 addr = XEXP (mem, 0);
8520 if (GET_CODE (addr) != PLUS
8521 || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
8522 || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4)
8523 return 0;
8524 }
8525 return 1;
8526 }
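
/* For illustration, a store-multiple PARALLEL this accepts (LOAD_P == 0,
   OFFSET == 0) looks roughly like the sketch below; the first element is
   checked by the insn pattern itself, element I stores r13+I at sp+4*I,
   and the final element clobbers blink:
     (parallel [(set (mem (reg sp)) (reg 13))
                (set (mem (plus (reg sp) (const_int 4))) (reg 14))
                (set (mem (plus (reg sp) (const_int 8))) (reg 15))
                (clobber (reg:SI blink))])  */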
8527
8528 /* Accessor functions for cfun->machine->unalign. */
8529
8530 int
8531 arc_get_unalign (void)
8532 {
8533 return cfun->machine->unalign;
8534 }
8535
8536 void
8537 arc_clear_unalign (void)
8538 {
8539 if (cfun)
8540 cfun->machine->unalign = 0;
8541 }
8542
8543 void
8544 arc_toggle_unalign (void)
8545 {
8546 cfun->machine->unalign ^= 2;
8547 }
8548
8549 /* Operands 0..2 are the operands of an addsi which uses a 12 bit
8550 constant in operand 2, but which would require a LIMM because of
8551 operand mismatch.
8552 operands 3 and 4 are new SET_SRCs for operand 0. */
8553
8554 void
8555 split_addsi (rtx *operands)
8556 {
8557 int val = INTVAL (operands[2]);
8558
8559 /* Try for two short insns first. Lengths being equal, we prefer
8560 expansions with shorter register lifetimes. */
8561 if (val > 127 && val <= 255
8562 && satisfies_constraint_Rcq (operands[0]))
8563 {
8564 operands[3] = operands[2];
8565 operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
8566 }
8567 else
8568 {
8569 operands[3] = operands[1];
8570 operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]);
8571 }
8572 }
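
/* A rough illustration with assumed operands: splitting "r0 = r1 + 200",
   where r0 is an ARCompact16 register that does not match r1, gives
     r0 = 200        (operands[3]; a short move with u8 immediate)
     r0 = r0 + r1    (operands[4]; a short register-register add)
   while values outside 128..255 fall back to "r0 = r1" followed by
   "r0 = r0 + constant".  */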
8573
8574 /* Operands 0..2 are the operands of a subsi which uses a 12 bit
8575 constant in operand 1, but which would require a LIMM because of
8576 operand mismatch.
8577 operands 3 and 4 are new SET_SRCs for operand 0. */
8578
8579 void
8580 split_subsi (rtx *operands)
8581 {
8582 int val = INTVAL (operands[1]);
8583
8584 /* Try for two short insns first. Lengths being equal, we prefer
8585 expansions with shorter register lifetimes. */
8586 if (satisfies_constraint_Rcq (operands[0])
8587 && satisfies_constraint_Rcq (operands[2]))
8588 {
8589 if (val >= -31 && val <= 127)
8590 {
8591 operands[3] = gen_rtx_NEG (SImode, operands[2]);
8592 operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
8593 return;
8594 }
8595 else if (val >= 0 && val < 255)
8596 {
8597 operands[3] = operands[1];
8598 operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]);
8599 return;
8600 }
8601 }
8602 /* If the destination is not an ARCompact16 register, we might
8603 still have a chance to make a short insn if the source is;
8604 we need to start with a reg-reg move for this. */
8605 operands[3] = operands[2];
8606 operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]);
8607 }
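
/* Likewise for the subtract case, with assumed operands: splitting
   "r0 = 200 - r1" with r0 and r1 both ARCompact16 registers takes the
   second branch and gives "r0 = 200" then "r0 = r0 - r1", whereas a
   constant in the range -31..127 uses "r0 = -r1" then "r0 = r0 + val".  */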
8608
8609 /* Handle DOUBLE_REGS uses.
8610 Operand 0: destination register
8611 Operand 1: source register */
8612
8613 static bool
8614 arc_process_double_reg_moves (rtx *operands)
8615 {
8616 rtx dest = operands[0];
8617 rtx src = operands[1];
8618
8619 enum usesDxState { none, srcDx, destDx, maxDx };
8620 enum usesDxState state = none;
8621
8622 if (refers_to_regno_p (40, 44, src, 0))
8623 state = srcDx;
8624 if (refers_to_regno_p (40, 44, dest, 0))
8625 {
8626 /* Via arc_register_move_cost, we should never see D,D moves. */
8627 gcc_assert (state == none);
8628 state = destDx;
8629 }
8630
8631 if (state == none)
8632 return false;
8633
8634 if (state == srcDx)
8635 {
8636 /* Without the LR insn, we need to split this into a
8637 sequence of insns which will use the DEXCLx and DADDHxy
8638 insns to be able to read the Dx register in question. */
8639 if (TARGET_DPFP_DISABLE_LRSR)
8640 {
8641 /* gen *movdf_insn_nolrsr */
8642 rtx set = gen_rtx_SET (dest, src);
8643 rtx use1 = gen_rtx_USE (VOIDmode, const1_rtx);
8644 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, use1)));
8645 }
8646 else
8647 {
8648 /* When we have 'mov r, D', get the target register pair
8649 for use with the LR insn. */
8650 rtx destHigh = simplify_gen_subreg(SImode, dest, DFmode, 4);
8651 rtx destLow = simplify_gen_subreg(SImode, dest, DFmode, 0);
8652
8653 /* Produce the two LR insns to get the high and low parts. */
8654 emit_insn (gen_rtx_SET (destHigh,
8655 gen_rtx_UNSPEC_VOLATILE (Pmode,
8656 gen_rtvec (1, src),
8657 VUNSPEC_ARC_LR_HIGH)));
8658 emit_insn (gen_rtx_SET (destLow,
8659 gen_rtx_UNSPEC_VOLATILE (Pmode,
8660 gen_rtvec (1, src),
8661 VUNSPEC_ARC_LR)));
8662 }
8663 }
8664 else if (state == destDx)
8665 {
      /* When we have 'mov r, D' or 'mov D, D' and we have access to the
	 LR insn, get the target register pair.  */
      rtx srcHigh = simplify_gen_subreg (SImode, src, DFmode, 4);
      rtx srcLow = simplify_gen_subreg (SImode, src, DFmode, 0);
8670
8671 emit_insn (gen_rtx_UNSPEC_VOLATILE (Pmode,
8672 gen_rtvec (3, dest, srcHigh, srcLow),
8673 VUNSPEC_ARC_DEXCL_NORES));
8674
8675 }
8676 else
8677 gcc_unreachable ();
8678
8679 return true;
8680 }
8681
/* Operands 0..1 are the operands of a 64 bit move instruction.
   Emit the move as two SImode word moves, unless it can be handled
   as a single instruction (double-register FP moves, LL64).  */
8684
8685 void
arc_split_move (rtx *operands)
8687 {
8688 machine_mode mode = GET_MODE (operands[0]);
8689 int i;
8690 int swap = 0;
8691 rtx xop[4];
8692
8693 if (TARGET_DPFP)
8694 {
8695 if (arc_process_double_reg_moves (operands))
8696 return;
8697 }
8698
8699 if (TARGET_LL64
8700 && ((memory_operand (operands[0], mode)
8701 && even_register_operand (operands[1], mode))
8702 || (memory_operand (operands[1], mode)
8703 && even_register_operand (operands[0], mode))))
8704 {
8705 emit_move_insn (operands[0], operands[1]);
8706 return;
8707 }
8708
8709 for (i = 0; i < 2; i++)
8710 {
8711 if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
8712 {
8713 rtx addr = XEXP (operands[i], 0);
8714 rtx r, o;
8715 enum rtx_code code;
8716
8717 gcc_assert (!reg_overlap_mentioned_p (operands[0], addr));
8718 switch (GET_CODE (addr))
8719 {
8720 case PRE_DEC: o = GEN_INT (-8); goto pre_modify;
8721 case PRE_INC: o = GEN_INT (8); goto pre_modify;
8722 case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1);
8723 pre_modify:
8724 code = PRE_MODIFY;
8725 break;
8726 case POST_DEC: o = GEN_INT (-8); goto post_modify;
8727 case POST_INC: o = GEN_INT (8); goto post_modify;
8728 case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1);
8729 post_modify:
8730 code = POST_MODIFY;
8731 swap = 2;
8732 break;
8733 default:
8734 gcc_unreachable ();
8735 }
8736 r = XEXP (addr, 0);
8737 xop[0+i] = adjust_automodify_address_nv
8738 (operands[i], SImode,
8739 gen_rtx_fmt_ee (code, Pmode, r,
8740 gen_rtx_PLUS (Pmode, r, o)),
8741 0);
8742 xop[2+i] = adjust_automodify_address_nv
8743 (operands[i], SImode, plus_constant (Pmode, r, 4), 4);
8744 }
8745 else
8746 {
8747 xop[0+i] = operand_subword (operands[i], 0, 0, mode);
8748 xop[2+i] = operand_subword (operands[i], 1, 0, mode);
8749 }
8750 }
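  /* If the low-word move would clobber a register that the high-word
     move still needs as a source, emit the high word first instead.  */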
8751 if (reg_overlap_mentioned_p (xop[0], xop[3]))
8752 {
8753 swap = 2;
8754 gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
8755 }
8756
8757 emit_move_insn (xop[0 + swap], xop[1 + swap]);
8758 emit_move_insn (xop[2 - swap], xop[3 - swap]);
8759
8760 }
8761
8762 /* Select between the instruction output templates s_tmpl (for short INSNs)
8763 and l_tmpl (for long INSNs). */
8764
8765 const char *
arc_short_long (rtx_insn *insn, const char *s_tmpl, const char *l_tmpl)
8767 {
8768 int is_short = arc_verify_short (insn, cfun->machine->unalign, -1);
8769
8770 extract_constrain_insn_cached (insn);
8771 return is_short ? s_tmpl : l_tmpl;
8772 }
8773
8774 /* Searches X for any reference to REGNO, returning the rtx of the
8775 reference found if any. Otherwise, returns NULL_RTX. */
8776
8777 rtx
arc_regno_use_in (unsigned int regno, rtx x)
8779 {
8780 const char *fmt;
8781 int i, j;
8782 rtx tem;
8783
8784 if (REG_P (x) && refers_to_regno_p (regno, x))
8785 return x;
8786
8787 fmt = GET_RTX_FORMAT (GET_CODE (x));
8788 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8789 {
8790 if (fmt[i] == 'e')
8791 {
8792 if ((tem = regno_use_in (regno, XEXP (x, i))))
8793 return tem;
8794 }
8795 else if (fmt[i] == 'E')
8796 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8797 if ((tem = regno_use_in (regno , XVECEXP (x, i, j))))
8798 return tem;
8799 }
8800
8801 return NULL_RTX;
8802 }
8803
8804 /* Return the integer value of the "type" attribute for INSN, or -1 if
8805 INSN can't have attributes. */
8806
8807 int
arc_attr_type (rtx_insn *insn)
8809 {
8810 if (NONJUMP_INSN_P (insn)
8811 ? (GET_CODE (PATTERN (insn)) == USE
8812 || GET_CODE (PATTERN (insn)) == CLOBBER)
8813 : JUMP_P (insn)
8814 ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
8815 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
8816 : !CALL_P (insn))
8817 return -1;
8818 return get_attr_type (insn);
8819 }
8820
8821 /* Return true if insn sets the condition codes. */
8822
8823 bool
arc_sets_cc_p (rtx_insn *insn)
8825 {
8826 if (NONJUMP_INSN_P (insn))
8827 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
8828 insn = seq->insn (seq->len () - 1);
8829 return arc_attr_type (insn) == TYPE_COMPARE;
8830 }
8831
8832 /* Return true if INSN is an instruction with a delay slot we may want
8833 to fill. */
8834
8835 bool
arc_need_delay (rtx_insn *insn)
8837 {
8838 rtx_insn *next;
8839
8840 if (!flag_delayed_branch)
8841 return false;
8842 /* The return at the end of a function needs a delay slot. */
8843 if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
8844 && (!(next = next_active_insn (insn))
8845 || ((!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) != SEQUENCE)
8846 && arc_attr_type (next) == TYPE_RETURN))
8847 && (!TARGET_PAD_RETURN
8848 || (prev_active_insn (insn)
8849 && prev_active_insn (prev_active_insn (insn))
8850 && prev_active_insn (prev_active_insn (prev_active_insn (insn))))))
8851 return true;
8852 if (NONJUMP_INSN_P (insn)
8853 ? (GET_CODE (PATTERN (insn)) == USE
8854 || GET_CODE (PATTERN (insn)) == CLOBBER
8855 || GET_CODE (PATTERN (insn)) == SEQUENCE)
8856 : JUMP_P (insn)
8857 ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
8858 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
8859 : !CALL_P (insn))
8860 return false;
8861 return num_delay_slots (insn) != 0;
8862 }
8863
/* Return true if the scheduling passes have already run, i.e. where
   possible, we should try to mitigate high latencies by different
   instruction selection.  */
8867
8868 bool
arc_scheduling_not_expected (void)
8870 {
8871 return cfun->machine->arc_reorg_started;
8872 }
8873
/* Oddly enough, sometimes we get a zero overhead loop that branch
   shortening doesn't think is a loop - observed with compile/pr24883.c
   -O3 -fomit-frame-pointer -funroll-loops.  Make sure the alignment is
   visible for branch shortening (we actually align the loop insn before
   it, but that is equivalent since the loop insn is 4 bytes long).  */
8880
8881 int
arc_label_align (rtx label)
8883 {
8884 int loop_align = LOOP_ALIGN (LABEL);
8885
8886 if (loop_align > align_labels_log)
8887 {
8888 rtx_insn *prev = prev_nonnote_insn (label);
8889
8890 if (prev && NONJUMP_INSN_P (prev)
8891 && GET_CODE (PATTERN (prev)) == PARALLEL
8892 && recog_memoized (prev) == CODE_FOR_doloop_begin_i)
8893 return loop_align;
8894 }
8895 /* Code has a minimum p2 alignment of 1, which we must restore after an
8896 ADDR_DIFF_VEC. */
8897 if (align_labels_log < 1)
8898 {
8899 rtx_insn *next = next_nonnote_nondebug_insn (label);
8900 if (INSN_P (next) && recog_memoized (next) >= 0)
8901 return 1;
8902 }
8903 return align_labels_log;
8904 }
8905
8906 /* Return true if LABEL is in executable code. */
8907
8908 bool
arc_text_label (rtx_insn *label)
8910 {
8911 rtx_insn *next;
8912
8913 /* ??? We use deleted labels like they were still there, see
8914 gcc.c-torture/compile/20000326-2.c . */
8915 gcc_assert (GET_CODE (label) == CODE_LABEL
8916 || (GET_CODE (label) == NOTE
8917 && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL));
8918 next = next_nonnote_insn (label);
8919 if (next)
8920 return (!JUMP_TABLE_DATA_P (next)
8921 || GET_CODE (PATTERN (next)) != ADDR_VEC);
8922 else if (!PREV_INSN (label))
8923 /* ??? sometimes text labels get inserted very late, see
8924 gcc.dg/torture/stackalign/comp-goto-1.c */
8925 return true;
8926 return false;
8927 }
8928
8929 /* Return the size of the pretend args for DECL. */
8930
8931 int
arc_decl_pretend_args (tree decl)
8933 {
8934 /* struct function is in DECL_STRUCT_FUNCTION (decl), but no
8935 pretend_args there... See PR38391. */
8936 gcc_assert (decl == current_function_decl);
8937 return crtl->args.pretend_args_size;
8938 }
8939
8940 /* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble
8941 when compiling with -O2 -freorder-blocks-and-partition -fprofile-use
8942 -D_PROFILE_USE; delay branch scheduling then follows a crossing jump
8943 to redirect two breqs. */
8944
8945 static bool
arc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8947 {
8948 /* ??? get_attr_type is declared to take an rtx. */
8949 union { const rtx_insn *c; rtx_insn *r; } u;
8950
8951 u.c = follower;
8952 if (CROSSING_JUMP_P (followee))
8953 switch (get_attr_type (u.r))
8954 {
8955 case TYPE_BRCC:
8956 case TYPE_BRCC_NO_DELAY_SLOT:
8957 return false;
8958 default:
8959 return true;
8960 }
8961 return true;
8962 }
8963
/* Implement EPILOGUE_USES.
   Return true if REGNO should be added to the deemed uses of the epilogue.

   We use the return address
   arc_return_address_regs[arc_compute_function_type (cfun)].
   But also, we have to make sure all the register restore instructions
   are known to be live in interrupt functions.  */
8971
8972 bool
arc_epilogue_uses (int regno)
8974 {
8975 if (reload_completed)
8976 {
8977 if (ARC_INTERRUPT_P (cfun->machine->fn_type))
8978 {
8979 if (!fixed_regs[regno])
8980 return true;
8981 return regno == arc_return_address_regs[cfun->machine->fn_type];
8982 }
8983 else
8984 return regno == RETURN_ADDR_REGNUM;
8985 }
8986 else
8987 return regno == arc_return_address_regs[arc_compute_function_type (cfun)];
8988 }
8989
8990 #ifndef TARGET_NO_LRA
8991 #define TARGET_NO_LRA !TARGET_LRA
8992 #endif
8993
8994 static bool
arc_lra_p (void)
8996 {
8997 return !TARGET_NO_LRA;
8998 }
8999
/* ??? Should we define TARGET_REGISTER_PRIORITY?  We might prefer to use
   Rcq registers, because some insns are shorter with them.  OTOH we already
9002 have separate alternatives for this purpose, and other insns don't
9003 mind, so maybe we should rather prefer the other registers?
9004 We need more data, and we can only get that if we allow people to
9005 try all options. */
9006 static int
arc_register_priority (int r)
9008 {
9009 switch (arc_lra_priority_tag)
9010 {
9011 case ARC_LRA_PRIORITY_NONE:
9012 return 0;
9013 case ARC_LRA_PRIORITY_NONCOMPACT:
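      /* ((((r & 7) ^ 4) - 4) & 15) equals R exactly for r0-r3 and
	 r12-r15, the registers usable in 16-bit (compact) encodings.  */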
9014 return ((((r & 7) ^ 4) - 4) & 15) != r;
9015 case ARC_LRA_PRIORITY_COMPACT:
9016 return ((((r & 7) ^ 4) - 4) & 15) == r;
9017 default:
9018 gcc_unreachable ();
9019 }
9020 }
9021
9022 static reg_class_t
arc_spill_class (reg_class_t /* orig_class */, machine_mode)
9024 {
9025 return GENERAL_REGS;
9026 }
9027
9028 bool
arc_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
			       int itype)
9031 {
9032 rtx x = *p;
9033 enum reload_type type = (enum reload_type) itype;
9034
9035 if (GET_CODE (x) == PLUS
9036 && CONST_INT_P (XEXP (x, 1))
9037 && (RTX_OK_FOR_BASE_P (XEXP (x, 0), true)
9038 || (REG_P (XEXP (x, 0))
9039 && reg_equiv_constant (REGNO (XEXP (x, 0))))))
9040 {
9041 int scale = GET_MODE_SIZE (mode);
9042 int shift;
9043 rtx index_rtx = XEXP (x, 1);
9044 HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9045 rtx reg, sum, sum2;
9046
9047 if (scale > 4)
9048 scale = 4;
9049 if ((scale-1) & offset)
9050 scale = 1;
9051 shift = scale >> 1;
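      /* Round the offset to the nearest multiple of 512 << shift; the
	 residual offset - offset_base then lies in
	 [-256 << shift, 256 << shift).  */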
9052 offset_base
9053 = ((offset + (256 << shift))
9054 & ((HOST_WIDE_INT)((unsigned HOST_WIDE_INT) -512 << shift)));
9055 /* Sometimes the normal form does not suit DImode. We
9056 could avoid that by using smaller ranges, but that
9057 would give less optimized code when SImode is
9058 prevalent. */
9059 if (GET_MODE_SIZE (mode) + offset - offset_base <= (256 << shift))
9060 {
9061 int regno;
9062
9063 reg = XEXP (x, 0);
9064 regno = REGNO (reg);
9065 sum2 = sum = plus_constant (Pmode, reg, offset_base);
9066
9067 if (reg_equiv_constant (regno))
9068 {
9069 sum2 = plus_constant (Pmode, reg_equiv_constant (regno),
9070 offset_base);
9071 if (GET_CODE (sum2) == PLUS)
9072 sum2 = gen_rtx_CONST (Pmode, sum2);
9073 }
9074 *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9075 push_reload (sum2, NULL_RTX, &XEXP (*p, 0), NULL,
9076 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum,
9077 type);
9078 return true;
9079 }
9080 }
9081 /* We must re-recognize what we created before. */
9082 else if (GET_CODE (x) == PLUS
9083 && GET_CODE (XEXP (x, 0)) == PLUS
9084 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9085 && REG_P (XEXP (XEXP (x, 0), 0))
9086 && CONST_INT_P (XEXP (x, 1)))
9087 {
9088 /* Because this address is so complex, we know it must have
9089 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9090 it is already unshared, and needs no further unsharing. */
9091 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9092 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9093 return true;
9094 }
9095 return false;
9096 }
9097
9098 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
9099
9100 static bool
arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
				    unsigned int align,
				    enum by_pieces_operation op,
				    bool speed_p)
9105 {
9106 /* Let the movmem expander handle small block moves. */
9107 if (op == MOVE_BY_PIECES)
9108 return false;
9109
9110 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
9111 }
9112
9113 /* Emit a (pre) memory barrier around an atomic sequence according to
9114 MODEL. */
9115
9116 static void
arc_pre_atomic_barrier (enum memmodel model)
9118 {
9119 if (need_atomic_barrier_p (model, true))
9120 emit_insn (gen_memory_barrier ());
9121 }
9122
9123 /* Emit a (post) memory barrier around an atomic sequence according to
9124 MODEL. */
9125
9126 static void
arc_post_atomic_barrier (enum memmodel model)
9128 {
9129 if (need_atomic_barrier_p (model, false))
9130 emit_insn (gen_memory_barrier ());
9131 }
9132
/* Emit a jump instruction, annotated as very unlikely to be taken.  */
9134
9135 static void
emit_unlikely_jump (rtx insn)
9137 {
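  /* REG_BR_PROB_BASE / 100 - 1, i.e. a taken probability just under 1%.  */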
9138 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
9139
9140 insn = emit_jump_insn (insn);
9141 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
9142 }
9143
/* Expand code to perform an 8 or 16-bit compare and swap by doing a
   32-bit compare and swap on the word containing the byte or
   half-word.  The difference between a weak and a strong CAS is that
   the weak version may simply fail.  The strong version relies on two
   loops: one checks whether the SCOND op succeeded, the other checks
   whether the accessed 32-bit location containing the 8 or 16-bit
   datum was changed by another thread.  The first loop is implemented
   by the atomic_compare_and_swapsi_1 pattern.  The second loop is
   implemented by this routine.  */
9153
9154 static void
arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem,
				rtx oldval, rtx newval, rtx weak,
				rtx mod_s, rtx mod_f)
9158 {
9159 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
9160 rtx addr = gen_reg_rtx (Pmode);
9161 rtx off = gen_reg_rtx (SImode);
9162 rtx oldv = gen_reg_rtx (SImode);
9163 rtx newv = gen_reg_rtx (SImode);
9164 rtx oldvalue = gen_reg_rtx (SImode);
9165 rtx newvalue = gen_reg_rtx (SImode);
9166 rtx res = gen_reg_rtx (SImode);
9167 rtx resv = gen_reg_rtx (SImode);
9168 rtx memsi, val, mask, end_label, loop_label, cc, x;
9169 machine_mode mode;
9170 bool is_weak = (weak != const0_rtx);
9171
  /* Align the address down to the containing 32-bit word.  */
9173 emit_insn (gen_rtx_SET (addr,
9174 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
9175
9176 /* Compute the datum offset. */
9177 emit_insn (gen_rtx_SET (off,
9178 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
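  /* On big-endian targets the datum's position within the word is
     mirrored: the byte at offset 0 is the most significant one.  */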
9179 if (TARGET_BIG_ENDIAN)
9180 emit_insn (gen_rtx_SET (off,
9181 gen_rtx_MINUS (SImode,
9182 (GET_MODE (mem) == QImode) ?
9183 GEN_INT (3) : GEN_INT (2), off)));
9184
9185 /* Normal read from truncated address. */
9186 memsi = gen_rtx_MEM (SImode, addr);
9187 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
9188 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
9189
9190 val = copy_to_reg (memsi);
9191
  /* Convert the offset to bits.  */
9193 emit_insn (gen_rtx_SET (off,
9194 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
9195
9196 /* Get the proper mask. */
9197 if (GET_MODE (mem) == QImode)
9198 mask = force_reg (SImode, GEN_INT (0xff));
9199 else
9200 mask = force_reg (SImode, GEN_INT (0xffff));
9201
9202 emit_insn (gen_rtx_SET (mask,
9203 gen_rtx_ASHIFT (SImode, mask, off)));
9204
9205 /* Prepare the old and new values. */
9206 emit_insn (gen_rtx_SET (val,
9207 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9208 val)));
9209
9210 oldval = gen_lowpart (SImode, oldval);
9211 emit_insn (gen_rtx_SET (oldv,
9212 gen_rtx_ASHIFT (SImode, oldval, off)));
9213
9214 newval = gen_lowpart_common (SImode, newval);
9215 emit_insn (gen_rtx_SET (newv,
9216 gen_rtx_ASHIFT (SImode, newval, off)));
9217
9218 emit_insn (gen_rtx_SET (oldv,
9219 gen_rtx_AND (SImode, oldv, mask)));
9220
9221 emit_insn (gen_rtx_SET (newv,
9222 gen_rtx_AND (SImode, newv, mask)));
9223
9224 if (!is_weak)
9225 {
9226 end_label = gen_label_rtx ();
9227 loop_label = gen_label_rtx ();
9228 emit_label (loop_label);
9229 }
9230
9231 /* Make the old and new values. */
9232 emit_insn (gen_rtx_SET (oldvalue,
9233 gen_rtx_IOR (SImode, oldv, val)));
9234
9235 emit_insn (gen_rtx_SET (newvalue,
9236 gen_rtx_IOR (SImode, newv, val)));
9237
  /* Try a 32-bit atomic compare and swap.  It clobbers the CC
     register.  */
9240 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue,
9241 weak, mod_s, mod_f));
9242
9243 /* Regardless of the weakness of the operation, a proper boolean
9244 result needs to be provided. */
9245 x = gen_rtx_REG (CC_Zmode, CC_REG);
9246 x = gen_rtx_EQ (SImode, x, const0_rtx);
9247 emit_insn (gen_rtx_SET (bool_result, x));
9248
9249 if (!is_weak)
9250 {
      /* Check the result: if the atomic op succeeded, go to the end
	 label.  */
9253 x = gen_rtx_REG (CC_Zmode, CC_REG);
9254 x = gen_rtx_EQ (VOIDmode, x, const0_rtx);
9255 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9256 gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx);
9257 emit_jump_insn (gen_rtx_SET (pc_rtx, x));
9258
9259 /* Wait for the right moment when the accessed 32-bit location
9260 is stable. */
9261 emit_insn (gen_rtx_SET (resv,
9262 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9263 res)));
9264 mode = SELECT_CC_MODE (NE, resv, val);
9265 cc = gen_rtx_REG (mode, CC_REG);
9266 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val)));
9267
      /* Set the new value of the 32-bit location, properly masked.  */
9269 emit_insn (gen_rtx_SET (val, resv));
9270
      /* Try again if the location is unstable.  Fall through if only
	 the scond op failed.  */
9273 x = gen_rtx_NE (VOIDmode, cc, const0_rtx);
9274 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9275 gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx);
9276 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9277
9278 emit_label (end_label);
9279 }
9280
  /* End: properly return the result for the given mode.  */
9282 emit_insn (gen_rtx_SET (res,
9283 gen_rtx_AND (SImode, res, mask)));
9284
9285 emit_insn (gen_rtx_SET (res,
9286 gen_rtx_LSHIFTRT (SImode, res, off)));
9287
9288 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
9289 }
9290
9291 /* Helper function used by "atomic_compare_and_swap" expand
9292 pattern. */
9293
9294 void
arc_expand_compare_and_swap (rtx operands[])
9296 {
9297 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
9298 machine_mode mode;
9299
9300 bval = operands[0];
9301 rval = operands[1];
9302 mem = operands[2];
9303 oldval = operands[3];
9304 newval = operands[4];
9305 is_weak = operands[5];
9306 mod_s = operands[6];
9307 mod_f = operands[7];
9308 mode = GET_MODE (mem);
9309
9310 if (reg_overlap_mentioned_p (rval, oldval))
9311 oldval = copy_to_reg (oldval);
9312
9313 if (mode == SImode)
9314 {
9315 emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval,
9316 is_weak, mod_s, mod_f));
9317 x = gen_rtx_REG (CC_Zmode, CC_REG);
9318 x = gen_rtx_EQ (SImode, x, const0_rtx);
9319 emit_insn (gen_rtx_SET (bval, x));
9320 }
9321 else
9322 {
9323 arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval,
9324 is_weak, mod_s, mod_f);
9325 }
9326 }
9327
9328 /* Helper function used by the "atomic_compare_and_swapsi_1"
9329 pattern. */
9330
9331 void
arc_split_compare_and_swap (rtx operands[])
9333 {
9334 rtx rval, mem, oldval, newval;
9335 machine_mode mode;
9336 enum memmodel mod_s, mod_f;
9337 bool is_weak;
9338 rtx label1, label2, x, cond;
9339
9340 rval = operands[0];
9341 mem = operands[1];
9342 oldval = operands[2];
9343 newval = operands[3];
9344 is_weak = (operands[4] != const0_rtx);
9345 mod_s = (enum memmodel) INTVAL (operands[5]);
9346 mod_f = (enum memmodel) INTVAL (operands[6]);
9347 mode = GET_MODE (mem);
9348
9349 /* ARC atomic ops work only with 32-bit aligned memories. */
9350 gcc_assert (mode == SImode);
9351
9352 arc_pre_atomic_barrier (mod_s);
9353
9354 label1 = NULL_RTX;
9355 if (!is_weak)
9356 {
9357 label1 = gen_label_rtx ();
9358 emit_label (label1);
9359 }
9360 label2 = gen_label_rtx ();
9361
9362 /* Load exclusive. */
9363 emit_insn (gen_arc_load_exclusivesi (rval, mem));
9364
9365 /* Check if it is oldval. */
9366 mode = SELECT_CC_MODE (NE, rval, oldval);
9367 cond = gen_rtx_REG (mode, CC_REG);
9368 emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode, rval, oldval)));
9369
9370 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9371 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9372 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
9373 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9374
9375 /* Exclusively store new item. Store clobbers CC reg. */
9376 emit_insn (gen_arc_store_exclusivesi (mem, newval));
9377
9378 if (!is_weak)
9379 {
9380 /* Check the result of the store. */
9381 cond = gen_rtx_REG (CC_Zmode, CC_REG);
9382 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9383 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9384 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
9385 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9386 }
9387
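  /* Place LABEL2 before or after the trailing barrier so that, for a
     relaxed failure model, the failure path skips the barrier.  */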
9388 if (mod_f != MEMMODEL_RELAXED)
9389 emit_label (label2);
9390
9391 arc_post_atomic_barrier (mod_s);
9392
9393 if (mod_f == MEMMODEL_RELAXED)
9394 emit_label (label2);
9395 }
9396
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */
9402
9403 void
arc_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
		      rtx orig_before, rtx orig_after, rtx model_rtx)
9406 {
9407 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
9408 machine_mode mode = GET_MODE (mem);
9409 rtx label, x, cond;
9410 rtx before = orig_before, after = orig_after;
9411
9412 /* ARC atomic ops work only with 32-bit aligned memories. */
9413 gcc_assert (mode == SImode);
9414
9415 arc_pre_atomic_barrier (model);
9416
9417 label = gen_label_rtx ();
9418 emit_label (label);
9419 label = gen_rtx_LABEL_REF (VOIDmode, label);
9420
9421 if (before == NULL_RTX)
9422 before = gen_reg_rtx (mode);
9423
9424 if (after == NULL_RTX)
9425 after = gen_reg_rtx (mode);
9426
9427 /* Load exclusive. */
9428 emit_insn (gen_arc_load_exclusivesi (before, mem));
9429
9430 switch (code)
9431 {
9432 case NOT:
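      /* A NOT code requests an atomic NAND: compute ~(before & val).  */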
9433 x = gen_rtx_AND (mode, before, val);
9434 emit_insn (gen_rtx_SET (after, x));
9435 x = gen_rtx_NOT (mode, after);
9436 emit_insn (gen_rtx_SET (after, x));
9437 break;
9438
9439 case MINUS:
9440 if (CONST_INT_P (val))
9441 {
9442 val = GEN_INT (-INTVAL (val));
9443 code = PLUS;
9444 }
9445
9446 /* FALLTHRU. */
9447 default:
9448 x = gen_rtx_fmt_ee (code, mode, before, val);
9449 emit_insn (gen_rtx_SET (after, x));
9450 break;
9451 }
9452
9453 /* Exclusively store new item. Store clobbers CC reg. */
9454 emit_insn (gen_arc_store_exclusivesi (mem, after));
9455
9456 /* Check the result of the store. */
9457 cond = gen_rtx_REG (CC_Zmode, CC_REG);
9458 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9459 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9460 label, pc_rtx);
9461 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9462
9463 arc_post_atomic_barrier (model);
9464 }
9465
9466 /* Implement TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P. */
9467
9468 static bool
arc_no_speculation_in_delay_slots_p ()
9470 {
9471 return true;
9472 }
9473
9474 /* Return a parallel of registers to represent where to find the
9475 register pieces if required, otherwise NULL_RTX. */
9476
9477 static rtx
arc_dwarf_register_span (rtx rtl)
9479 {
  machine_mode mode = GET_MODE (rtl);
9481 unsigned regno;
9482 rtx p;
9483
9484 if (GET_MODE_SIZE (mode) != 8)
9485 return NULL_RTX;
9486
9487 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
9488 regno = REGNO (rtl);
9489 XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
9490 XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
9491
9492 return p;
9493 }
9494
9495
9496 struct gcc_target targetm = TARGET_INITIALIZER;
9497
9498 #include "gt-arc.h"
9499