1 /* Subroutines used for code generation on the Synopsys DesignWare ARC cpu.
2    Copyright (C) 1994-2016 Free Software Foundation, Inc.
3 
4    Sources derived from work done by Sankhya Technologies (www.sankhya.com) on
5    behalf of Synopsys Inc.
6 
7    Position Independent Code support added, code cleaned up,
8    Comments and Support For ARC700 instructions added by
9    Saurabh Verma (saurabh.verma@codito.com)
10    Ramana Radhakrishnan (ramana.radhakrishnan@codito.com)
11 
12    Fixing ABI inconsistencies, optimizations for ARC600 / ARC700 pipelines,
13    profiling support added by Joern Rennecke <joern.rennecke@embecosm.com>
14 
15 This file is part of GCC.
16 
17 GCC is free software; you can redistribute it and/or modify
18 it under the terms of the GNU General Public License as published by
19 the Free Software Foundation; either version 3, or (at your option)
20 any later version.
21 
22 GCC is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25 GNU General Public License for more details.
26 
27 You should have received a copy of the GNU General Public License
28 along with GCC; see the file COPYING3.  If not see
29 <http://www.gnu.org/licenses/>.  */
30 
31 #include "config.h"
32 #include "system.h"
33 #include "coretypes.h"
34 #include "backend.h"
35 #include "target.h"
36 #include "rtl.h"
37 #include "tree.h"
38 #include "cfghooks.h"
39 #include "df.h"
40 #include "tm_p.h"
41 #include "stringpool.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "diagnostic.h"
47 #include "fold-const.h"
48 #include "varasm.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "langhooks.h"
57 #include "tm-constrs.h"
58 #include "reload.h" /* For operands_match_p */
59 #include "cfgrtl.h"
60 #include "tree-pass.h"
61 #include "context.h"
62 #include "builtins.h"
63 #include "rtl-iter.h"
64 #include "alias.h"
65 
66 /* Which cpu we're compiling for (ARC600, ARC601, ARC700, EM, HS).  */
67 static const char *arc_cpu_string = "";
68 
69 /* ??? Loads can handle any constant, stores can only handle small ones.  */
70 /* OTOH, LIMMs cost extra, so their usefulness is limited.  */
71 #define RTX_OK_FOR_OFFSET_P(MODE, X) \
72 (GET_CODE (X) == CONST_INT \
73  && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & -4, \
74 		     (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3 \
75 		      ? 0 \
76 		      : -(-GET_MODE_SIZE (MODE) | -4) >> 1)))
77 
78 #define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X, INDEX, STRICT) \
79 (GET_CODE (X) == PLUS			     \
80   && RTX_OK_FOR_BASE_P (XEXP (X, 0), (STRICT)) \
81   && ((INDEX && RTX_OK_FOR_INDEX_P (XEXP (X, 1), (STRICT)) \
82        && GET_MODE_SIZE ((MODE)) <= 4) \
83       || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1))))
84 
85 #define LEGITIMATE_SCALED_ADDRESS_P(MODE, X, STRICT) \
86 (GET_CODE (X) == PLUS \
87  && GET_CODE (XEXP (X, 0)) == MULT \
88  && RTX_OK_FOR_INDEX_P (XEXP (XEXP (X, 0), 0), (STRICT)) \
89  && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \
90  && ((GET_MODE_SIZE (MODE) == 2 && INTVAL (XEXP (XEXP (X, 0), 1)) == 2) \
91      || (GET_MODE_SIZE (MODE) == 4 && INTVAL (XEXP (XEXP (X, 0), 1)) == 4)) \
92  && (RTX_OK_FOR_BASE_P (XEXP (X, 1), (STRICT)) \
93      || (flag_pic ? CONST_INT_P (XEXP (X, 1)) : CONSTANT_P (XEXP (X, 1)))))
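/* Illustrative note (not part of the original sources): for a 32-bit
   (SImode) access the macro above accepts an address of the form
       (plus (mult (reg Rindex) (const_int 4)) (reg Rbase))
   i.e. a base register plus an index register scaled by the access size;
   for HImode the scale factor must be 2, and the base position may also
   hold a constant (restricted to a plain CONST_INT when generating PIC
   code).  */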
94 
95 #define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \
96   (GET_CODE (X) == PLUS \
97    && (REG_P (XEXP ((X), 0)) && REGNO (XEXP ((X), 0)) == SDATA_BASE_REGNUM) \
98    && ((GET_CODE (XEXP((X),1)) == SYMBOL_REF \
99 	&& SYMBOL_REF_SMALL_P (XEXP ((X), 1))) \
100        || (GET_CODE (XEXP ((X), 1)) == CONST \
101 	   && GET_CODE (XEXP (XEXP ((X), 1), 0)) == PLUS \
102 	   && GET_CODE (XEXP (XEXP (XEXP ((X), 1), 0), 0)) == SYMBOL_REF \
103 	   && SYMBOL_REF_SMALL_P (XEXP (XEXP (XEXP ((X), 1), 0), 0)) \
104 	   && GET_CODE (XEXP(XEXP (XEXP ((X), 1), 0), 1)) == CONST_INT)))
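/* Illustrative note (not part of the original sources): the small-data
   macro above matches references relative to the small-data base register,
   e.g.
       (plus (reg SDATA_BASE_REGNUM) (symbol_ref "x"))
   or the same with (const (plus (symbol_ref "x") (const_int <offset>)))
   as the second operand, provided the symbol is flagged
   SYMBOL_REF_SMALL_P.  */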
105 
106 /* Array of valid operand punctuation characters.  */
107 char arc_punct_chars[256];
108 
109 /* State used by arc_ccfsm_advance to implement conditional execution.  */
110 struct GTY (()) arc_ccfsm
111 {
112   int state;
113   int cc;
114   rtx cond;
115   rtx_insn *target_insn;
116   int target_label;
117 };
118 
119 #define arc_ccfsm_current cfun->machine->ccfsm_current
120 
121 #define ARC_CCFSM_BRANCH_DELETED_P(STATE) \
122   ((STATE)->state == 1 || (STATE)->state == 2)
123 
124 /* Indicate we're conditionalizing insns now.  */
125 #define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \
126   ((STATE)->state += 2)
127 
128 #define ARC_CCFSM_COND_EXEC_P(STATE) \
129   ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5 \
130    || current_insn_predicate)
131 
132 /* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE.  */
133 #define CCFSM_ISCOMPACT(INSN,STATE) \
134   (ARC_CCFSM_COND_EXEC_P (STATE) \
135    ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
136       || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
137    : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
138 
139 /* Likewise, but also consider that INSN might be in a delay slot of JUMP.  */
140 #define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \
141   ((ARC_CCFSM_COND_EXEC_P (STATE) \
142     || (JUMP_P (JUMP) \
143 	&& INSN_ANNULLED_BRANCH_P (JUMP) \
144 	&& (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \
145    ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
146       || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
147    : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
148 
149 /* The maximum number of insns skipped which will be conditionalised if
150    possible.  */
151 /* When optimizing for speed:
152     Let p be the probability that the potentially skipped insns need to
153     be executed, pn the cost of a correctly predicted non-taken branch,
154     mt the cost of a mis/non-predicted taken branch,
155     mn a mispredicted non-taken branch, and pt a correctly predicted taken one;
156     costs are expressed in numbers of instructions like the ones being
157     considered for skipping.
158     Unfortunately we don't have a measure of predictability - this
159     is linked to probability only in that in the no-eviction-scenario
160     there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger
161     value that can be assumed *if* the distribution is perfectly random.
162     A predictability of 1 is perfectly plausible no matter what p is,
163     because the decision could be dependent on an invocation parameter
164     of the program.
165     For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn
166     For small p, we want MAX_INSNS_SKIPPED == pt
167 
168    When optimizing for size:
169     We want to skip insns unless we could use 16-bit opcodes for the
170     non-conditionalized insns to balance the branch length or more.
171     Performance can be the tie-breaker.  */
172 /* If the potentially-skipped insns are likely to be executed, we'll
173    generally save one correctly predicted non-taken branch; we want
174    this to be no less than 1/p.  */
176 #define MAX_INSNS_SKIPPED 3
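/* A purely hypothetical worked example of the speed heuristic above (the
   numbers are illustrative, not taken from any ARC pipeline description):
   with pn = 1, mt = 4 and p = 0.75, the large-p formula gives
   pn/(1-p) + mt - pn = 1/0.25 + 4 - 1 = 7, whereas for a small p the limit
   is simply pt.  The value 3 above is presumably a compromise between such
   speed estimates and the code-size considerations.  */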
177 
178 /* A nop is needed between a 4 byte insn that sets the condition codes and
179    a branch that uses them (the same isn't true for an 8 byte insn that sets
180    the condition codes).  Set by arc_ccfsm_advance.  Used by
181    arc_print_operand.  */
182 
183 static int get_arc_condition_code (rtx);
184 
185 static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);
186 
187 /* Machine specific attributes supported by the ARC target.  */
189 const struct attribute_spec arc_attribute_table[] =
190 {
191  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
192       affects_type_identity } */
193   { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute, true },
194   /* Function calls made to this symbol must be done indirectly, because
195      it may lie outside of the 21/25 bit addressing range of a normal function
196      call.  */
197   { "long_call",    0, 0, false, true,  true,  NULL, false },
198   /* Whereas these functions are always known to reside within the 25 bit
199      addressing range of unconditionalized bl.  */
200   { "medium_call",   0, 0, false, true,  true,  NULL, false },
201   /* And these functions are always known to reside within the 21 bit
202      addressing range of blcc.  */
203   { "short_call",   0, 0, false, true,  true,  NULL, false },
204   { NULL, 0, 0, false, false, false, NULL, false }
205 };
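/* For illustration only (hypothetical user code, not part of this file):
   the call-range attributes above are attached to function types, and the
   interrupt attribute takes the interrupt link register name as its single
   string argument, e.g.
       void far_away (void) __attribute__ ((long_call));
       void handler (void) __attribute__ ((interrupt ("ilink1")));  */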
206 static int arc_comp_type_attributes (const_tree, const_tree);
207 static void arc_file_start (void);
208 static void arc_internal_label (FILE *, const char *, unsigned long);
209 static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
210 				 tree);
211 static int arc_address_cost (rtx, machine_mode, addr_space_t, bool);
212 static void arc_encode_section_info (tree decl, rtx rtl, int first);
213 
214 static void arc_init_builtins (void);
215 static rtx arc_expand_builtin (tree, rtx, rtx, machine_mode, int);
216 
217 static int branch_dest (rtx);
218 
219 static void  arc_output_pic_addr_const (FILE *,  rtx, int);
220 void emit_pic_move (rtx *, machine_mode);
221 bool arc_legitimate_pic_operand_p (rtx);
222 static bool arc_function_ok_for_sibcall (tree, tree);
223 static rtx arc_function_value (const_tree, const_tree, bool);
224 const char * output_shift (rtx *);
225 static void arc_reorg (void);
226 static bool arc_in_small_data_p (const_tree);
227 
228 static void arc_init_reg_tables (void);
229 static bool arc_return_in_memory (const_tree, const_tree);
230 static bool arc_vector_mode_supported_p (machine_mode);
231 
232 static bool arc_can_use_doloop_p (const widest_int &, const widest_int &,
233 				  unsigned int, bool);
234 static const char *arc_invalid_within_doloop (const rtx_insn *);
235 
236 static void output_short_suffix (FILE *file);
237 
238 static bool arc_frame_pointer_required (void);
239 
240 static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
241 						unsigned int,
242 						enum by_pieces_operation op,
243 						bool);
244 
245 /* Implements target hook vector_mode_supported_p.  */
246 
247 static bool
248 arc_vector_mode_supported_p (machine_mode mode)
249 {
250   if (!TARGET_SIMD_SET)
251     return false;
252 
253   if ((mode == V4SImode)
254       || (mode == V8HImode))
255     return true;
256 
257   return false;
258 }
259 
260 
261 /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review.  */
262 static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
263 static rtx arc_delegitimize_address (rtx);
264 static bool arc_can_follow_jump (const rtx_insn *follower,
265 				 const rtx_insn *followee);
266 
267 static rtx frame_insn (rtx);
268 static void arc_function_arg_advance (cumulative_args_t, machine_mode,
269 				      const_tree, bool);
270 static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
271 
272 static void arc_finalize_pic (void);
273 
274 /* Initialize the GCC target structure.  */
275 #undef  TARGET_COMP_TYPE_ATTRIBUTES
276 #define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes
277 #undef TARGET_ASM_FILE_START
278 #define TARGET_ASM_FILE_START arc_file_start
279 #undef TARGET_ATTRIBUTE_TABLE
280 #define TARGET_ATTRIBUTE_TABLE arc_attribute_table
281 #undef TARGET_ASM_INTERNAL_LABEL
282 #define TARGET_ASM_INTERNAL_LABEL arc_internal_label
283 #undef TARGET_RTX_COSTS
284 #define TARGET_RTX_COSTS arc_rtx_costs
285 #undef TARGET_ADDRESS_COST
286 #define TARGET_ADDRESS_COST arc_address_cost
287 
288 #undef TARGET_ENCODE_SECTION_INFO
289 #define TARGET_ENCODE_SECTION_INFO arc_encode_section_info
290 
291 #undef TARGET_CANNOT_FORCE_CONST_MEM
292 #define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem
293 
294 #undef  TARGET_INIT_BUILTINS
295 #define TARGET_INIT_BUILTINS  arc_init_builtins
296 
297 #undef  TARGET_EXPAND_BUILTIN
298 #define TARGET_EXPAND_BUILTIN arc_expand_builtin
299 
300 #undef  TARGET_BUILTIN_DECL
301 #define TARGET_BUILTIN_DECL arc_builtin_decl
302 
303 #undef  TARGET_ASM_OUTPUT_MI_THUNK
304 #define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk
305 
306 #undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
307 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
308 
309 #undef  TARGET_FUNCTION_OK_FOR_SIBCALL
310 #define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall
311 
312 #undef  TARGET_MACHINE_DEPENDENT_REORG
313 #define TARGET_MACHINE_DEPENDENT_REORG arc_reorg
314 
315 #undef TARGET_IN_SMALL_DATA_P
316 #define TARGET_IN_SMALL_DATA_P arc_in_small_data_p
317 
318 #undef TARGET_PROMOTE_FUNCTION_MODE
319 #define TARGET_PROMOTE_FUNCTION_MODE \
320   default_promote_function_mode_always_promote
321 
322 #undef TARGET_PROMOTE_PROTOTYPES
323 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
324 
325 #undef TARGET_RETURN_IN_MEMORY
326 #define TARGET_RETURN_IN_MEMORY arc_return_in_memory
327 #undef TARGET_PASS_BY_REFERENCE
328 #define TARGET_PASS_BY_REFERENCE arc_pass_by_reference
329 
330 #undef TARGET_SETUP_INCOMING_VARARGS
331 #define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs
332 
333 #undef TARGET_ARG_PARTIAL_BYTES
334 #define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes
335 
336 #undef TARGET_MUST_PASS_IN_STACK
337 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
338 
339 #undef TARGET_FUNCTION_VALUE
340 #define TARGET_FUNCTION_VALUE arc_function_value
341 
342 #undef  TARGET_SCHED_ADJUST_PRIORITY
343 #define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority
344 
345 #undef TARGET_VECTOR_MODE_SUPPORTED_P
346 #define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
347 
348 #undef TARGET_CAN_USE_DOLOOP_P
349 #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
350 
351 #undef TARGET_INVALID_WITHIN_DOLOOP
352 #define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop
353 
354 #undef TARGET_PRESERVE_RELOAD_P
355 #define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p
356 
357 #undef TARGET_CAN_FOLLOW_JUMP
358 #define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump
359 
360 #undef TARGET_DELEGITIMIZE_ADDRESS
361 #define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address
362 
363 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
364 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
365   arc_use_by_pieces_infrastructure_p
366 
367 /* Usually, we will be able to scale anchor offsets.
368    When this fails, we want LEGITIMIZE_ADDRESS to kick in.  */
369 #undef TARGET_MIN_ANCHOR_OFFSET
370 #define TARGET_MIN_ANCHOR_OFFSET (-1024)
371 #undef TARGET_MAX_ANCHOR_OFFSET
372 #define TARGET_MAX_ANCHOR_OFFSET (1020)
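/* Presumably these limits come from the scaled 32-bit load/store forms:
   a 9-bit signed offset scaled by 4 reaches 4 * [-256, 255], i.e.
   [-1024, 1020], matching the two values above.  (Illustrative reasoning,
   not taken from the ARC documentation.)  */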
373 
374 #undef TARGET_SECONDARY_RELOAD
375 #define TARGET_SECONDARY_RELOAD arc_secondary_reload
376 
377 #define TARGET_OPTION_OVERRIDE arc_override_options
378 
379 #define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage
380 
381 #define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline
382 
383 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arc_trampoline_adjust_address
384 
385 #define TARGET_CAN_ELIMINATE arc_can_eliminate
386 
387 #define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required
388 
389 #define TARGET_FUNCTION_ARG arc_function_arg
390 
391 #define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance
392 
393 #define TARGET_LEGITIMATE_CONSTANT_P arc_legitimate_constant_p
394 
395 #define TARGET_LEGITIMATE_ADDRESS_P arc_legitimate_address_p
396 
397 #define TARGET_MODE_DEPENDENT_ADDRESS_P arc_mode_dependent_address_p
398 
399 #define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address
400 
401 #define TARGET_ADJUST_INSN_LENGTH arc_adjust_insn_length
402 
403 #define TARGET_INSN_LENGTH_PARAMETERS arc_insn_length_parameters
404 
405 #undef TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P
406 #define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P	\
407   arc_no_speculation_in_delay_slots_p
408 
409 #undef TARGET_LRA_P
410 #define TARGET_LRA_P arc_lra_p
411 #define TARGET_REGISTER_PRIORITY arc_register_priority
412 /* Stores with scaled offsets have different displacement ranges.  */
413 #define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
414 #define TARGET_SPILL_CLASS arc_spill_class
415 
416 #include "target-def.h"
417 
418 #undef TARGET_ASM_ALIGNED_HI_OP
419 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
420 #undef TARGET_ASM_ALIGNED_SI_OP
421 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
422 
423 #undef TARGET_DWARF_REGISTER_SPAN
424 #define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
425 
426 /* Try to keep the (mov:DF _, reg) as early as possible so
427    that the d<add/sub/mul>h-lr insns appear together and can
428    use the peephole2 pattern.  */
429 
430 static int
431 arc_sched_adjust_priority (rtx_insn *insn, int priority)
432 {
433   rtx set = single_set (insn);
434   if (set
435       && GET_MODE (SET_SRC(set)) == DFmode
436       && GET_CODE (SET_SRC(set)) == REG)
437     {
438       /* Incrementing priority by 20 (empirically derived).  */
439       return priority + 20;
440     }
441 
442   return priority;
443 }
444 
445 /* For ARC base register + offset addressing, the validity of the
446    address is mode-dependent for most of the offset range, as the
447    offset can be scaled by the access size.
448    We don't expose these as mode-dependent addresses in the
449    mode_dependent_address_p target hook, because that would disable
450    lots of optimizations, and most uses of these addresses are for 32
451    or 64 bit accesses anyways, which are fine.
452    However, that leaves some addresses for 8 / 16 bit values not
453    properly reloaded by the generic code, which is why we have to
454    schedule secondary reloads for these.  */
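/* For instance (illustrative only): an offset such as 600 can be encoded
   for a 32-bit access by scaling it by the access size, but the same
   base+600 address is out of range for a QImode or HImode access, so the
   reload_qi_* / reload_hi_* patterns selected below supply a scratch
   register to rebuild the address.  */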
455 
456 static reg_class_t
457 arc_secondary_reload (bool in_p,
458 		      rtx x,
459 		      reg_class_t cl,
460 		      machine_mode mode,
461 		      secondary_reload_info *sri)
462 {
463   enum rtx_code code = GET_CODE (x);
464 
465   if (cl == DOUBLE_REGS)
466     return GENERAL_REGS;
467 
468   /* The loop counter register can be stored, but not loaded directly.  */
469   if ((cl == LPCOUNT_REG || cl == WRITABLE_CORE_REGS)
470       && in_p && MEM_P (x))
471     return GENERAL_REGS;
472 
473  /* If we have a subreg (reg), where reg is a pseudo (that will end in
474     a memory location), then we may need a scratch register to handle
475     the fp/sp+largeoffset address.  */
476   if (code == SUBREG)
477     {
478       rtx addr = NULL_RTX;
479       x = SUBREG_REG (x);
480 
481       if (REG_P (x))
482 	{
483 	  int regno = REGNO (x);
484 	  if (regno >= FIRST_PSEUDO_REGISTER)
485 	    regno = reg_renumber[regno];
486 
487 	  if (regno != -1)
488 	    return NO_REGS;
489 
490 	  /* It is a pseudo that ends in a stack location.  */
491 	  if (reg_equiv_mem (REGNO (x)))
492 	    {
493 	      /* Get the equivalent address and check the range of the
494 		 offset.  */
495 	      rtx mem = reg_equiv_mem (REGNO (x));
496 	      addr = find_replacement (&XEXP (mem, 0));
497 	    }
498 	}
499       else
500 	{
501 	  gcc_assert (MEM_P (x));
502 	  addr = XEXP (x, 0);
503 	  addr = simplify_rtx (addr);
504 	}
505       if (addr && GET_CODE (addr) == PLUS
506 	  && CONST_INT_P (XEXP (addr, 1))
507 	  && (!RTX_OK_FOR_OFFSET_P (mode, XEXP (addr, 1))))
508 	{
509 	  switch (mode)
510 	    {
511 	    case QImode:
512 	      sri->icode =
513 		in_p ? CODE_FOR_reload_qi_load : CODE_FOR_reload_qi_store;
514 	      break;
515 	    case HImode:
516 	      sri->icode =
517 		in_p ? CODE_FOR_reload_hi_load : CODE_FOR_reload_hi_store;
518 	      break;
519 	    default:
520 	      break;
521 	    }
522 	}
523     }
524   return NO_REGS;
525 }
526 
527 /* Convert reloads using offsets that are too large to use indirect
528    addressing.  */
529 
530 void
531 arc_secondary_reload_conv (rtx reg, rtx mem, rtx scratch, bool store_p)
532 {
533   rtx addr;
534 
535   gcc_assert (GET_CODE (mem) == MEM);
536   addr = XEXP (mem, 0);
537 
538   /* Large offset: use a move.  FIXME: ld ops accept limms as
539      offsets.  Hence, the following move insn is not required.  */
540   emit_move_insn (scratch, addr);
541   mem = replace_equiv_address_nv (mem, scratch);
542 
543   /* Now create the move.  */
544   if (store_p)
545     emit_insn (gen_rtx_SET (mem, reg));
546   else
547     emit_insn (gen_rtx_SET (reg, mem));
548 
549   return;
550 }
551 
552 static unsigned arc_ifcvt (void);
553 
554 namespace {
555 
556 const pass_data pass_data_arc_ifcvt =
557 {
558   RTL_PASS,
559   "arc_ifcvt",				/* name */
560   OPTGROUP_NONE,			/* optinfo_flags */
561   TV_IFCVT2,				/* tv_id */
562   0,					/* properties_required */
563   0,					/* properties_provided */
564   0,					/* properties_destroyed */
565   0,					/* todo_flags_start */
566   TODO_df_finish			/* todo_flags_finish */
567 };
568 
569 class pass_arc_ifcvt : public rtl_opt_pass
570 {
571 public:
572   pass_arc_ifcvt(gcc::context *ctxt)
573   : rtl_opt_pass(pass_data_arc_ifcvt, ctxt)
574   {}
575 
576   /* opt_pass methods: */
577   opt_pass * clone () { return new pass_arc_ifcvt (m_ctxt); }
578   virtual unsigned int execute (function *) { return arc_ifcvt (); }
579 };
580 
581 } // anon namespace
582 
583 rtl_opt_pass *
584 make_pass_arc_ifcvt (gcc::context *ctxt)
585 {
586   return new pass_arc_ifcvt (ctxt);
587 }
588 
589 static unsigned arc_predicate_delay_insns (void);
590 
591 namespace {
592 
593 const pass_data pass_data_arc_predicate_delay_insns =
594 {
595   RTL_PASS,
596   "arc_predicate_delay_insns",		/* name */
597   OPTGROUP_NONE,			/* optinfo_flags */
598   TV_IFCVT2,				/* tv_id */
599   0,					/* properties_required */
600   0,					/* properties_provided */
601   0,					/* properties_destroyed */
602   0,					/* todo_flags_start */
603   TODO_df_finish			/* todo_flags_finish */
604 };
605 
606 class pass_arc_predicate_delay_insns : public rtl_opt_pass
607 {
608 public:
609   pass_arc_predicate_delay_insns(gcc::context *ctxt)
610   : rtl_opt_pass(pass_data_arc_predicate_delay_insns, ctxt)
611   {}
612 
613   /* opt_pass methods: */
614   virtual unsigned int execute (function *)
615     {
616       return arc_predicate_delay_insns ();
617     }
618 };
619 
620 } // anon namespace
621 
622 rtl_opt_pass *
623 make_pass_arc_predicate_delay_insns (gcc::context *ctxt)
624 {
625   return new pass_arc_predicate_delay_insns (ctxt);
626 }
627 
628 /* Called by OVERRIDE_OPTIONS to initialize various things.  */
629 
630 void
631 arc_init (void)
632 {
633   enum attr_tune tune_dflt = TUNE_NONE;
634 
635   switch (arc_cpu)
636     {
637     case PROCESSOR_ARC600:
638       arc_cpu_string = "ARC600";
639       tune_dflt = TUNE_ARC600;
640       break;
641 
642     case PROCESSOR_ARC601:
643       arc_cpu_string = "ARC601";
644       tune_dflt = TUNE_ARC600;
645       break;
646 
647     case PROCESSOR_ARC700:
648       arc_cpu_string = "ARC700";
649       tune_dflt = TUNE_ARC700_4_2_STD;
650       break;
651 
652     case PROCESSOR_ARCEM:
653       arc_cpu_string = "EM";
654       break;
655 
656     case PROCESSOR_ARCHS:
657       arc_cpu_string = "HS";
658       break;
659 
660     default:
661       gcc_unreachable ();
662     }
663 
664   if (arc_tune == TUNE_NONE)
665     arc_tune = tune_dflt;
666   /* Note: arc_multcost is only used in rtx_cost if speed is true.  */
667   if (arc_multcost < 0)
668     switch (arc_tune)
669       {
670       case TUNE_ARC700_4_2_STD:
671 	/* latency 7;
672 	   max throughput (1 multiply + 4 other insns) / 5 cycles.  */
673 	arc_multcost = COSTS_N_INSNS (4);
674 	if (TARGET_NOMPY_SET)
675 	  arc_multcost = COSTS_N_INSNS (30);
676 	break;
677       case TUNE_ARC700_4_2_XMAC:
678 	/* latency 5;
679 	   max throughput (1 multiply + 2 other insns) / 3 cycles.  */
680 	arc_multcost = COSTS_N_INSNS (3);
681 	if (TARGET_NOMPY_SET)
682 	  arc_multcost = COSTS_N_INSNS (30);
683 	break;
684       case TUNE_ARC600:
685 	if (TARGET_MUL64_SET)
686 	  {
687 	    arc_multcost = COSTS_N_INSNS (4);
688 	    break;
689 	  }
690 	/* Fall through.  */
691       default:
692 	arc_multcost = COSTS_N_INSNS (30);
693 	break;
694       }
695 
696   /* Support mul64 generation only for ARC600.  */
697   if (TARGET_MUL64_SET && (!TARGET_ARC600_FAMILY))
698       error ("-mmul64 not supported for ARC700 or ARCv2");
699 
700   /* MPY instructions valid only for ARC700 or ARCv2.  */
701   if (TARGET_NOMPY_SET && TARGET_ARC600_FAMILY)
702       error ("-mno-mpy supported only for ARC700 or ARCv2");
703 
704   /* mul/mac instructions only for ARC600 or ARC601.  */
705   if (TARGET_MULMAC_32BY16_SET && (!TARGET_ARC600_FAMILY))
706       error ("-mmul32x16 supported only for ARC600 or ARC601");
707 
708   if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR)
709       error ("-mno-dpfp-lrsr supported only with -mdpfp");
710 
711   /* FPX-1. No fast and compact together.  */
712   if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET)
713       || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET))
714     error ("FPX fast and compact options cannot be specified together");
715 
716   /* FPX-2. No fast-spfp for arc600 or arc601.  */
717   if (TARGET_SPFP_FAST_SET && TARGET_ARC600_FAMILY)
718     error ("-mspfp_fast not available on ARC600 or ARC601");
719 
720   /* FPX-3. No FPX extensions on pre-ARC600 cores.  */
721   if ((TARGET_DPFP || TARGET_SPFP)
722       && (!TARGET_ARCOMPACT_FAMILY && !TARGET_EM))
723     error ("FPX extensions not available on pre-ARC600 cores");
724 
725   /* FPX-4.  No FPX extensions mixed with FPU extensions for ARC HS
726      cpus.  */
727   if ((TARGET_DPFP || TARGET_SPFP)
728       && TARGET_HARD_FLOAT
729       && TARGET_HS)
730     error ("No FPX/FPU mixing allowed");
731 
732   /* Only selected multiplier configurations are available for HS.  */
733   if (TARGET_HS && ((arc_mpy_option > 2 && arc_mpy_option < 7)
734 		    || (arc_mpy_option == 1)))
735     error ("This multiplier configuration is not available for HS cores");
736 
737   /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic.  */
738   if (flag_pic && TARGET_ARC600_FAMILY)
739     {
740       warning (DK_WARNING,
741 	       "PIC is not supported for %s.  Generating non-PIC code only.",
742 	       arc_cpu_string);
743       flag_pic = 0;
744     }
745 
746   if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
747     error ("-matomic is only supported for ARC700 or ARC HS cores");
748 
749   /* ll64 ops only available for HS.  */
750   if (TARGET_LL64 && !TARGET_HS)
751     error ("-mll64 is only supported for ARC HS cores");
752 
753   /* FPU support only for V2.  */
754   if (TARGET_HARD_FLOAT)
755     {
756       if (TARGET_EM
757 	  && (arc_fpu_build & ~(FPU_SP | FPU_SF | FPU_SC | FPU_SD | FPX_DP)))
758 	error ("FPU double precision options are available for ARC HS only");
759       if (TARGET_HS && (arc_fpu_build & FPX_DP))
760 	error ("FPU double precision assist "
761 	       "options are not available for ARC HS");
762       if (!TARGET_HS && !TARGET_EM)
763 	error ("FPU options are available for ARCv2 architecture only");
764     }
765 
766   arc_init_reg_tables ();
767 
768   /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
769   memset (arc_punct_chars, 0, sizeof (arc_punct_chars));
770   arc_punct_chars['#'] = 1;
771   arc_punct_chars['*'] = 1;
772   arc_punct_chars['?'] = 1;
773   arc_punct_chars['!'] = 1;
774   arc_punct_chars['^'] = 1;
775   arc_punct_chars['&'] = 1;
776   arc_punct_chars['+'] = 1;
777   arc_punct_chars['_'] = 1;
778 
779   if (optimize > 1 && !TARGET_NO_COND_EXEC)
780     {
781       /* There are two target-independent ifcvt passes, and arc_reorg may do
782 	 one or more arc_ifcvt calls.  */
783       opt_pass *pass_arc_ifcvt_4 = make_pass_arc_ifcvt (g);
784       struct register_pass_info arc_ifcvt4_info
785 	= { pass_arc_ifcvt_4, "dbr", 1, PASS_POS_INSERT_AFTER };
786       struct register_pass_info arc_ifcvt5_info
787 	= { pass_arc_ifcvt_4->clone (), "shorten", 1, PASS_POS_INSERT_BEFORE };
788 
789       register_pass (&arc_ifcvt4_info);
790       register_pass (&arc_ifcvt5_info);
791     }
792 
793   if (flag_delayed_branch)
794     {
795       opt_pass *pass_arc_predicate_delay_insns
796 	= make_pass_arc_predicate_delay_insns (g);
797       struct register_pass_info arc_predicate_delay_info
798 	= { pass_arc_predicate_delay_insns, "dbr", 1, PASS_POS_INSERT_AFTER };
799 
800       register_pass (&arc_predicate_delay_info);
801     }
802 }
803 
804 /* Check ARC options, generate derived target attributes.  */
805 
806 static void
807 arc_override_options (void)
808 {
809   if (arc_cpu == PROCESSOR_NONE)
810     arc_cpu = PROCESSOR_ARC700;
811 
812   if (arc_size_opt_level == 3)
813     optimize_size = 1;
814 
815   if (flag_pic)
816     target_flags |= MASK_NO_SDATA_SET;
817 
818   if (flag_no_common == 255)
819     flag_no_common = !TARGET_NO_SDATA_SET;
820 
821   /* TARGET_COMPACT_CASESI needs the "q" register class.  */
822   if (TARGET_MIXED_CODE)
823     TARGET_Q_CLASS = 1;
824   if (!TARGET_Q_CLASS)
825     TARGET_COMPACT_CASESI = 0;
826   if (TARGET_COMPACT_CASESI)
827     TARGET_CASE_VECTOR_PC_RELATIVE = 1;
828 
829   /* These need to be done at start up.  It's convenient to do them here.  */
830   arc_init ();
831 }
832 
833 /* The condition codes of the ARC, and the inverse function.  */
834 /* For short branches, the "c" / "nc" names are not defined in the ARC
835    Programmers manual, so we have to use "lo" / "hs" instead.  */
836 static const char *arc_condition_codes[] =
837 {
838   "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv",
839   "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0
840 };
841 
842 enum arc_cc_code_index
843 {
844   ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N,
845   ARC_CC_C,  ARC_CC_NC, ARC_CC_V, ARC_CC_NV,
846   ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ,
847   ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC
848 };
849 
850 #define ARC_INVERSE_CONDITION_CODE(X)  ((X) ^ 1)
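/* The table above pairs each condition with its logical inverse in adjacent
   even/odd slots, so flipping the low bit inverts a condition, e.g.
   ARC_INVERSE_CONDITION_CODE (ARC_CC_EQ) == ARC_CC_NE and
   ARC_INVERSE_CONDITION_CODE (ARC_CC_GT) == ARC_CC_LE; "al" and "pnz"
   have no counterpart (their odd slots hold 0).  */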
851 
852 /* Returns the index of the ARC condition code string in
853    `arc_condition_codes'.  COMPARISON should be an rtx like
854    `(eq (...) (...))'.  */
855 
856 static int
857 get_arc_condition_code (rtx comparison)
858 {
859   switch (GET_MODE (XEXP (comparison, 0)))
860     {
861     case CCmode:
862     case SImode: /* For BRcc.  */
863       switch (GET_CODE (comparison))
864 	{
865 	case EQ : return ARC_CC_EQ;
866 	case NE : return ARC_CC_NE;
867 	case GT : return ARC_CC_GT;
868 	case LE : return ARC_CC_LE;
869 	case GE : return ARC_CC_GE;
870 	case LT : return ARC_CC_LT;
871 	case GTU : return ARC_CC_HI;
872 	case LEU : return ARC_CC_LS;
873 	case LTU : return ARC_CC_LO;
874 	case GEU : return ARC_CC_HS;
875 	default : gcc_unreachable ();
876 	}
877     case CC_ZNmode:
878       switch (GET_CODE (comparison))
879 	{
880 	case EQ : return ARC_CC_EQ;
881 	case NE : return ARC_CC_NE;
882 	case GE: return ARC_CC_P;
883 	case LT: return ARC_CC_N;
884 	case GT : return ARC_CC_PNZ;
885 	default : gcc_unreachable ();
886 	}
887     case CC_Zmode:
888       switch (GET_CODE (comparison))
889 	{
890 	case EQ : return ARC_CC_EQ;
891 	case NE : return ARC_CC_NE;
892 	default : gcc_unreachable ();
893 	}
894     case CC_Cmode:
895       switch (GET_CODE (comparison))
896 	{
897 	case LTU : return ARC_CC_C;
898 	case GEU : return ARC_CC_NC;
899 	default : gcc_unreachable ();
900 	}
901     case CC_FP_GTmode:
902       if (TARGET_ARGONAUT_SET && TARGET_SPFP)
903 	switch (GET_CODE (comparison))
904 	  {
905 	  case GT  : return ARC_CC_N;
906 	  case UNLE: return ARC_CC_P;
907 	  default : gcc_unreachable ();
908 	}
909       else
910 	switch (GET_CODE (comparison))
911 	  {
912 	  case GT   : return ARC_CC_HI;
913 	  case UNLE : return ARC_CC_LS;
914 	  default : gcc_unreachable ();
915 	}
916     case CC_FP_GEmode:
917       /* Same for FPX and non-FPX.  */
918       switch (GET_CODE (comparison))
919 	{
920 	case GE   : return ARC_CC_HS;
921 	case UNLT : return ARC_CC_LO;
922 	default : gcc_unreachable ();
923 	}
924     case CC_FP_UNEQmode:
925       switch (GET_CODE (comparison))
926 	{
927 	case UNEQ : return ARC_CC_EQ;
928 	case LTGT : return ARC_CC_NE;
929 	default : gcc_unreachable ();
930 	}
931     case CC_FP_ORDmode:
932       switch (GET_CODE (comparison))
933 	{
934 	case UNORDERED : return ARC_CC_C;
935 	case ORDERED   : return ARC_CC_NC;
936 	default : gcc_unreachable ();
937 	}
938     case CC_FPXmode:
939       switch (GET_CODE (comparison))
940 	{
941 	case EQ        : return ARC_CC_EQ;
942 	case NE        : return ARC_CC_NE;
943 	case UNORDERED : return ARC_CC_C;
944 	case ORDERED   : return ARC_CC_NC;
945 	case LTGT      : return ARC_CC_HI;
946 	case UNEQ      : return ARC_CC_LS;
947 	default : gcc_unreachable ();
948 	}
949     case CC_FPUmode:
950       switch (GET_CODE (comparison))
951 	{
952 	case EQ	       : return ARC_CC_EQ;
953 	case NE	       : return ARC_CC_NE;
954 	case GT	       : return ARC_CC_GT;
955 	case GE	       : return ARC_CC_GE;
956 	case LT	       : return ARC_CC_C;
957 	case LE	       : return ARC_CC_LS;
958 	case UNORDERED : return ARC_CC_V;
959 	case ORDERED   : return ARC_CC_NV;
960 	case UNGT      : return ARC_CC_HI;
961 	case UNGE      : return ARC_CC_HS;
962 	case UNLT      : return ARC_CC_LT;
963 	case UNLE      : return ARC_CC_LE;
964 	  /* UNEQ and LTGT do not have representation.  */
965 	case LTGT      : /* Fall through.  */
966 	case UNEQ      : /* Fall through.  */
967 	default : gcc_unreachable ();
968 	}
969     case CC_FPU_UNEQmode:
970       switch (GET_CODE (comparison))
971 	{
972 	case LTGT : return ARC_CC_NE;
973 	case UNEQ : return ARC_CC_EQ;
974 	default : gcc_unreachable ();
975 	}
976     default : gcc_unreachable ();
977     }
978   /*NOTREACHED*/
979   return (42);
980 }
981 
982 /* Return true if COMPARISON has a short form that can accommodate OFFSET.  */
983 
984 bool
985 arc_short_comparison_p (rtx comparison, int offset)
986 {
987   gcc_assert (ARC_CC_NC == ARC_CC_HS);
988   gcc_assert (ARC_CC_C == ARC_CC_LO);
989   switch (get_arc_condition_code (comparison))
990     {
991     case ARC_CC_EQ: case ARC_CC_NE:
992       return offset >= -512 && offset <= 506;
993     case ARC_CC_GT: case ARC_CC_LE: case ARC_CC_GE: case ARC_CC_LT:
994     case ARC_CC_HI: case ARC_CC_LS: case ARC_CC_LO: case ARC_CC_HS:
995       return offset >= -64 && offset <= 58;
996     default:
997       return false;
998     }
999 }
1000 
1001 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1002    return the mode to be used for the comparison.  */
1003 
1004 machine_mode
1005 arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
1006 {
1007   machine_mode mode = GET_MODE (x);
1008   rtx x1;
1009 
1010   /* For an operation that sets the condition codes as a side-effect, the
1011      C and V flags are not set as they are for cmp, so we can only use comparisons where
1012      this doesn't matter.  (For LT and GE we can use "mi" and "pl"
1013      instead.)  */
1014   /* ??? We could use "pnz" for greater than zero, however, we could then
1015      get into trouble because the comparison could not be reversed.  */
1016   if (GET_MODE_CLASS (mode) == MODE_INT
1017       && y == const0_rtx
1018       && (op == EQ || op == NE
1019 	  || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x)) <= 4)))
1020     return CC_ZNmode;
1021 
1022   /* add.f for if (a+b) */
1023   if (mode == SImode
1024       && GET_CODE (y) == NEG
1025       && (op == EQ || op == NE))
1026     return CC_ZNmode;
1027 
1028   /* Check if this is a test suitable for bxor.f .  */
1029   if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1030       && ((INTVAL (y) - 1) & INTVAL (y)) == 0
1031       && INTVAL (y))
1032     return CC_Zmode;
1033 
1034   /* Check if this is a test suitable for add / bmsk.f .  */
1035   if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1036       && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1)))
1037       && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0
1038       && (~INTVAL (x1) | INTVAL (y)) < 0
1039       && (~INTVAL (x1) | INTVAL (y)) > -0x800)
1040     return CC_Zmode;
1041 
1042   if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
1043       && GET_CODE (x) == PLUS
1044       && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
1045     return CC_Cmode;
1046 
1047   if (TARGET_ARGONAUT_SET
1048       && ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP)))
1049     switch (op)
1050       {
1051       case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1052 	return CC_FPXmode;
1053       case LT: case UNGE: case GT: case UNLE:
1054 	return CC_FP_GTmode;
1055       case LE: case UNGT: case GE: case UNLT:
1056 	return CC_FP_GEmode;
1057       default: gcc_unreachable ();
1058       }
1059   else if (TARGET_HARD_FLOAT
1060 	   && ((mode == SFmode && TARGET_FP_SP_BASE)
1061 	       || (mode == DFmode && TARGET_FP_DP_BASE)))
1062     switch (op)
1063       {
1064       case EQ:
1065       case NE:
1066       case UNORDERED:
1067       case ORDERED:
1068       case UNLT:
1069       case UNLE:
1070       case UNGT:
1071       case UNGE:
1072       case LT:
1073       case LE:
1074       case GT:
1075       case GE:
1076 	return CC_FPUmode;
1077 
1078       case LTGT:
1079       case UNEQ:
1080 	return CC_FPU_UNEQmode;
1081 
1082       default:
1083 	gcc_unreachable ();
1084       }
1085   else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE)
1086     {
1087       switch (op)
1088 	{
1089 	case EQ: case NE: return CC_Zmode;
1090 	case LT: case UNGE:
1091 	case GT: case UNLE: return CC_FP_GTmode;
1092 	case LE: case UNGT:
1093 	case GE: case UNLT: return CC_FP_GEmode;
1094 	case UNEQ: case LTGT: return CC_FP_UNEQmode;
1095 	case ORDERED: case UNORDERED: return CC_FP_ORDmode;
1096 	default: gcc_unreachable ();
1097 	}
1098     }
1099   return CCmode;
1100 }
1101 
1102 /* Vectors to keep interesting information about registers where it can easily
1103    be got.  We used to use the actual mode value as the bit number, but there
1104    is (or may be) more than 32 modes now.  Instead we use two tables: one
1105    indexed by hard register number, and one indexed by mode.  */
1106 
1107 /* The purpose of arc_mode_class is to shrink the range of modes so that
1108    they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
1109    mapped into one arc_mode_class mode.  */
1110 
1111 enum arc_mode_class {
1112   C_MODE,
1113   S_MODE, D_MODE, T_MODE, O_MODE,
1114   SF_MODE, DF_MODE, TF_MODE, OF_MODE,
1115   V_MODE
1116 };
1117 
1118 /* Modes for condition codes.  */
1119 #define C_MODES (1 << (int) C_MODE)
1120 
1121 /* Modes for single-word and smaller quantities.  */
1122 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
1123 
1124 /* Modes for double-word and smaller quantities.  */
1125 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
1126 
1127 /* Mode for 8-byte DF values only.  */
1128 #define DF_MODES (1 << DF_MODE)
1129 
1130 /* Modes for quad-word and smaller quantities.  */
1131 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
1132 
1133 /* Modes for 128-bit vectors.  */
1134 #define V_MODES (1 << (int) V_MODE)
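/* Worked values for the masks above (derived from the enum, shown only as
   a cross-check): S_MODE = 1 and SF_MODE = 5 give S_MODES = 0x22; adding
   D_MODE (2) and DF_MODE (6) gives D_MODES = 0x66; adding T_MODE (3) and
   TF_MODE (7) gives T_MODES = 0xee.  */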
1135 
1136 /* Value is 1 if register/mode pair is acceptable on arc.  */
1137 
1138 unsigned int arc_hard_regno_mode_ok[] = {
1139   T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1140   T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1141   T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES,
1142   D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1143 
1144   /* ??? Leave these as S_MODES for now.  */
1145   S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1146   DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES,
1147   S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1148   S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES,
1149 
1150   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1151   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1152   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1153   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1154 
1155   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1156   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1157   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1158   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1159 
1160   S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1161   S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES
1162 };
1163 
1164 unsigned int arc_mode_class [NUM_MACHINE_MODES];
1165 
1166 enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
1167 
1168 enum reg_class
1169 arc_preferred_reload_class (rtx, enum reg_class cl)
1170 {
1171   if ((cl) == CHEAP_CORE_REGS  || (cl) == WRITABLE_CORE_REGS)
1172     return GENERAL_REGS;
1173   return cl;
1174 }
1175 
1176 /* Initialize the arc_mode_class array.  */
1177 
1178 static void
1179 arc_init_reg_tables (void)
1180 {
1181   int i;
1182 
1183   for (i = 0; i < NUM_MACHINE_MODES; i++)
1184     {
1185       machine_mode m = (machine_mode) i;
1186 
1187       switch (GET_MODE_CLASS (m))
1188 	{
1189 	case MODE_INT:
1190 	case MODE_PARTIAL_INT:
1191 	case MODE_COMPLEX_INT:
1192 	  if (GET_MODE_SIZE (m) <= 4)
1193 	    arc_mode_class[i] = 1 << (int) S_MODE;
1194 	  else if (GET_MODE_SIZE (m) == 8)
1195 	    arc_mode_class[i] = 1 << (int) D_MODE;
1196 	  else if (GET_MODE_SIZE (m) == 16)
1197 	    arc_mode_class[i] = 1 << (int) T_MODE;
1198 	  else if (GET_MODE_SIZE (m) == 32)
1199 	    arc_mode_class[i] = 1 << (int) O_MODE;
1200 	  else
1201 	    arc_mode_class[i] = 0;
1202 	  break;
1203 	case MODE_FLOAT:
1204 	case MODE_COMPLEX_FLOAT:
1205 	  if (GET_MODE_SIZE (m) <= 4)
1206 	    arc_mode_class[i] = 1 << (int) SF_MODE;
1207 	  else if (GET_MODE_SIZE (m) == 8)
1208 	    arc_mode_class[i] = 1 << (int) DF_MODE;
1209 	  else if (GET_MODE_SIZE (m) == 16)
1210 	    arc_mode_class[i] = 1 << (int) TF_MODE;
1211 	  else if (GET_MODE_SIZE (m) == 32)
1212 	    arc_mode_class[i] = 1 << (int) OF_MODE;
1213 	  else
1214 	    arc_mode_class[i] = 0;
1215 	  break;
1216 	case MODE_VECTOR_INT:
1217 	  arc_mode_class [i] = (1<< (int) V_MODE);
1218 	  break;
1219 	case MODE_CC:
1220 	default:
1221 	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
1222 	     we must explicitly check for them here.  */
1223 	  if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode
1224 	      || i == (int) CC_Cmode
1225 	      || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode
1226 	      || i == CC_FPUmode || i == CC_FPU_UNEQmode)
1227 	    arc_mode_class[i] = 1 << (int) C_MODE;
1228 	  else
1229 	    arc_mode_class[i] = 0;
1230 	  break;
1231 	}
1232     }
1233 }
1234 
1235 /* Core registers 56..59 are used for multiply extension options.
1236    The dsp option uses r56 and r57, these are then named acc1 and acc2.
1237    acc1 is the highpart, and acc2 the lowpart, so which register gets which
1238    number depends on endianness.
1239    The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi.
1240    Because mlo / mhi form a 64 bit value, we use different gcc internal
1241    register numbers to make them form a register pair as the gcc internals
1242    know it.  mmid gets number 57, if still available, and mlo / mhi get
1243    number 58 and 59, depending on endianness.  We use DBX_REGISTER_NUMBER
1244    to map this back.  */
1245   char rname56[5] = "r56";
1246   char rname57[5] = "r57";
1247   char rname58[5] = "r58";
1248   char rname59[5] = "r59";
1249   char rname29[7] = "ilink1";
1250   char rname30[7] = "ilink2";
1251 
1252 static void
1253 arc_conditional_register_usage (void)
1254 {
1255   int regno;
1256   int i;
1257   int fix_start = 60, fix_end = 55;
1258 
1259   if (TARGET_V2)
1260     {
1261       /* For ARCv2 the core register set is changed.  */
1262       strcpy (rname29, "ilink");
1263       strcpy (rname30, "r30");
1264       fixed_regs[30] = call_used_regs[30] = 1;
1265    }
1266 
1267   if (TARGET_MUL64_SET)
1268     {
1269       fix_start = 57;
1270       fix_end = 59;
1271 
1272       /* We don't provide a name for mmid.  In rtl / assembly resource lists,
1273 	 you are supposed to refer to it as mlo & mhi, e.g.
1274 	 (zero_extract:SI (reg:DI 58) (const_int 32) (const_int 16)) .
1275 	 In an actual asm instruction, you would of course use mmid.
1276 	 The point of avoiding having a separate register for mmid is that
1277 	 this way, we don't have to carry clobbers of that reg around in every
1278 	 instruction that modifies mlo and/or mhi.  */
1279       strcpy (rname57, "");
1280       strcpy (rname58, TARGET_BIG_ENDIAN ? "mhi" : "mlo");
1281       strcpy (rname59, TARGET_BIG_ENDIAN ? "mlo" : "mhi");
1282     }
1283   if (TARGET_MULMAC_32BY16_SET)
1284     {
1285       fix_start = 56;
1286       fix_end = fix_end > 57 ? fix_end : 57;
1287       strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2");
1288       strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1");
1289     }
1290   for (regno = fix_start; regno <= fix_end; regno++)
1291     {
1292       if (!fixed_regs[regno])
1293 	warning (0, "multiply option implies r%d is fixed", regno);
1294       fixed_regs [regno] = call_used_regs[regno] = 1;
1295     }
1296   if (TARGET_Q_CLASS)
1297     {
1298       reg_alloc_order[2] = 12;
1299       reg_alloc_order[3] = 13;
1300       reg_alloc_order[4] = 14;
1301       reg_alloc_order[5] = 15;
1302       reg_alloc_order[6] = 1;
1303       reg_alloc_order[7] = 0;
1304       reg_alloc_order[8] = 4;
1305       reg_alloc_order[9] = 5;
1306       reg_alloc_order[10] = 6;
1307       reg_alloc_order[11] = 7;
1308       reg_alloc_order[12] = 8;
1309       reg_alloc_order[13] = 9;
1310       reg_alloc_order[14] = 10;
1311       reg_alloc_order[15] = 11;
1312     }
1313   if (TARGET_SIMD_SET)
1314     {
1315       int i;
1316       for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
1317 	reg_alloc_order [i] = i;
1318       for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
1319 	   i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
1320 	reg_alloc_order [i] = i;
1321     }
1322   /* For ARC600, lp_count may not be read in an instruction
1323      following immediately after another one setting it to a new value.
1324      There was some discussion on how to enforce scheduling constraints for
1325      processors with missing interlocks on the gcc mailing list:
1326      http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html .
1327      However, we can't actually use this approach, because for ARC the
1328      delay slot scheduling pass is active, which runs after
1329      machine_dependent_reorg.  */
1330   if (TARGET_ARC600)
1331     CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
1332   else if (!TARGET_LP_WR_INTERLOCK)
1333     fixed_regs[LP_COUNT] = 1;
1334   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1335     if (!call_used_regs[regno])
1336       CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
1337   for (regno = 32; regno < 60; regno++)
1338     if (!fixed_regs[regno])
1339       SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno);
1340   if (!TARGET_ARC600_FAMILY)
1341     {
1342       for (regno = 32; regno <= 60; regno++)
1343 	CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno);
1344 
1345       /* If they have used -ffixed-lp_count, make sure it takes
1346 	 effect.  */
1347       if (fixed_regs[LP_COUNT])
1348 	{
1349 	  CLEAR_HARD_REG_BIT (reg_class_contents[LPCOUNT_REG], LP_COUNT);
1350 	  CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
1351 	  CLEAR_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], LP_COUNT);
1352 
1353 	  /* Instead of taking out SF_MODE like below, forbid it outright.  */
1354 	  arc_hard_regno_mode_ok[60] = 0;
1355 	}
1356       else
1357 	arc_hard_regno_mode_ok[60] = 1 << (int) S_MODE;
1358     }
1359 
1360   /* ARCHS has 64-bit data-path which makes use of the even-odd paired
1361      registers.  */
1362   if (TARGET_HS)
1363     {
1364       for (regno = 1; regno < 32; regno +=2)
1365 	{
1366 	  arc_hard_regno_mode_ok[regno] = S_MODES;
1367 	}
1368     }
1369 
1370   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1371     {
1372       if (i < 29)
1373 	{
1374 	  if (TARGET_Q_CLASS && ((i <= 3) || ((i >= 12) && (i <= 15))))
1375 	    arc_regno_reg_class[i] = ARCOMPACT16_REGS;
1376 	  else
1377 	    arc_regno_reg_class[i] = GENERAL_REGS;
1378 	}
1379       else if (i < 60)
1380 	arc_regno_reg_class[i]
1381 	  = (fixed_regs[i]
1382 	     ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)
1383 		? CHEAP_CORE_REGS : ALL_CORE_REGS)
1384 	     : (((!TARGET_ARC600_FAMILY)
1385 		 && TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i))
1386 		? CHEAP_CORE_REGS : WRITABLE_CORE_REGS));
1387       else
1388 	arc_regno_reg_class[i] = NO_REGS;
1389     }
1390 
1391   /* ARCOMPACT16_REGS is empty, if TARGET_Q_CLASS has not been activated.  */
1392   if (!TARGET_Q_CLASS)
1393     {
1394       CLEAR_HARD_REG_SET(reg_class_contents [ARCOMPACT16_REGS]);
1395       CLEAR_HARD_REG_SET(reg_class_contents [AC16_BASE_REGS]);
1396     }
1397 
1398   gcc_assert (FIRST_PSEUDO_REGISTER >= 144);
1399 
1400   /* Handle Special Registers.  */
1401   arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register.  */
1402   if (!TARGET_V2)
1403     arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register.  */
1404   arc_regno_reg_class[31] = LINK_REGS; /* blink register.  */
1405   arc_regno_reg_class[60] = LPCOUNT_REG;
1406   arc_regno_reg_class[61] = NO_REGS;      /* CC_REG: must be NO_REGS.  */
1407   arc_regno_reg_class[62] = GENERAL_REGS;
1408 
1409   if (TARGET_DPFP)
1410     {
1411       for (i = 40; i < 44; ++i)
1412 	{
1413 	  arc_regno_reg_class[i] = DOUBLE_REGS;
1414 
1415 	  /* Unless they want us to do 'mov d1, 0x00000000' make sure
1416 	     no attempt is made to use such a register as a destination
1417 	     operand in *movdf_insn.  */
1418 	  if (!TARGET_ARGONAUT_SET)
1419 	    {
1420 	    /* Make sure no 'c', 'w', 'W', or 'Rac' constraint is
1421 	       interpreted to mean they can use D1 or D2 in their insn.  */
1422 	    CLEAR_HARD_REG_BIT(reg_class_contents[CHEAP_CORE_REGS       ], i);
1423 	    CLEAR_HARD_REG_BIT(reg_class_contents[ALL_CORE_REGS         ], i);
1424 	    CLEAR_HARD_REG_BIT(reg_class_contents[WRITABLE_CORE_REGS    ], i);
1425 	    CLEAR_HARD_REG_BIT(reg_class_contents[MPY_WRITABLE_CORE_REGS], i);
1426 	    }
1427 	}
1428     }
1429   else
1430     {
1431       /* Disable all DOUBLE_REGISTER settings,
1432 	 if not generating DPFP code.  */
1433       arc_regno_reg_class[40] = ALL_REGS;
1434       arc_regno_reg_class[41] = ALL_REGS;
1435       arc_regno_reg_class[42] = ALL_REGS;
1436       arc_regno_reg_class[43] = ALL_REGS;
1437 
1438       arc_hard_regno_mode_ok[40] = 0;
1439       arc_hard_regno_mode_ok[42] = 0;
1440 
1441       CLEAR_HARD_REG_SET(reg_class_contents [DOUBLE_REGS]);
1442     }
1443 
1444   if (TARGET_SIMD_SET)
1445     {
1446       gcc_assert (ARC_FIRST_SIMD_VR_REG == 64);
1447       gcc_assert (ARC_LAST_SIMD_VR_REG  == 127);
1448 
1449       for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
1450 	arc_regno_reg_class [i] =  SIMD_VR_REGS;
1451 
1452       gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128);
1453       gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128);
1454       gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136);
1455       gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG  == 143);
1456 
1457       for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
1458 	   i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
1459 	arc_regno_reg_class [i] =  SIMD_DMA_CONFIG_REGS;
1460     }
1461 
1462   /* pc : r63 */
1463   arc_regno_reg_class[PROGRAM_COUNTER_REGNO] = GENERAL_REGS;
1464 
1465   /* ARCv2 accumulator.  */
1466   if (TARGET_V2
1467       && (TARGET_FP_DP_FUSED || TARGET_FP_SP_FUSED))
1468   {
1469     arc_regno_reg_class[ACCL_REGNO] = WRITABLE_CORE_REGS;
1470     arc_regno_reg_class[ACCH_REGNO] = WRITABLE_CORE_REGS;
1471     SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], ACCL_REGNO);
1472     SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], ACCH_REGNO);
1473     SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], ACCL_REGNO);
1474     SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], ACCH_REGNO);
1475     arc_hard_regno_mode_ok[ACC_REG_FIRST] = D_MODES;
1476   }
1477 }
1478 
1479 /* Handle an "interrupt" attribute; arguments as in
1480    struct attribute_spec.handler.  */
1481 
1482 static tree
1483 arc_handle_interrupt_attribute (tree *, tree name, tree args, int,
1484 				bool *no_add_attrs)
1485 {
1486   gcc_assert (args);
1487 
1488   tree value = TREE_VALUE (args);
1489 
1490   if (TREE_CODE (value) != STRING_CST)
1491     {
1492       warning (OPT_Wattributes,
1493 	       "argument of %qE attribute is not a string constant",
1494 	       name);
1495       *no_add_attrs = true;
1496     }
1497   else if (strcmp (TREE_STRING_POINTER (value), "ilink1")
1498 	   && strcmp (TREE_STRING_POINTER (value), "ilink2")
1499 	   && !TARGET_V2)
1500     {
1501       warning (OPT_Wattributes,
1502 	       "argument of %qE attribute is not \"ilink1\" or \"ilink2\"",
1503 	       name);
1504       *no_add_attrs = true;
1505     }
1506   else if (TARGET_V2
1507 	   && strcmp (TREE_STRING_POINTER (value), "ilink"))
1508     {
1509       warning (OPT_Wattributes,
1510 	       "argument of %qE attribute is not \"ilink\"",
1511 	       name);
1512       *no_add_attrs = true;
1513     }
1514 
1515   return NULL_TREE;
1516 }
1517 
1518 /* Return zero if TYPE1 and TYPE2 are incompatible, one if they are compatible,
1519    and two if they are nearly compatible (which causes a warning to be
1520    generated).  */
1521 
1522 static int
1523 arc_comp_type_attributes (const_tree type1,
1524 			  const_tree type2)
1525 {
1526   int l1, l2, m1, m2, s1, s2;
1527 
1528   /* Check for mismatch of non-default calling convention.  */
1529   if (TREE_CODE (type1) != FUNCTION_TYPE)
1530     return 1;
1531 
1532   /* Check for mismatched call attributes.  */
1533   l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
1534   l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
1535   m1 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type1)) != NULL;
1536   m2 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type2)) != NULL;
1537   s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
1538   s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
1539 
1540   /* Only bother to check if an attribute is defined.  */
1541   if (l1 | l2 | m1 | m2 | s1 | s2)
1542     {
1543       /* If one type has an attribute, the other must have the same attribute.  */
1544       if ((l1 != l2) || (m1 != m2) || (s1 != s2))
1545 	return 0;
1546 
1547       /* Disallow mixed attributes.  */
1548       if (l1 + m1 + s1 > 1)
1549 	return 0;
1550     }
1551 
1552 
1553   return 1;
1554 }
1555 
1556 /* Set the default attributes for TYPE.  */
1557 
1558 void
1559 arc_set_default_type_attributes (tree type ATTRIBUTE_UNUSED)
1560 {
1561   gcc_unreachable();
1562 }
1563 
1564 /* Misc. utilities.  */
1565 
1566 /* X and Y are two things to compare using CODE.  Emit the compare insn and
1567    return the rtx for the cc reg in the proper mode.  */
1568 
1569 rtx
1570 gen_compare_reg (rtx comparison, machine_mode omode)
1571 {
1572   enum rtx_code code = GET_CODE (comparison);
1573   rtx x = XEXP (comparison, 0);
1574   rtx y = XEXP (comparison, 1);
1575   rtx tmp, cc_reg;
1576   machine_mode mode, cmode;
1577 
1578 
1579   cmode = GET_MODE (x);
1580   if (cmode == VOIDmode)
1581     cmode = GET_MODE (y);
1582   gcc_assert (cmode == SImode || cmode == SFmode || cmode == DFmode);
1583   if (cmode == SImode)
1584     {
1585       if (!register_operand (x, SImode))
1586 	{
1587 	  if (register_operand (y, SImode))
1588 	    {
1589 	      tmp = x;
1590 	      x = y;
1591 	      y = tmp;
1592 	      code = swap_condition (code);
1593 	    }
1594 	  else
1595 	    x = copy_to_mode_reg (SImode, x);
1596 	}
1597       if (GET_CODE (y) == SYMBOL_REF && flag_pic)
1598 	y = copy_to_mode_reg (SImode, y);
1599     }
1600   else
1601     {
1602       x = force_reg (cmode, x);
1603       y = force_reg (cmode, y);
1604     }
1605   mode = SELECT_CC_MODE (code, x, y);
1606 
1607   cc_reg = gen_rtx_REG (mode, CC_REG);
1608 
1609   /* ??? FIXME (x-y)==0, as done by both cmpsfpx_raw and
1610      cmpdfpx_raw, is not a correct comparison for floats:
1611         http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
1612    */
1613   if (TARGET_ARGONAUT_SET
1614       && ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP)))
1615     {
1616       switch (code)
1617 	{
1618 	case NE: case EQ: case LT: case UNGE: case LE: case UNGT:
1619 	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1620 	  break;
1621 	case GT: case UNLE: case GE: case UNLT:
1622 	  code = swap_condition (code);
1623 	  tmp = x;
1624 	  x = y;
1625 	  y = tmp;
1626 	  break;
1627 	default:
1628 	  gcc_unreachable ();
1629 	}
1630       if (cmode == SFmode)
1631       {
1632 	emit_insn (gen_cmpsfpx_raw (x, y));
1633       }
1634       else /* DFmode */
1635       {
1636 	/* The insns accept Dx regs directly.  */
1637 	emit_insn (gen_cmpdfpx_raw (x, y));
1638       }
1639 
1640       if (mode != CC_FPXmode)
1641 	emit_insn (gen_rtx_SET (cc_reg,
1642 				gen_rtx_COMPARE (mode,
1643 						 gen_rtx_REG (CC_FPXmode, 61),
1644 						 const0_rtx)));
1645     }
1646   else if (TARGET_HARD_FLOAT
1647 	   && ((cmode == SFmode && TARGET_FP_SP_BASE)
1648 	       || (cmode == DFmode && TARGET_FP_DP_BASE)))
1649     emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
1650   else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE)
1651     {
1652       rtx op0 = gen_rtx_REG (cmode, 0);
1653       rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD);
1654       bool swap = false;
1655 
1656       switch (code)
1657 	{
1658 	case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
1659 	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1660 	  break;
1661 	case LT: case UNGE: case LE: case UNGT:
1662 	  code = swap_condition (code);
1663 	  swap = true;
1664 	  break;
1665 	default:
1666 	  gcc_unreachable ();
1667 	}
1668       if (currently_expanding_to_rtl)
1669 	{
1670 	  if (swap)
1671 	    {
1672 	      tmp = x;
1673 	      x = y;
1674 	      y = tmp;
1675 	    }
1676 	  emit_move_insn (op0, x);
1677 	  emit_move_insn (op1, y);
1678 	}
1679       else
1680 	{
1681 	  gcc_assert (rtx_equal_p (op0, x));
1682 	  gcc_assert (rtx_equal_p (op1, y));
1683 	  if (swap)
1684 	    {
1685 	      op0 = y;
1686 	      op1 = x;
1687 	    }
1688 	}
1689       emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1)));
1690     }
1691   else
1692     emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
1693   return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx);
1694 }
1695 
1696 /* Return true if VALUE, a const_double, will fit in a limm (4 byte number).
1697    We assume the value can be either signed or unsigned.  */
1698 
1699 bool
1700 arc_double_limm_p (rtx value)
1701 {
1702   HOST_WIDE_INT low, high;
1703 
1704   gcc_assert (GET_CODE (value) == CONST_DOUBLE);
1705 
1706   if (TARGET_DPFP)
1707     return true;
1708 
1709   low = CONST_DOUBLE_LOW (value);
1710   high = CONST_DOUBLE_HIGH (value);
1711 
1712   if (low & 0x80000000)
1713     {
1714       return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0)
1715 	      || (((low & - (unsigned HOST_WIDE_INT) 0x80000000)
1716 		   == - (unsigned HOST_WIDE_INT) 0x80000000)
1717 		  && high == -1));
1718     }
1719   else
1720     {
1721       return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0;
1722     }
1723 }
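/* Worked summary of the test above, stated in terms of the 64-bit value
   (with TARGET_DPFP everything is accepted):
     0x000000007fffffff  fits - positive value, bit 31 clear, high word 0.
     0x0000000080000000  fits - treated as an unsigned 32-bit limm.
     0xffffffff80000000  fits - sign extension of a negative 32-bit value.
     0x0000000100000000  does not fit - more than 32 significant bits.  */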
1724 
1725 /* Do any needed setup for a variadic function.  For the ARC, we must
1726    create a register parameter block, and then copy any anonymous arguments
1727    in registers to memory.
1728 
1729    CUM has not been updated for the last named argument which has type TYPE
1730    and mode MODE, and we rely on this fact.  */
1731 
1732 static void
1733 arc_setup_incoming_varargs (cumulative_args_t args_so_far,
1734 			    machine_mode mode, tree type,
1735 			    int *pretend_size, int no_rtl)
1736 {
1737   int first_anon_arg;
1738   CUMULATIVE_ARGS next_cum;
1739 
1740   /* We must treat `__builtin_va_alist' as an anonymous arg.  */
1741 
1742   next_cum = *get_cumulative_args (args_so_far);
1743   arc_function_arg_advance (pack_cumulative_args (&next_cum),
1744 			    mode, type, true);
1745   first_anon_arg = next_cum;
1746 
1747   if (FUNCTION_ARG_REGNO_P (first_anon_arg))
1748     {
1749       /* First anonymous (unnamed) argument is in a reg.  */
1750 
1751       /* Note that first_reg_offset < MAX_ARC_PARM_REGS.  */
1752       int first_reg_offset = first_anon_arg;
1753 
1754       if (!no_rtl)
1755 	{
1756 	  rtx regblock
1757 	    = gen_rtx_MEM (BLKmode, plus_constant (Pmode, arg_pointer_rtx,
1758 			   FIRST_PARM_OFFSET (0)));
1759 	  move_block_from_reg (first_reg_offset, regblock,
1760 			       MAX_ARC_PARM_REGS - first_reg_offset);
1761 	}
1762 
1763       *pretend_size
1764 	= ((MAX_ARC_PARM_REGS - first_reg_offset ) * UNITS_PER_WORD);
1765     }
1766 }
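/* Hedged example, assuming MAX_ARC_PARM_REGS is 8 (arguments in r0-r7):
   for "int vsum (int n, ...)" the named argument lands in r0, so the
   first anonymous argument is r1.  The code above then stores r1-r7
   into the register parameter block and sets *pretend_size to
   (8 - 1) * UNITS_PER_WORD = 28 bytes.  */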
1767 
1768 /* Cost functions.  */
1769 
1770 /* Provide the costs of an addressing mode that contains ADDR.
1771    If ADDR is not a valid address, its cost is irrelevant.  */
1772 
1773 int
1774 arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed)
1775 {
1776   switch (GET_CODE (addr))
1777     {
1778     case REG :
1779       return speed || satisfies_constraint_Rcq (addr) ? 0 : 1;
1780     case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC:
1781     case PRE_MODIFY: case POST_MODIFY:
1782       return !speed;
1783 
1784     case LABEL_REF :
1785     case SYMBOL_REF :
1786     case CONST :
1787       /* Most likely needs a LIMM.  */
1788       return COSTS_N_INSNS (1);
1789 
1790     case PLUS :
1791       {
1792 	register rtx plus0 = XEXP (addr, 0);
1793 	register rtx plus1 = XEXP (addr, 1);
1794 
1795 	if (GET_CODE (plus0) != REG
1796 	    && (GET_CODE (plus0) != MULT
1797 		|| !CONST_INT_P (XEXP (plus0, 1))
1798 		|| (INTVAL (XEXP (plus0, 1)) != 2
1799 		    && INTVAL (XEXP (plus0, 1)) != 4)))
1800 	  break;
1801 
1802 	switch (GET_CODE (plus1))
1803 	  {
1804 	  case CONST_INT :
1805 	    return (!RTX_OK_FOR_OFFSET_P (SImode, plus1)
1806 		    ? COSTS_N_INSNS (1)
1807 		    : speed
1808 		    ? 0
1809 		    : (satisfies_constraint_Rcq (plus0)
1810 		       && satisfies_constraint_O (plus1))
1811 		    ? 0
1812 		    : 1);
1813 	  case REG:
1814 	    return (speed < 1 ? 0
1815 		    : (satisfies_constraint_Rcq (plus0)
1816 		       && satisfies_constraint_Rcq (plus1))
1817 		    ? 0 : 1);
1818 	  case CONST :
1819 	  case SYMBOL_REF :
1820 	  case LABEL_REF :
1821 	    return COSTS_N_INSNS (1);
1822 	  default:
1823 	    break;
1824 	  }
1825 	break;
1826       }
1827     default:
1828       break;
1829     }
1830 
1831   return 4;
1832 }
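/* Hedged examples of the costs computed above (with speed == true):
     (reg r1)                          -> 0
     (plus (reg r1) (const_int 8))     -> 0                 small offset
     (plus (reg r1) (const_int 4096))  -> COSTS_N_INSNS (1) needs a LIMM
     (symbol_ref "x")                  -> COSTS_N_INSNS (1) needs a LIMM
   Anything unrecognised falls through to the default cost of 4.  */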
1833 
1834 /* Emit instruction X with the frame related bit set.  */
1835 
1836 static rtx
1837 frame_insn (rtx x)
1838 {
1839   x = emit_insn (x);
1840   RTX_FRAME_RELATED_P (x) = 1;
1841   return x;
1842 }
1843 
1844 /* Emit a frame insn to move SRC to DST.  */
1845 
1846 static rtx
1847 frame_move (rtx dst, rtx src)
1848 {
1849   rtx tmp = gen_rtx_SET (dst, src);
1850   RTX_FRAME_RELATED_P (tmp) = 1;
1851   return frame_insn (tmp);
1852 }
1853 
1854 /* Like frame_move, but add a REG_INC note for REG if ADDR contains an
1855    auto increment address, or is zero.  */
1856 
1857 static rtx
1858 frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr)
1859 {
1860   rtx insn = frame_move (dst, src);
1861 
1862   if (!addr
1863       || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC
1864       || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY)
1865     add_reg_note (insn, REG_INC, reg);
1866   return insn;
1867 }
1868 
1869 /* Emit a frame insn which adjusts a frame address register REG by OFFSET.  */
1870 
1871 static rtx
1872 frame_add (rtx reg, HOST_WIDE_INT offset)
1873 {
1874   gcc_assert ((offset & 0x3) == 0);
1875   if (!offset)
1876     return NULL_RTX;
1877   return frame_move (reg, plus_constant (Pmode, reg, offset));
1878 }
1879 
1880 /* Emit a frame insn which adjusts stack pointer by OFFSET.  */
1881 
1882 static rtx
1883 frame_stack_add (HOST_WIDE_INT offset)
1884 {
1885   return frame_add (stack_pointer_rtx, offset);
1886 }
1887 
1888 /* Traditionally, we push saved registers first in the prologue,
1889    then we allocate the rest of the frame - and reverse in the epilogue.
1890    This still has its merits for ease of debugging, or saving code size
1891    or even execution time if the stack frame is so large that some accesses
1892    can't be encoded anymore with offsets in the instruction code when using
1893    a different scheme.
1894    Also, it would be a good starting point if we got instructions to help
1895    with register save/restore.
1896 
1897    However, often stack frames are small, and the pushing / popping has
1898    some costs:
1899    - the stack modification prevents a lot of scheduling.
1900    - frame allocation / deallocation needs extra instructions.
1901    - unless we know that we compile ARC700 user code, we need to put
1902      a memory barrier after frame allocation / before deallocation to
1903      prevent interrupts clobbering our data in the frame.
1904      In particular, we don't have any such guarantees for library functions,
1905      which, on the other hand, tend to have small frames.
1906 
1907    Thus, for small frames, we'd like to use a different scheme:
1908    - The frame is allocated in full with the first prologue instruction,
1909      and deallocated in full with the last epilogue instruction.
1910      Thus, the instructions in-between can be freely scheduled.
1911    - If the function has no outgoing arguments on the stack, we can allocate
1912      one register save slot at the top of the stack.  This register can then
1913      be saved simultaneously with frame allocation, and restored with
1914      frame deallocation.
1915      This register can be picked depending on scheduling considerations,
1916      although some thought should go into having some set of registers
1917      to be potentially lingering after a call, and others to be available
1918      immediately - i.e. in the absence of interprocedural optimization, we
1919      can use an ABI-like convention for register allocation to reduce
1920      stalls after function return.  */
1921 /* Function prologue/epilogue handlers.  */
1922 
1923 /* ARCompact stack frames look like:
1924 
1925            Before call                     After call
1926   high  +-----------------------+       +-----------------------+
1927   mem   |  reg parm save area   |       | reg parm save area    |
1928         |  only created for     |       | only created for      |
1929         |  variable arg fns     |       | variable arg fns      |
1930     AP  +-----------------------+       +-----------------------+
1931         |  return addr register |       | return addr register  |
1932         |  (if required)        |       | (if required)         |
1933         +-----------------------+       +-----------------------+
1934         |                       |       |                       |
1935         |  reg save area        |       | reg save area         |
1936         |                       |       |                       |
1937         +-----------------------+       +-----------------------+
1938         |  frame pointer        |       | frame pointer         |
1939         |  (if required)        |       | (if required)         |
1940     FP  +-----------------------+       +-----------------------+
1941         |                       |       |                       |
1942         |  local/temp variables |       | local/temp variables  |
1943         |                       |       |                       |
1944         +-----------------------+       +-----------------------+
1945         |                       |       |                       |
1946         |  arguments on stack   |       | arguments on stack    |
1947         |                       |       |                       |
1948     SP  +-----------------------+       +-----------------------+
1949                                         | reg parm save area    |
1950                                         | only created for      |
1951                                         | variable arg fns      |
1952                                     AP  +-----------------------+
1953                                         | return addr register  |
1954                                         | (if required)         |
1955                                         +-----------------------+
1956                                         |                       |
1957                                         | reg save area         |
1958                                         |                       |
1959                                         +-----------------------+
1960                                         | frame pointer         |
1961                                         | (if required)         |
1962                                     FP  +-----------------------+
1963                                         |                       |
1964                                         | local/temp variables  |
1965                                         |                       |
1966                                         +-----------------------+
1967                                         |                       |
1968                                         | arguments on stack    |
1969   low                                   |                       |
1970   mem                               SP  +-----------------------+
1971 
1972 Notes:
1973 1) The "reg parm save area" does not exist for non variable argument fns.
1974    The "reg parm save area" can be eliminated completely if we created our
1975    own va-arc.h, but that has tradeoffs as well (so it's not done).  */
1976 
1977 /* Structure to be filled in by arc_compute_frame_size with register
1978    save masks, and offsets for the current function.  */
1979 struct GTY (()) arc_frame_info
1980 {
1981   unsigned int total_size;	/* # bytes that the entire frame takes up.  */
1982   unsigned int extra_size;	/* # bytes of extra stuff.  */
1983   unsigned int pretend_size;	/* # bytes we push and pretend caller did.  */
1984   unsigned int args_size;	/* # bytes that outgoing arguments take up.  */
1985   unsigned int reg_size;	/* # bytes needed to store regs.  */
1986   unsigned int var_size;	/* # bytes that variables take up.  */
1987   unsigned int reg_offset;	/* Offset from new sp to store regs.  */
1988   unsigned int gmask;		/* Mask of saved gp registers.  */
1989   int          initialized;	/* Nonzero if frame size already calculated.  */
1990   short millicode_start_reg;
1991   short millicode_end_reg;
1992   bool save_return_addr;
1993 };
1994 
1995 /* Defining data structures for per-function information.  */
1996 
1997 typedef struct GTY (()) machine_function
1998 {
1999   enum arc_function_type fn_type;
2000   struct arc_frame_info frame_info;
2001   /* To keep track of unalignment caused by short insns.  */
2002   int unalign;
2003   int force_short_suffix; /* Used when disgorging return delay slot insns.  */
2004   const char *size_reason;
2005   struct arc_ccfsm ccfsm_current;
2006   /* Map from uid to ccfsm state during branch shortening.  */
2007   rtx ccfsm_current_insn;
2008   char arc_reorg_started;
2009   char prescan_initialized;
2010 } machine_function;
2011 
2012 /* Return the type of function FUN.
2013 
2014    The result is cached in FUN->machine->fn_type, so the attribute list
2015    is only scanned once per function.  */
2016 
2017 enum arc_function_type
2018 arc_compute_function_type (struct function *fun)
2019 {
2020   tree decl = fun->decl;
2021   tree a;
2022   enum arc_function_type fn_type = fun->machine->fn_type;
2023 
2024   if (fn_type != ARC_FUNCTION_UNKNOWN)
2025     return fn_type;
2026 
2027   /* Assume we have a normal function (not an interrupt handler).  */
2028   fn_type = ARC_FUNCTION_NORMAL;
2029 
2030   /* Now see if this is an interrupt handler.  */
2031   for (a = DECL_ATTRIBUTES (decl);
2032        a;
2033        a = TREE_CHAIN (a))
2034     {
2035       tree name = TREE_PURPOSE (a), args = TREE_VALUE (a);
2036 
2037       if (name == get_identifier ("interrupt")
2038 	  && list_length (args) == 1
2039 	  && TREE_CODE (TREE_VALUE (args)) == STRING_CST)
2040 	{
2041 	  tree value = TREE_VALUE (args);
2042 
2043 	  if (!strcmp (TREE_STRING_POINTER (value), "ilink1")
2044 	      || !strcmp (TREE_STRING_POINTER (value), "ilink"))
2045 	    fn_type = ARC_FUNCTION_ILINK1;
2046 	  else if (!strcmp (TREE_STRING_POINTER (value), "ilink2"))
2047 	    fn_type = ARC_FUNCTION_ILINK2;
2048 	  else
2049 	    gcc_unreachable ();
2050 	  break;
2051 	}
2052     }
2053 
2054   return fun->machine->fn_type = fn_type;
2055 }
2056 
2057 #define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
2058 #define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
2059 
2060 /* Tell prologue and epilogue if register REGNO should be saved / restored.
2061    The return address and frame pointer are treated separately.
2062    Don't consider them here.
2063    Addition for pic: The gp register needs to be saved if the current
2064    function changes it to access gotoff variables.
2065    FIXME: This will not be needed if we used some arbitrary register
2066    instead of r26.
2067 */
2068 #define MUST_SAVE_REGISTER(regno, interrupt_p) \
2069 (((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \
2070   && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p))) \
2071  || (flag_pic && crtl->uses_pic_offset_table \
2072      && regno == PIC_OFFSET_TABLE_REGNUM) )
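/* Hedged example: in an ordinary (non-interrupt) function that clobbers
   r13 (callee-saved) and r1 (call-clobbered), MUST_SAVE_REGISTER (13, 0)
   is true while MUST_SAVE_REGISTER (1, 0) is false; with interrupt_p
   nonzero, a live r1 would have to be saved as well.  */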
2073 
2074 #define MUST_SAVE_RETURN_ADDR \
2075   (cfun->machine->frame_info.save_return_addr)
2076 
2077 /* Return non-zero if there are registers to be saved or loaded using
2078    millicode thunks.  We can only use consecutive sequences starting
2079    with r13, and not going beyond r25.
2080    GMASK is a bitmask of registers to save.  This function sets
2081    FRAME->millicode_start_reg .. FRAME->millicode_end_reg to the range
2082    of registers to be saved / restored with a millicode call.  */
2083 
2084 static int
2085 arc_compute_millicode_save_restore_regs (unsigned int gmask,
2086 					 struct arc_frame_info *frame)
2087 {
2088   int regno;
2089 
2090   int start_reg = 13, end_reg = 25;
2091 
2092   for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));)
2093     regno++;
2094   end_reg = regno - 1;
2095   /* There is no point in using millicode thunks if we don't save/restore
2096      at least three registers.  For non-leaf functions we also have the
2097      blink restore.  */
2098   if (regno - start_reg >= 3 - (crtl->is_leaf == 0))
2099     {
2100       frame->millicode_start_reg = 13;
2101       frame->millicode_end_reg = regno - 1;
2102       return 1;
2103     }
2104   return 0;
2105 }
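/* Worked example of the range detection above: with a gmask covering
   r13-r16 in a non-leaf function, the loop stops at r17, giving
   millicode_start_reg = 13, millicode_end_reg = 16, and the function
   returns 1 (4 registers plus the blink restore make the thunk
   worthwhile).  A gmask of just r13-r14 in a leaf function yields only
   2 consecutive registers, below the threshold of 3, so 0 is returned
   and ordinary st/ld instructions are used instead.  */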
2106 
2107 /* Return the bytes needed to compute the frame pointer from the current
2108    stack pointer.
2109 
2110    SIZE is the size needed for local variables.  */
2111 
2112 unsigned int
2113 arc_compute_frame_size (int size)	/* size = # of var. bytes allocated.  */
2114 {
2115   int regno;
2116   unsigned int total_size, var_size, args_size, pretend_size, extra_size;
2117   unsigned int reg_size, reg_offset;
2118   unsigned int gmask;
2119   enum arc_function_type fn_type;
2120   int interrupt_p;
2121   struct arc_frame_info *frame_info = &cfun->machine->frame_info;
2122 
2123   size = ARC_STACK_ALIGN (size);
2124 
2125   /* 1) Size of locals and temporaries */
2126   var_size	= size;
2127 
2128   /* 2) Size of outgoing arguments */
2129   args_size	= crtl->outgoing_args_size;
2130 
2131   /* 3) Calculate space needed for saved registers.
2132      ??? We ignore the extension registers for now.  */
2133 
2134   /* See if this is an interrupt handler.  Call used registers must be saved
2135      for them too.  */
2136 
2137   reg_size = 0;
2138   gmask = 0;
2139   fn_type = arc_compute_function_type (cfun);
2140   interrupt_p = ARC_INTERRUPT_P (fn_type);
2141 
2142   for (regno = 0; regno <= 31; regno++)
2143     {
2144       if (MUST_SAVE_REGISTER (regno, interrupt_p))
2145 	{
2146 	  reg_size += UNITS_PER_WORD;
2147 	  gmask |= 1 << regno;
2148 	}
2149     }
2150 
2151   /* 4) Space for back trace data structure.
2152 	<return addr reg size> (if required) + <fp size> (if required).  */
2153   frame_info->save_return_addr
2154     = (!crtl->is_leaf || df_regs_ever_live_p (RETURN_ADDR_REGNUM));
2155   /* Saving blink reg in case of leaf function for millicode thunk calls.  */
2156   if (optimize_size && !TARGET_NO_MILLICODE_THUNK_SET)
2157     {
2158       if (arc_compute_millicode_save_restore_regs (gmask, frame_info))
2159 	frame_info->save_return_addr = true;
2160     }
2161 
2162   extra_size = 0;
2163   if (MUST_SAVE_RETURN_ADDR)
2164     extra_size = 4;
2165   if (frame_pointer_needed)
2166     extra_size += 4;
2167 
2168   /* 5) Space for variable arguments passed in registers */
2169   pretend_size	= crtl->args.pretend_args_size;
2170 
2171   /* Ensure everything before the locals is aligned appropriately.  */
2172     {
2173        unsigned int extra_plus_reg_size;
2174        unsigned int extra_plus_reg_size_aligned;
2175 
2176        extra_plus_reg_size = extra_size + reg_size;
2177        extra_plus_reg_size_aligned = ARC_STACK_ALIGN(extra_plus_reg_size);
2178        reg_size = extra_plus_reg_size_aligned - extra_size;
2179     }
2180 
2181   /* Compute total frame size.  */
2182   total_size = var_size + args_size + extra_size + pretend_size + reg_size;
2183 
2184   total_size = ARC_STACK_ALIGN (total_size);
2185 
2186   /* Compute offset of register save area from stack pointer:
2187      Frame: pretend_size <blink> reg_size <fp> var_size args_size <--sp
2188   */
2189   reg_offset = (total_size - (pretend_size + reg_size + extra_size)
2190 		+ (frame_pointer_needed ? 4 : 0));
2191 
2192   /* Save computed information.  */
2193   frame_info->total_size   = total_size;
2194   frame_info->extra_size   = extra_size;
2195   frame_info->pretend_size = pretend_size;
2196   frame_info->var_size     = var_size;
2197   frame_info->args_size    = args_size;
2198   frame_info->reg_size     = reg_size;
2199   frame_info->reg_offset   = reg_offset;
2200   frame_info->gmask        = gmask;
2201   frame_info->initialized  = reload_completed;
2202 
2203   /* Ok, we're done.  */
2204   return total_size;
2205 }
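/* Hedged worked example (assuming 4-byte stack alignment): a non-leaf
   function with 20 bytes of locals, no outgoing stack arguments, no
   frame pointer and no variadic pretend area, saving r13-r15 and blink:
     var_size = 20, args_size = 0, reg_size = 12, extra_size = 4,
     total_size = 20 + 0 + 4 + 0 + 12 = 36,
     reg_offset = 36 - (0 + 12 + 4) = 20.  */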
2206 
2207 /* Common code to save/restore registers.  */
2208 /* BASE_REG is the base register to use for addressing and to adjust.
2209    GMASK is a bitmask of general purpose registers to save/restore.
2210    EPILOGUE_P: 0 = prologue, 1 = epilogue, 2 = epilogue for a sibling thunk.
2211    If *FIRST_OFFSET is non-zero, add it first to BASE_REG - preferably
2212    using a pre-modify for the first memory access.  *FIRST_OFFSET is then
2213    zeroed.  */
2214 
2215 static void
2216 arc_save_restore (rtx base_reg,
2217 		  unsigned int gmask, int epilogue_p, int *first_offset)
2218 {
2219   unsigned int offset = 0;
2220   int regno;
2221   struct arc_frame_info *frame = &cfun->machine->frame_info;
2222   rtx sibthunk_insn = NULL_RTX;
2223 
2224   if (gmask)
2225     {
2226       /* Millicode thunks implementation:
2227 	 Generates calls to millicodes for registers starting from r13 to r25
2228 	 Present Limitations:
2229 	 - Only one range supported. The remaining regs will have the ordinary
2230 	   st and ld instructions for store and loads. Hence a gmask asking
2231 	   to store r13-14, r16-r25 will only generate calls to store and
2232 	   load r13 to r14 while store and load insns will be generated for
2233 	   r16 to r25 in the prologue and epilogue respectively.
2234 
2235 	 - Presently the library only supports register ranges starting from r13.
2236       */
2237       if (epilogue_p == 2 || frame->millicode_end_reg > 14)
2238 	{
2239 	  int start_call = frame->millicode_start_reg;
2240 	  int end_call = frame->millicode_end_reg;
2241 	  int n_regs = end_call - start_call + 1;
2242 	  int i = 0, r, off = 0;
2243 	  rtx insn;
2244 	  rtx ret_addr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
2245 
2246 	  if (*first_offset)
2247 	    {
2248 	      /* "reg_size" won't be more than 127 .  */
2249 	      gcc_assert (epilogue_p || abs (*first_offset) <= 127);
2250 	      frame_add (base_reg, *first_offset);
2251 	      *first_offset = 0;
2252 	    }
2253 	  insn = gen_rtx_PARALLEL
2254 		  (VOIDmode, rtvec_alloc ((epilogue_p == 2) + n_regs + 1));
2255 	  if (epilogue_p == 2)
2256 	    i += 2;
2257 	  else
2258 	    XVECEXP (insn, 0, n_regs) = gen_rtx_CLOBBER (VOIDmode, ret_addr);
2259 	  for (r = start_call; r <= end_call; r++, off += UNITS_PER_WORD, i++)
2260 	    {
2261 	      rtx reg = gen_rtx_REG (SImode, r);
2262 	      rtx mem
2263 		= gen_frame_mem (SImode, plus_constant (Pmode, base_reg, off));
2264 
2265 	      if (epilogue_p)
2266 		XVECEXP (insn, 0, i) = gen_rtx_SET (reg, mem);
2267 	      else
2268 		XVECEXP (insn, 0, i) = gen_rtx_SET (mem, reg);
2269 	      gmask = gmask & ~(1L << r);
2270 	    }
2271 	  if (epilogue_p == 2)
2272 	    sibthunk_insn = insn;
2273 	  else
2274 	    {
2275 	      insn = frame_insn (insn);
2276 	      if (epilogue_p)
2277 		for (r = start_call; r <= end_call; r++)
2278 		  {
2279 		    rtx reg = gen_rtx_REG (SImode, r);
2280 		    add_reg_note (insn, REG_CFA_RESTORE, reg);
2281 		  }
2282 	    }
2283 	  offset += off;
2284 	}
2285 
2286       for (regno = 0; regno <= 31; regno++)
2287 	{
2288 	  enum machine_mode mode = SImode;
2289 	  bool found = false;
2290 
2291 	  if (TARGET_LL64
2292 	      && (regno % 2 == 0)
2293 	      && ((gmask & (1L << regno)) != 0)
2294 	      && ((gmask & (1L << (regno+1))) != 0))
2295 	    {
2296 	      found = true;
2297 	      mode  = DImode;
2298 	    }
2299 	  else if ((gmask & (1L << regno)) != 0)
2300 	    {
2301 	      found = true;
2302 	      mode  = SImode;
2303 	    }
2304 
2305 	  if (found)
2306 	    {
2307 	      rtx reg = gen_rtx_REG (mode, regno);
2308 	      rtx addr, mem;
2309 	      int cfa_adjust = *first_offset;
2310 
2311 	      if (*first_offset)
2312 		{
2313 		  gcc_assert (!offset);
2314 		  addr = plus_constant (Pmode, base_reg, *first_offset);
2315 		  addr = gen_rtx_PRE_MODIFY (Pmode, base_reg, addr);
2316 		  *first_offset = 0;
2317 		}
2318 	      else
2319 		{
2320 		  gcc_assert (SMALL_INT (offset));
2321 		  addr = plus_constant (Pmode, base_reg, offset);
2322 		}
2323 	      mem = gen_frame_mem (mode, addr);
2324 	      if (epilogue_p)
2325 		{
2326 		  rtx insn =
2327 		    frame_move_inc (reg, mem, base_reg, addr);
2328 		  add_reg_note (insn, REG_CFA_RESTORE, reg);
2329 		  if (cfa_adjust)
2330 		    {
2331 		      enum reg_note note = REG_CFA_ADJUST_CFA;
2332 		      add_reg_note (insn, note,
2333 				    gen_rtx_SET (stack_pointer_rtx,
2334 						 plus_constant (Pmode,
2335 								stack_pointer_rtx,
2336 								cfa_adjust)));
2337 		    }
2338 		}
2339 	      else
2340 		frame_move_inc (mem, reg, base_reg, addr);
2341 	      offset += UNITS_PER_WORD;
2342 	      if (mode == DImode)
2343 		{
2344 		  offset += UNITS_PER_WORD;
2345 		  ++regno;
2346 		}
2347 	    } /* if */
2348 	} /* for */
2349     }/* if */
2350   if (sibthunk_insn)
2351     {
2352       int start_call = frame->millicode_start_reg;
2353       int end_call = frame->millicode_end_reg;
2354       int r;
2355 
2356       rtx r12 = gen_rtx_REG (Pmode, 12);
2357 
2358       frame_insn (gen_rtx_SET (r12, GEN_INT (offset)));
2359       XVECEXP (sibthunk_insn, 0, 0) = ret_rtx;
2360       XVECEXP (sibthunk_insn, 0, 1)
2361 	= gen_rtx_SET (stack_pointer_rtx,
2362 		       gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12));
2363       sibthunk_insn = emit_jump_insn (sibthunk_insn);
2364       RTX_FRAME_RELATED_P (sibthunk_insn) = 1;
2365 
2366       /* Would be nice if we could do this earlier, when the PARALLEL
2367 	 is populated, but these need to be attached after the
2368 	 emit.  */
2369       for (r = start_call; r <= end_call; r++)
2370 	{
2371 	  rtx reg = gen_rtx_REG (SImode, r);
2372 	  add_reg_note (sibthunk_insn, REG_CFA_RESTORE, reg);
2373 	}
2374     }
2375 } /* arc_save_restore */
2376 
2377 
2378 int arc_return_address_regs[4]
2379   = {0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM};
2380 
2381 /* Set up the stack and frame pointer (if desired) for the function.  */
2382 
2383 void
2384 arc_expand_prologue (void)
2385 {
2386   int size = get_frame_size ();
2387   unsigned int gmask = cfun->machine->frame_info.gmask;
2388   /*  unsigned int frame_pointer_offset;*/
2389   unsigned int frame_size_to_allocate;
2390   /* (FIXME: The first store will use a PRE_MODIFY; this will usually be r13.
2391      Change the stack layout so that we rather store a high register with the
2392      PRE_MODIFY, thus enabling more short insn generation.)  */
2393   int first_offset = 0;
2394 
2395   size = ARC_STACK_ALIGN (size);
2396 
2397   /* Compute/get total frame size.  */
2398   size = (!cfun->machine->frame_info.initialized
2399 	   ? arc_compute_frame_size (size)
2400 	   : cfun->machine->frame_info.total_size);
2401 
2402   if (flag_stack_usage_info)
2403     current_function_static_stack_size = size;
2404 
2405   /* Keep track of frame size to be allocated.  */
2406   frame_size_to_allocate = size;
2407 
2408   /* These cases shouldn't happen.  Catch them now.  */
2409   gcc_assert (!(size == 0 && gmask));
2410 
2411   /* Allocate space for register arguments if this is a variadic function.  */
2412   if (cfun->machine->frame_info.pretend_size != 0)
2413     {
2414        /* Ensure pretend_size is at most 8 * word_size.  */
2415       gcc_assert (cfun->machine->frame_info.pretend_size <= 32);
2416 
2417       frame_stack_add (-(HOST_WIDE_INT)cfun->machine->frame_info.pretend_size);
2418       frame_size_to_allocate -= cfun->machine->frame_info.pretend_size;
2419     }
2420 
2421   /* The home-grown ABI says link register is saved first.  */
2422   if (MUST_SAVE_RETURN_ADDR)
2423     {
2424       rtx ra = gen_rtx_REG (SImode, RETURN_ADDR_REGNUM);
2425       rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
2426 
2427       frame_move_inc (mem, ra, stack_pointer_rtx, 0);
2428       frame_size_to_allocate -= UNITS_PER_WORD;
2429 
2430     } /* MUST_SAVE_RETURN_ADDR */
2431 
2432   /* Save any needed call-saved regs (and call-used if this is an
2433      interrupt handler) for ARCompact ISA.  */
2434   if (cfun->machine->frame_info.reg_size)
2435     {
2436       first_offset = -cfun->machine->frame_info.reg_size;
2437       /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask.  */
2438       arc_save_restore (stack_pointer_rtx, gmask, 0, &first_offset);
2439       frame_size_to_allocate -= cfun->machine->frame_info.reg_size;
2440     }
2441 
2442 
2443   /* Save frame pointer if needed.  */
2444   if (frame_pointer_needed)
2445     {
2446       rtx addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2447 			       GEN_INT (-UNITS_PER_WORD + first_offset));
2448       rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_MODIFY (Pmode,
2449 							  stack_pointer_rtx,
2450 							  addr));
2451       frame_move_inc (mem, frame_pointer_rtx, stack_pointer_rtx, 0);
2452       frame_size_to_allocate -= UNITS_PER_WORD;
2453       first_offset = 0;
2454       frame_move (frame_pointer_rtx, stack_pointer_rtx);
2455     }
2456 
2457   /* ??? We don't handle the case where the saved regs are more than 252
2458      bytes away from sp.  This can be handled by decrementing sp once, saving
2459      the regs, and then decrementing it again.  The epilogue doesn't have this
2460      problem as the `ld' insn takes reg+limm values (though it would be more
2461      efficient to avoid reg+limm).  */
2462 
2463   frame_size_to_allocate -= first_offset;
2464   /* Allocate the stack frame.  */
2465   if (frame_size_to_allocate > 0)
2466     frame_stack_add ((HOST_WIDE_INT) 0 - frame_size_to_allocate);
2467 
2468   /* Setup the gp register, if needed.  */
2469   if (crtl->uses_pic_offset_table)
2470     arc_finalize_pic ();
2471 }
2472 
2473 /* Do any necessary cleanup after a function to restore stack, frame,
2474    and regs.  */
2475 
2476 void
2477 arc_expand_epilogue (int sibcall_p)
2478 {
2479   int size = get_frame_size ();
2480   enum arc_function_type fn_type = arc_compute_function_type (cfun);
2481 
2482   size = ARC_STACK_ALIGN (size);
2483   size = (!cfun->machine->frame_info.initialized
2484 	   ? arc_compute_frame_size (size)
2485 	   : cfun->machine->frame_info.total_size);
2486 
2487   unsigned int pretend_size = cfun->machine->frame_info.pretend_size;
2488   unsigned int frame_size;
2489   unsigned int size_to_deallocate;
2490   int restored;
2491   int can_trust_sp_p = !cfun->calls_alloca;
2492   int first_offset = 0;
2493   int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0;
2494   rtx insn;
2495 
2496   size_to_deallocate = size;
2497 
2498   frame_size = size - (pretend_size +
2499 		       cfun->machine->frame_info.reg_size +
2500 		       cfun->machine->frame_info.extra_size);
2501 
2502   /* ??? There are lots of optimizations that can be done here.
2503      EG: Use fp to restore regs if it's closer.
2504      Maybe in time we'll do them all.  For now, always restore regs from
2505      sp, but don't restore sp if we don't have to.  */
2506 
2507   if (!can_trust_sp_p)
2508     gcc_assert (frame_pointer_needed);
2509 
2510   /* Restore stack pointer to the beginning of saved register area for
2511      ARCompact ISA.  */
2512   if (frame_size)
2513     {
2514       if (frame_pointer_needed)
2515 	frame_move (stack_pointer_rtx, frame_pointer_rtx);
2516       else
2517 	first_offset = frame_size;
2518       size_to_deallocate -= frame_size;
2519     }
2520   else if (!can_trust_sp_p)
2521     frame_stack_add (-frame_size);
2522 
2523 
2524   /* Restore any saved registers.  */
2525   if (frame_pointer_needed)
2526     {
2527       rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
2528 
2529       insn = frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
2530 			     stack_pointer_rtx, 0);
2531       add_reg_note (insn, REG_CFA_RESTORE, frame_pointer_rtx);
2532       add_reg_note (insn, REG_CFA_DEF_CFA,
2533 		    plus_constant (SImode, stack_pointer_rtx,
2534 				   4));
2535       size_to_deallocate -= UNITS_PER_WORD;
2536     }
2537 
2538   /* Load blink after the millicode thunk calls when optimizing for size.  */
2539   if (millicode_p)
2540     {
2541 	  int sibthunk_p = (!sibcall_p
2542 			    && fn_type == ARC_FUNCTION_NORMAL
2543 			    && !cfun->machine->frame_info.pretend_size);
2544 
2545 	  gcc_assert (!(cfun->machine->frame_info.gmask
2546 			& (FRAME_POINTER_MASK | RETURN_ADDR_MASK)));
2547 	  arc_save_restore (stack_pointer_rtx,
2548 			    cfun->machine->frame_info.gmask,
2549 			    1 + sibthunk_p, &first_offset);
2550 	  if (sibthunk_p)
2551 	    return;
2552     }
2553   /* If we are to restore registers, and first_offset would require
2554      a limm to be encoded in a PRE_MODIFY, yet we can add it with a
2555      fast add to the stack pointer, do this now.  */
2556   if ((!SMALL_INT (first_offset)
2557        && cfun->machine->frame_info.gmask
2558        && ((TARGET_ARC700 && !optimize_size)
2559 	    ? first_offset <= 0x800
2560 	    : satisfies_constraint_C2a (GEN_INT (first_offset))))
2561        /* Also do this if we have both gprs and return
2562 	  address to restore, and they both would need a LIMM.  */
2563        || (MUST_SAVE_RETURN_ADDR
2564 	   && !SMALL_INT ((cfun->machine->frame_info.reg_size + first_offset) >> 2)
2565 	   && cfun->machine->frame_info.gmask))
2566     {
2567       frame_stack_add (first_offset);
2568       first_offset = 0;
2569     }
2570   if (MUST_SAVE_RETURN_ADDR)
2571     {
2572       rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
2573       int ra_offs = cfun->machine->frame_info.reg_size + first_offset;
2574       rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs);
2575       HOST_WIDE_INT cfa_adjust = 0;
2576 
2577       /* If the load of blink would need a LIMM, but we can add
2578 	 the offset quickly to sp, do the latter.  */
2579       if (!SMALL_INT (ra_offs >> 2)
2580 	  && !cfun->machine->frame_info.gmask
2581 	  && ((TARGET_ARC700 && !optimize_size)
2582 	       ? ra_offs <= 0x800
2583 	       : satisfies_constraint_C2a (GEN_INT (ra_offs))))
2584 	{
2585 	   size_to_deallocate -= ra_offs - first_offset;
2586 	   first_offset = 0;
2587 	   frame_stack_add (ra_offs);
2588 	   ra_offs = 0;
2589 	   addr = stack_pointer_rtx;
2590 	}
2591       /* See if we can combine the load of the return address with the
2592 	 final stack adjustment.
2593 	 We need a separate load if there are still registers to
2594 	 restore.  We also want a separate load if the combined insn
2595 	 would need a limm, but a separate load doesn't.  */
2596       if (ra_offs
2597 	  && !cfun->machine->frame_info.gmask
2598 	  && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2)))
2599 	{
2600 	  addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr);
2601 	  cfa_adjust = ra_offs;
2602 	  first_offset = 0;
2603 	  size_to_deallocate -= cfun->machine->frame_info.reg_size;
2604 	}
2605       else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD)
2606 	{
2607 	  addr = gen_rtx_POST_INC (Pmode, addr);
2608 	  cfa_adjust = GET_MODE_SIZE (Pmode);
2609 	  size_to_deallocate = 0;
2610 	}
2611 
2612       insn = frame_move_inc (ra, gen_frame_mem (Pmode, addr),
2613 			     stack_pointer_rtx, addr);
2614       if (cfa_adjust)
2615 	{
2616 	  enum reg_note note = REG_CFA_ADJUST_CFA;
2617 
2618 	  add_reg_note (insn, note,
2619 			gen_rtx_SET (stack_pointer_rtx,
2620 				     plus_constant (SImode, stack_pointer_rtx,
2621 						    cfa_adjust)));
2622 	}
2623       add_reg_note (insn, REG_CFA_RESTORE, ra);
2624     }
2625 
2626   if (!millicode_p)
2627     {
2628        if (cfun->machine->frame_info.reg_size)
2629 	 arc_save_restore (stack_pointer_rtx,
2630 	   /* The zeroing of these two bits is unnecessary, but leave this in for clarity.  */
2631 			   cfun->machine->frame_info.gmask
2632 			   & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, &first_offset);
2633     }
2634 
2635 
2636   /* The rest of this function does the following:
2637      ARCompact    : handle epilogue_delay, restore sp (phase-2), return
2638   */
2639 
2640   /* Keep track of how much of the stack pointer we've restored.
2641      It makes the following a lot more readable.  */
2642   size_to_deallocate += first_offset;
2643   restored = size - size_to_deallocate;
2644 
2645   if (size > restored)
2646     frame_stack_add (size - restored);
2647 
2648   /* Emit the return instruction.  */
2649   if (sibcall_p == FALSE)
2650     emit_jump_insn (gen_simple_return ());
2651 }
2652 
2653 /* Return the offset relative to the stack pointer where the return address
2654    is stored, or -1 if it is not stored.  */
2655 
2656 int
2657 arc_return_slot_offset ()
2658 {
2659   struct arc_frame_info *afi = &cfun->machine->frame_info;
2660 
2661   return (afi->save_return_addr
2662 	  ? afi->total_size - afi->pretend_size - afi->extra_size : -1);
2663 }
2664 
2665 /* PIC */
2666 
2667 /* Emit special PIC prologues and epilogues.  */
2668 /* If the function has any GOTOFF relocations, then the GOTBASE
2669    register has to be set up in the prologue.
2670    The instruction needed at the function start for setting up the
2671    GOTBASE register is
2672       add rdest, pc,
2673    ----------------------------------------------------------
2674    The rtl to be emitted for this should be:
2675      set (reg basereg)
2676          (plus (reg pc)
2677                (const (unspec (symref _DYNAMIC) 3)))
2678    ----------------------------------------------------------  */
2679 
2680 static void
2681 arc_finalize_pic (void)
2682 {
2683   rtx pat;
2684   rtx baseptr_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
2685 
2686   if (crtl->uses_pic_offset_table == 0)
2687     return;
2688 
2689   gcc_assert (flag_pic != 0);
2690 
2691   pat = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC");
2692   pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pat), ARC_UNSPEC_GOT);
2693   pat = gen_rtx_CONST (Pmode, pat);
2694 
2695   pat = gen_rtx_SET (baseptr_rtx, pat);
2696 
2697   emit_insn (pat);
2698 }
2699 
2700 /* !TARGET_BARREL_SHIFTER support.  */
2701 /* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what
2702    kind of shift.  */
2703 
2704 void
2705 emit_shift (enum rtx_code code, rtx op0, rtx op1, rtx op2)
2706 {
2707   rtx shift = gen_rtx_fmt_ee (code, SImode, op1, op2);
2708   rtx pat
2709     = ((shift4_operator (shift, SImode) ?  gen_shift_si3 : gen_shift_si3_loop)
2710 	(op0, op1, op2, shift));
2711   emit_insn (pat);
2712 }
2713 
2714 /* Output the assembler code for doing a shift.
2715    We go to a bit of trouble to generate efficient code as the ARC601 only has
2716    single bit shifts.  This is taken from the h8300 port.  We only have one
2717    mode of shifting and can't access individual bytes like the h8300 can, so
2718    this is greatly simplified (at the expense of not generating hyper-
2719    efficient code).
2720 
2721    This function is not used if the variable shift insns are present.  */
2722 
2723 /* FIXME:  This probably can be done using a define_split in arc.md.
2724    Alternately, generate rtx rather than output instructions.  */
2725 
2726 const char *
2727 output_shift (rtx *operands)
2728 {
2729   /*  static int loopend_lab;*/
2730   rtx shift = operands[3];
2731   machine_mode mode = GET_MODE (shift);
2732   enum rtx_code code = GET_CODE (shift);
2733   const char *shift_one;
2734 
2735   gcc_assert (mode == SImode);
2736 
2737   switch (code)
2738     {
2739     case ASHIFT:   shift_one = "add %0,%1,%1"; break;
2740     case ASHIFTRT: shift_one = "asr %0,%1"; break;
2741     case LSHIFTRT: shift_one = "lsr %0,%1"; break;
2742     default:       gcc_unreachable ();
2743     }
2744 
2745   if (GET_CODE (operands[2]) != CONST_INT)
2746     {
2747       output_asm_insn ("and.f lp_count,%2, 0x1f", operands);
2748       goto shiftloop;
2749     }
2750   else
2751     {
2752       int n;
2753 
2754       n = INTVAL (operands[2]);
2755 
2756       /* Only consider the lower 5 bits of the shift count.  */
2757       n = n & 0x1f;
2758 
2759       /* First see if we can do them inline.  */
2760       /* ??? We could get better scheduling & shorter code (using short insns)
2761 	 by using splitters.  Alas, that'd be even more verbose.  */
2762       if (code == ASHIFT && n <= 9 && n > 2
2763 	  && dest_reg_operand (operands[4], SImode))
2764 	{
2765 	  output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands);
2766 	  for (n -=3 ; n >= 3; n -= 3)
2767 	    output_asm_insn ("add3 %0,%4,%0", operands);
2768 	  if (n == 2)
2769 	    output_asm_insn ("add2 %0,%4,%0", operands);
2770 	  else if (n)
2771 	    output_asm_insn ("add %0,%0,%0", operands);
2772 	}
2773       else if (n <= 4)
2774 	{
2775 	  while (--n >= 0)
2776 	    {
2777 	      output_asm_insn (shift_one, operands);
2778 	      operands[1] = operands[0];
2779 	    }
2780 	}
2781       /* See if we can use a rotate/and.  */
2782       else if (n == BITS_PER_WORD - 1)
2783 	{
2784 	  switch (code)
2785 	    {
2786 	    case ASHIFT :
2787 	      output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands);
2788 	      break;
2789 	    case ASHIFTRT :
2790 	      /* The ARC doesn't have a rol insn.  Use something else.  */
2791 	      output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands);
2792 	      break;
2793 	    case LSHIFTRT :
2794 	      /* The ARC doesn't have a rol insn.  Use something else.  */
2795 	      output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands);
2796 	      break;
2797 	    default:
2798 	      break;
2799 	    }
2800 	}
2801       else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode))
2802 	{
2803 	  switch (code)
2804 	    {
2805 	    case ASHIFT :
2806 	      output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands);
2807 	      break;
2808 	    case ASHIFTRT :
2809 #if 1 /* Need some scheduling comparisons.  */
2810 	      output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t"
2811 			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
2812 #else
2813 	      output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t"
2814 			       "sbc.f %0,%0,%4\n\trlc %0,%0", operands);
2815 #endif
2816 	      break;
2817 	    case LSHIFTRT :
2818 #if 1
2819 	      output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t"
2820 			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
2821 #else
2822 	      output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t"
2823 			       "and %0,%0,1\n\trlc %0,%0", operands);
2824 #endif
2825 	      break;
2826 	    default:
2827 	      break;
2828 	    }
2829 	}
2830       else if (n == BITS_PER_WORD - 3 && code == ASHIFT)
2831 	output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0",
2832 			 operands);
2833       /* Must loop.  */
2834       else
2835 	{
2836 	  operands[2] = GEN_INT (n);
2837 	  output_asm_insn ("mov.f lp_count, %2", operands);
2838 
2839 	shiftloop:
2840 	    {
2841 	      output_asm_insn ("lpnz\t2f", operands);
2842 	      output_asm_insn (shift_one, operands);
2843 	      output_asm_insn ("nop", operands);
2844 	      fprintf (asm_out_file, "2:\t%s end single insn loop\n",
2845 		       ASM_COMMENT_START);
2846 	    }
2847 	}
2848     }
2849 
2850   return "";
2851 }
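/* Hedged example of the inline expansion above for a constant left
   shift by 8 (dst, src, tmp stand in for operands 0, 1 and 4):
     mov  tmp,0
     add3 dst,tmp,src   ; dst = src << 3
     add3 dst,tmp,dst   ; dst = src << 6
     add2 dst,tmp,dst   ; dst = src << 8
   Shift counts that match none of the special cases fall back to the
   lp_count loop emitted at the "shiftloop" label.  */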
2852 
2853 /* Nested function support.  */
2854 
2855 /* Directly store VALUE into memory object BLOCK at OFFSET.  */
2856 
2857 static void
2858 emit_store_direct (rtx block, int offset, int value)
2859 {
2860   emit_insn (gen_store_direct (adjust_address (block, SImode, offset),
2861 			       force_reg (SImode,
2862 					  gen_int_mode (value, SImode))));
2863 }
2864 
2865 /* Emit RTL insns to initialize the variable parts of a trampoline.
2866    FNADDR is an RTX for the address of the function's pure code.
2867    CXT is an RTX for the static chain value for the function.  */
2868 /* With potentially multiple shared objects loaded, and multiple stacks
2869    present for multiple threads where trampolines might reside, a simple
2870    range check will likely not suffice for the profiler to tell if a callee
2871    is a trampoline.  We use a speedier check by making the trampoline start at
2872    an address that is not 4-byte aligned.
2873    A trampoline looks like this:
2874 
2875    nop_s	     0x78e0
2876 entry:
2877    ld_s r12,[pcl,12] 0xd403
2878    ld   r11,[pcl,12] 0x170c 700b
2879    j_s [r12]         0x7c00
2880    nop_s	     0x78e0
2881 
2882    The fastest trampoline to execute for trampolines within +-8KB of CTX
2883    would be:
2884    add2 r11,pcl,s12
2885    j [limm]           0x20200f80 limm
2886    and that would also be faster to write to the stack by computing the offset
2887    from CTX to TRAMP at compile time.  However, it would really be better to
2888    get rid of the high cost of cache invalidation when generating trampolines,
2889    which requires that the code part of trampolines stays constant, and
2890    additionally either
2891    - making sure that no executable code but trampolines is on the stack,
2892      no icache entries linger for the area of the stack from when before the
2893      stack was allocated, and allocating trampolines in trampoline-only
2894      cache lines
2895   or
2896    - allocate trampolines from a special pool of pre-allocated trampolines.  */
2897 
2898 static void
2899 arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
2900 {
2901   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
2902 
2903   emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0);
2904   emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c);
2905   emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 0x7c0078e0 : 0x78e07c00);
2906   emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr);
2907   emit_move_insn (adjust_address (tramp, SImode, 16), cxt);
2908   emit_insn (gen_flush_icache (adjust_address (tramp, SImode, 0)));
2909 }
2910 
2911 /* Allow the profiler to easily distinguish trampolines from normal
2912   functions.  */
2913 
2914 static rtx
2915 arc_trampoline_adjust_address (rtx addr)
2916 {
2917   return plus_constant (Pmode, addr, 2);
2918 }
2919 
2920 /* This is set briefly to 1 when we output a ".as" address modifier, and then
2921    reset when we output the scaled address.  */
2922 static int output_scaled = 0;
2923 
2924 /* Print operand X (an rtx) in assembler syntax to file FILE.
2925    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
2926    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
2927 /* In final.c:output_asm_insn:
2928     'l' : label
2929     'a' : address
2930     'c' : constant address if CONSTANT_ADDRESS_P
2931     'n' : negative
2932    Here:
2933     'Z': log2(x+1)-1
2934     'z': log2
2935     'M': log2(~x)
2936     '#': condbranch delay slot suffix
2937     '*': jump delay slot suffix
2938     '?' : nonjump-insn suffix for conditional execution or short instruction
2939     '!' : jump / call suffix for conditional execution or short instruction
2940     '`': fold constant inside unary operator, re-recognize, and emit.
2941     'd'
2942     'D'
2943     'R': Second word
2944     'S'
2945     'B': Branch comparison operand - suppress sda reference
2946     'H': Most significant word
2947     'L': Least significant word
2948     'A': ASCII decimal representation of floating point value
2949     'U': Load/store update or scaling indicator
2950     'V': cache bypass indicator for volatile
2951     'P'
2952     'F'
2953     '^'
2954     'O': Operator
2955     'o': original symbol - no @ prepending.  */
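/* Hedged examples of the operand codes implemented below:
     %z of (const_int 8)  prints "3"   (log2 of the value).
     %Z of (const_int 7)  prints "2"   (log2 (x + 1) - 1).
     %M of (const_int -9) prints "3"   (log2 of ~x, i.e. of 8).  */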
2956 
2957 void
2958 arc_print_operand (FILE *file, rtx x, int code)
2959 {
2960   switch (code)
2961     {
2962     case 'Z':
2963       if (GET_CODE (x) == CONST_INT)
2964 	fprintf (file, "%d",exact_log2(INTVAL (x) + 1) - 1 );
2965       else
2966 	output_operand_lossage ("invalid operand to %%Z code");
2967 
2968       return;
2969 
2970     case 'z':
2971       if (GET_CODE (x) == CONST_INT)
2972 	fprintf (file, "%d",exact_log2(INTVAL (x)) );
2973       else
2974 	output_operand_lossage ("invalid operand to %%z code");
2975 
2976       return;
2977 
2978     case 'M':
2979       if (GET_CODE (x) == CONST_INT)
2980 	fprintf (file, "%d",exact_log2(~INTVAL (x)) );
2981       else
2982 	output_operand_lossage ("invalid operand to %%M code");
2983 
2984       return;
2985 
2986     case '#' :
2987       /* Conditional branches depending on condition codes.
2988 	 Note that this is only for branches that were known to depend on
2989 	 condition codes before delay slot scheduling;
2990 	 out-of-range brcc / bbit expansions should use '*'.
2991 	 This distinction is important because of the different
2992 	 allowable delay slot insns and the output of the delay suffix
2993 	 for TARGET_AT_DBR_COND_EXEC.  */
2994     case '*' :
2995       /* Unconditional branches / branches not depending on condition codes.
2996 	 This could also be a CALL_INSN.
2997 	 Output the appropriate delay slot suffix.  */
2998       if (final_sequence && final_sequence->len () != 1)
2999 	{
3000 	  rtx_insn *jump = final_sequence->insn (0);
3001 	  rtx_insn *delay = final_sequence->insn (1);
3002 
3003 	  /* For TARGET_PAD_RETURN we might have grabbed the delay insn.  */
3004 	  if (delay->deleted ())
3005 	    return;
3006 	  if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
3007 	    fputs (INSN_FROM_TARGET_P (delay) ? ".d"
3008 		   : TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d"
3009 		   : get_attr_type (jump) == TYPE_RETURN && code == '#' ? ""
3010 		   : ".nd",
3011 		   file);
3012 	  else
3013 	    fputs (".d", file);
3014 	}
3015       return;
3016     case '?' : /* with leading "." */
3017     case '!' : /* without leading "." */
3018       /* This insn can be conditionally executed.  See if the ccfsm machinery
3019 	 says it should be conditionalized.
3020 	 If it shouldn't, we'll check the compact attribute if this insn
3021 	 has a short variant, which may be used depending on code size and
3022 	 alignment considerations.  */
3023       if (current_insn_predicate)
3024 	arc_ccfsm_current.cc
3025 	  = get_arc_condition_code (current_insn_predicate);
3026       if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current))
3027 	{
3028 	  /* Is this insn in a delay slot sequence?  */
3029 	  if (!final_sequence || XVECLEN (final_sequence, 0) < 2
3030 	      || current_insn_predicate
3031 	      || CALL_P (final_sequence->insn (0))
3032 	      || simplejump_p (final_sequence->insn (0)))
3033 	    {
3034 	      /* This insn isn't in a delay slot sequence, or conditionalized
3035 		 independently of its position in a delay slot.  */
3036 	      fprintf (file, "%s%s",
3037 		       code == '?' ? "." : "",
3038 		       arc_condition_codes[arc_ccfsm_current.cc]);
3039 	      /* If this is a jump, there are still short variants.  However,
3040 		 only beq_s / bne_s have the same offset range as b_s,
3041 		 and the only short conditional returns are jeq_s and jne_s.  */
3042 	      if (code == '!'
3043 		  && (arc_ccfsm_current.cc == ARC_CC_EQ
3044 		      || arc_ccfsm_current.cc == ARC_CC_NE
3045 		      || 0 /* FIXME: check if branch in 7 bit range.  */))
3046 		output_short_suffix (file);
3047 	    }
3048 	  else if (code == '!') /* Jump with delay slot.  */
3049 	    fputs (arc_condition_codes[arc_ccfsm_current.cc], file);
3050 	  else /* An Instruction in a delay slot of a jump or call.  */
3051 	    {
3052 	      rtx jump = XVECEXP (final_sequence, 0, 0);
3053 	      rtx insn = XVECEXP (final_sequence, 0, 1);
3054 
3055 	      /* If the insn is annulled and is from the target path, we need
3056 		 to inverse the condition test.  */
3057 	      if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
3058 		{
3059 		  if (INSN_FROM_TARGET_P (insn))
3060 		    fprintf (file, "%s%s",
3061 			     code == '?' ? "." : "",
3062 			     arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]);
3063 		  else
3064 		    fprintf (file, "%s%s",
3065 			     code == '?' ? "." : "",
3066 			     arc_condition_codes[arc_ccfsm_current.cc]);
3067 		  if (arc_ccfsm_current.state == 5)
3068 		    arc_ccfsm_current.state = 0;
3069 		}
3070 	      else
3071 		/* This insn is executed for either path, so don't
3072 		   conditionalize it at all.  */
3073 		output_short_suffix (file);
3074 
3075 	    }
3076 	}
3077       else
3078 	output_short_suffix (file);
3079       return;
3080     case '`' :
3081       /* FIXME: fold constant inside unary operator, re-recognize, and emit.  */
3082       gcc_unreachable ();
3083     case 'd' :
3084       fputs (arc_condition_codes[get_arc_condition_code (x)], file);
3085       return;
3086     case 'D' :
3087       fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE
3088 				 (get_arc_condition_code (x))],
3089 	     file);
3090       return;
3091     case 'R' :
3092       /* Write second word of DImode or DFmode reference,
3093 	 register or memory.  */
3094       if (GET_CODE (x) == REG)
3095 	fputs (reg_names[REGNO (x)+1], file);
3096       else if (GET_CODE (x) == MEM)
3097 	{
3098 	  fputc ('[', file);
3099 
3100 	  /* Handle possible auto-increment.  For PRE_INC / PRE_DEC /
3101 	    PRE_MODIFY, we will have handled the first word already;
3102 	    For POST_INC / POST_DEC / POST_MODIFY, the access to the
3103 	    first word will be done later.  In either case, the access
3104 	    to the first word will do the modify, and we only have
3105 	    to add an offset of four here.  */
3106 	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
3107 	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
3108 	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY
3109 	      || GET_CODE (XEXP (x, 0)) == POST_INC
3110 	      || GET_CODE (XEXP (x, 0)) == POST_DEC
3111 	      || GET_CODE (XEXP (x, 0)) == POST_MODIFY)
3112 	    output_address (VOIDmode,
3113 			    plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 4));
3114 	  else if (output_scaled)
3115 	    {
3116 	      rtx addr = XEXP (x, 0);
3117 	      int size = GET_MODE_SIZE (GET_MODE (x));
3118 
3119 	      output_address (VOIDmode,
3120 			      plus_constant (Pmode, XEXP (addr, 0),
3121 					     ((INTVAL (XEXP (addr, 1)) + 4)
3122 					      >> (size == 2 ? 1 : 2))));
3123 	      output_scaled = 0;
3124 	    }
3125 	  else
3126 	    output_address (VOIDmode,
3127 			    plus_constant (Pmode, XEXP (x, 0), 4));
3128 	  fputc (']', file);
3129 	}
3130       else
3131 	output_operand_lossage ("invalid operand to %%R code");
3132       return;
3133     case 'S' :
3134 	/* FIXME: remove %S option.  */
3135 	break;
3136     case 'B' /* Branch or other LIMM ref - must not use sda references.  */ :
3137       if (CONSTANT_P (x))
3138 	{
3139 	  output_addr_const (file, x);
3140 	  return;
3141 	}
3142       break;
3143     case 'H' :
3144     case 'L' :
3145       if (GET_CODE (x) == REG)
3146 	{
3147 	  /* L = least significant word, H = most significant word.  */
3148 	  if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L'))
3149 	    fputs (reg_names[REGNO (x)], file);
3150 	  else
3151 	    fputs (reg_names[REGNO (x)+1], file);
3152 	}
3153       else if (GET_CODE (x) == CONST_INT
3154 	       || GET_CODE (x) == CONST_DOUBLE)
3155 	{
3156 	  rtx first, second;
3157 
3158 	  split_double (x, &first, &second);
3159 
3160 	  if ((WORDS_BIG_ENDIAN) == 0)
3161 	      fprintf (file, "0x%08" PRIx64,
3162 		       code == 'L' ? INTVAL (first) : INTVAL (second));
3163 	  else
3164 	      fprintf (file, "0x%08" PRIx64,
3165 		       code == 'L' ? INTVAL (second) : INTVAL (first));
3166 
3167 
3168 	}
3169       else
3170 	output_operand_lossage ("invalid operand to %%H/%%L code");
3171       return;
3172     case 'A' :
3173       {
3174 	char str[30];
3175 
3176 	gcc_assert (GET_CODE (x) == CONST_DOUBLE
3177 		    && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT);
3178 
3179 	real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1);
3180 	fprintf (file, "%s", str);
3181 	return;
3182       }
3183     case 'U' :
3184       /* Output a load/store with update indicator if appropriate.  */
3185       if (GET_CODE (x) == MEM)
3186 	{
3187 	  rtx addr = XEXP (x, 0);
3188 	  switch (GET_CODE (addr))
3189 	    {
3190 	    case PRE_INC: case PRE_DEC: case PRE_MODIFY:
3191 	      fputs (".a", file); break;
3192 	    case POST_INC: case POST_DEC: case POST_MODIFY:
3193 	      fputs (".ab", file); break;
3194 	    case PLUS:
3195 	      /* Are we using a scaled index?  */
3196 	      if (GET_CODE (XEXP (addr, 0)) == MULT)
3197 		fputs (".as", file);
3198 	      /* Can we use a scaled offset?  */
3199 	      else if (CONST_INT_P (XEXP (addr, 1))
3200 		       && GET_MODE_SIZE (GET_MODE (x)) > 1
3201 		       && (!(INTVAL (XEXP (addr, 1))
3202 			     & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3))
3203 		       /* Does it make a difference?  */
3204 		       && !SMALL_INT_RANGE(INTVAL (XEXP (addr, 1)),
3205 					   GET_MODE_SIZE (GET_MODE (x)) - 2, 0))
3206 		{
3207 		  fputs (".as", file);
3208 		  output_scaled = 1;
3209 		}
3210 	      break;
3211 	    case REG:
3212 	      break;
3213 	    default:
3214 	      gcc_assert (CONSTANT_P (addr)); break;
3215 	    }
3216 	}
3217       else
3218 	output_operand_lossage ("invalid operand to %%U code");
3219       return;
3220     case 'V' :
3221       /* Output cache bypass indicator for a load/store insn.  Volatile memory
3222 	 refs are defined to use the cache bypass mechanism.  */
3223       if (GET_CODE (x) == MEM)
3224 	{
3225 	  if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET )
3226 	    fputs (".di", file);
3227 	}
3228       else
3229 	output_operand_lossage ("invalid operand to %%V code");
3230       return;
3231       /* plt code.  */
3232     case 'P':
3233     case 0 :
3234       /* Do nothing special.  */
3235       break;
3236     case 'F':
3237       fputs (reg_names[REGNO (x)]+1, file);
3238       return;
3239     case '^':
3240 	/* This punctuation character is needed because label references are
3241 	   printed in the output template using %l.  This is a front end
3242 	   character, and when we want to emit a '@' before it, we have to use
3243 	   this '^'.  */
3244 
3245 	fputc ('@', file);
3246 	return;
3247     case 'O':
3248       /* Output an operator.  */
3249       switch (GET_CODE (x))
3250 	{
3251 	case PLUS:	fputs ("add", file); return;
3252 	case SS_PLUS:	fputs ("adds", file); return;
3253 	case AND:	fputs ("and", file); return;
3254 	case IOR:	fputs ("or", file); return;
3255 	case XOR:	fputs ("xor", file); return;
3256 	case MINUS:	fputs ("sub", file); return;
3257 	case SS_MINUS:	fputs ("subs", file); return;
3258 	case ASHIFT:	fputs ("asl", file); return;
3259 	case ASHIFTRT:	fputs ("asr", file); return;
3260 	case LSHIFTRT:	fputs ("lsr", file); return;
3261 	case ROTATERT:	fputs ("ror", file); return;
3262 	case MULT:	fputs ("mpy", file); return;
3263 	case ABS:	fputs ("abs", file); return; /* Unconditional.  */
3264 	case NEG:	fputs ("neg", file); return;
3265 	case SS_NEG:	fputs ("negs", file); return;
3266 	case NOT:	fputs ("not", file); return; /* Unconditional.  */
3267 	case ZERO_EXTEND:
3268 	  fputs ("ext", file); /* bmsk allows predication.  */
3269 	  goto size_suffix;
3270 	case SIGN_EXTEND: /* Unconditional.  */
3271 	  fputs ("sex", file);
3272 	size_suffix:
3273 	  switch (GET_MODE (XEXP (x, 0)))
3274 	    {
3275 	    case QImode: fputs ("b", file); return;
3276 	    case HImode: fputs ("w", file); return;
3277 	    default: break;
3278 	    }
3279 	  break;
3280 	case SS_TRUNCATE:
3281 	  if (GET_MODE (x) != HImode)
3282 	    break;
3283 	  fputs ("sat16", file);
3284 	default: break;
3285 	}
3286       output_operand_lossage ("invalid operand to %%O code"); return;
3287     case 'o':
3288       if (GET_CODE (x) == SYMBOL_REF)
3289 	{
3290 	  assemble_name (file, XSTR (x, 0));
3291 	  return;
3292 	}
3293       break;
3294     case '&':
3295       if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
3296 	fprintf (file, "; unalign: %d", cfun->machine->unalign);
3297       return;
3298     case '+':
3299       if (TARGET_V2)
3300 	fputs ("m", file);
3301       else
3302 	fputs ("h", file);
3303       return;
3304     case '_':
3305       if (TARGET_V2)
3306 	fputs ("h", file);
3307       else
3308 	fputs ("w", file);
3309       return;
3310     default :
3311       /* Unknown flag.  */
3312       output_operand_lossage ("invalid operand output code");
3313     }
3314 
3315   switch (GET_CODE (x))
3316     {
3317     case REG :
3318       fputs (reg_names[REGNO (x)], file);
3319       break;
3320     case MEM :
3321       {
3322 	rtx addr = XEXP (x, 0);
3323 	int size = GET_MODE_SIZE (GET_MODE (x));
3324 
3325 	fputc ('[', file);
3326 
3327 	switch (GET_CODE (addr))
3328 	  {
3329 	  case PRE_INC: case POST_INC:
3330 	    output_address (VOIDmode,
3331 			    plus_constant (Pmode, XEXP (addr, 0), size)); break;
3332 	  case PRE_DEC: case POST_DEC:
3333 	    output_address (VOIDmode,
3334 			    plus_constant (Pmode, XEXP (addr, 0), -size));
3335 	    break;
3336 	  case PRE_MODIFY: case POST_MODIFY:
3337 	    output_address (VOIDmode, XEXP (addr, 1)); break;
3338 	  case PLUS:
3339 	    if (output_scaled)
3340 	      {
3341 		output_address (VOIDmode,
3342 				plus_constant (Pmode, XEXP (addr, 0),
3343 					       (INTVAL (XEXP (addr, 1))
3344 						>> (size == 2 ? 1 : 2))));
3345 		output_scaled = 0;
3346 	      }
3347 	    else
3348 	      output_address (VOIDmode, addr);
3349 	    break;
3350 	  default:
3351 	    if (flag_pic && CONSTANT_ADDRESS_P (addr))
3352 	      arc_output_pic_addr_const (file, addr, code);
3353 	    else
3354 	      output_address (VOIDmode, addr);
3355 	    break;
3356 	  }
3357 	fputc (']', file);
3358 	break;
3359       }
3360     case CONST_DOUBLE :
3361       /* We handle SFmode constants here as output_addr_const doesn't.  */
3362       if (GET_MODE (x) == SFmode)
3363 	{
3364 	  long l;
3365 
3366 	  REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
3367 	  fprintf (file, "0x%08lx", l);
3368 	  break;
3369 	}
3370       /* Fall through.  Let output_addr_const deal with it.  */
3371     default :
3372       if (flag_pic)
3373 	arc_output_pic_addr_const (file, x, code);
3374       else
3375 	{
3376 	  /* FIXME: Dirty way to handle @var@sda+const.  Should be handled
3377 	     with asm_output_symbol_ref.  */
3378 	  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3379 	    {
3380 	      x = XEXP (x, 0);
3381 	      output_addr_const (file, XEXP (x, 0));
3382 	      if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF && SYMBOL_REF_SMALL_P (XEXP (x, 0)))
3383 		fprintf (file, "@sda");
3384 
3385 	      if (GET_CODE (XEXP (x, 1)) != CONST_INT
3386 		  || INTVAL (XEXP (x, 1)) >= 0)
3387 		fprintf (file, "+");
3388 	      output_addr_const (file, XEXP (x, 1));
3389 	    }
3390 	  else
3391 	    output_addr_const (file, x);
3392 	}
3393       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
3394 	fprintf (file, "@sda");
3395       break;
3396     }
3397 }
3398 
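/* Illustrative sketch only (the pattern string, registers and offset are
   hypothetical, not verbatim compiler output): the codes handled above
   compose the suffixes of the final mnemonic.  For a pattern string such
   as "ld%U1%V1 %0,%1", a volatile memory operand using pre-increment
   writeback would print roughly as

	ld.a.di	r0,[r1,4]

   where %U supplied ".a" (address writeback), %V supplied ".di" (cache
   bypass), and the bracketed address came from the MEM handling at the
   end of arc_print_operand.  */
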
3399 /* Print a memory address as an operand to reference that memory location.  */
3400 
3401 void
3402 arc_print_operand_address (FILE *file, rtx addr)
3403 {
3404   register rtx base, index = 0;
3405 
3406   switch (GET_CODE (addr))
3407     {
3408     case REG :
3409       fputs (reg_names[REGNO (addr)], file);
3410       break;
3411     case SYMBOL_REF :
3412       output_addr_const (file, addr);
3413       if (SYMBOL_REF_SMALL_P (addr))
3414 	fprintf (file, "@sda");
3415       break;
3416     case PLUS :
3417       if (GET_CODE (XEXP (addr, 0)) == MULT)
3418 	index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1);
3419       else if (CONST_INT_P (XEXP (addr, 0)))
3420 	index = XEXP (addr, 0), base = XEXP (addr, 1);
3421       else
3422 	base = XEXP (addr, 0), index = XEXP (addr, 1);
3423 
3424       gcc_assert (OBJECT_P (base));
3425       arc_print_operand_address (file, base);
3426       if (CONSTANT_P (base) && CONST_INT_P (index))
3427 	fputc ('+', file);
3428       else
3429 	fputc (',', file);
3430       gcc_assert (OBJECT_P (index));
3431       arc_print_operand_address (file, index);
3432       break;
3433     case CONST:
3434       {
3435 	rtx c = XEXP (addr, 0);
3436 
3437 	gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF);
3438 	gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT);
3439 
3440 	output_address (VOIDmode, XEXP (addr, 0));
3441 
3442 	break;
3443       }
3444     case PRE_INC :
3445     case PRE_DEC :
3446       /* We shouldn't get here as we've lost the mode of the memory object
3447 	 (which says how much to inc/dec by).  */
3448       gcc_unreachable ();
3449       break;
3450     default :
3451       if (flag_pic)
3452 	arc_output_pic_addr_const (file, addr, 0);
3453       else
3454 	output_addr_const (file, addr);
3455       break;
3456     }
3457 }
3458 
3459 /* Called via walk_stores.  DATA points to a hash table we can use to
3460    establish a unique SYMBOL_REF for each counter, which corresponds to
3461    a caller-callee pair.
3462    X is a store which we want to examine for an UNSPEC_PROF, which
3463    would be an address loaded into a register, or directly used in a MEM.
3464    If we found an UNSPEC_PROF, if we encounter a new counter the first time,
3465    write out a description and a data allocation for a 32 bit counter.
3466    Also, fill in the appropriate symbol_ref into each UNSPEC_PROF instance.  */
3467 
3468 static void
3469 write_profile_sections (rtx dest ATTRIBUTE_UNUSED, rtx x, void *data)
3470 {
3471   rtx *srcp, src;
3472   htab_t htab = (htab_t) data;
3473   rtx *slot;
3474 
3475   if (GET_CODE (x) != SET)
3476     return;
3477   srcp = &SET_SRC (x);
3478   if (MEM_P (*srcp))
3479     srcp = &XEXP (*srcp, 0);
3480   else if (MEM_P (SET_DEST (x)))
3481     srcp = &XEXP (SET_DEST (x), 0);
3482   src = *srcp;
3483   if (GET_CODE (src) != CONST)
3484     return;
3485   src = XEXP (src, 0);
3486   if (GET_CODE (src) != UNSPEC || XINT (src, 1) != UNSPEC_PROF)
3487     return;
3488 
3489   gcc_assert (XVECLEN (src, 0) == 3);
3490   if (!htab_elements (htab))
3491     {
3492       output_asm_insn (".section .__arc_profile_desc, \"a\"\n"
3493 		       "\t.long %0 + 1\n",
3494 		       &XVECEXP (src, 0, 0));
3495     }
3496   slot = (rtx *) htab_find_slot (htab, src, INSERT);
3497   if (*slot == HTAB_EMPTY_ENTRY)
3498     {
3499       static int count_nr;
3500       char buf[24];
3501       rtx count;
3502 
3503       *slot = src;
3504       sprintf (buf, "__prof_count%d", count_nr++);
3505       count = gen_rtx_SYMBOL_REF (Pmode, xstrdup (buf));
3506       XVECEXP (src, 0, 2) = count;
3507       output_asm_insn (".section\t.__arc_profile_desc, \"a\"\n"
3508 		       "\t.long\t%1\n"
3509 		       "\t.section\t.__arc_profile_counters, \"aw\"\n"
3510 		       "\t.type\t%o2, @object\n"
3511 		       "\t.size\t%o2, 4\n"
3512 		       "%o2:\t.zero 4",
3513 		       &XVECEXP (src, 0, 0));
3514       *srcp = count;
3515     }
3516   else
3517     *srcp = XVECEXP (*slot, 0, 2);
3518 }
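
/* For orientation (summarising the code above, not adding behaviour): the
   first UNSPEC_PROF encountered emits the .__arc_profile_desc section
   header; each previously unseen caller/callee pair then gets a descriptor
   entry there plus a 4-byte, zero-initialised counter object, named
   __prof_count0, __prof_count1, ..., in .__arc_profile_counters, and the
   UNSPEC_PROF reference is replaced by that counter's SYMBOL_REF.  */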
3519 
3520 /* Hash function for UNSPEC_PROF htab.  Use both the caller's name and
3521    the callee's name (if known).  */
3522 
3523 static hashval_t
3524 unspec_prof_hash (const void *x)
3525 {
3526   const_rtx u = (const_rtx) x;
3527   const_rtx s1 = XVECEXP (u, 0, 1);
3528 
3529   return (htab_hash_string (XSTR (XVECEXP (u, 0, 0), 0))
3530 	  ^ (s1->code == SYMBOL_REF ? htab_hash_string (XSTR (s1, 0)) : 0));
3531 }
3532 
3533 /* Equality function for UNSPEC_PROF htab.  Two pieces of UNSPEC_PROF rtl
3534    shall refer to the same counter if both caller name and callee rtl
3535    are identical.  */
3536 
3537 static int
3538 unspec_prof_htab_eq (const void *x, const void *y)
3539 {
3540   const_rtx u0 = (const_rtx) x;
3541   const_rtx u1 = (const_rtx) y;
3542   const_rtx s01 = XVECEXP (u0, 0, 1);
3543   const_rtx s11 = XVECEXP (u1, 0, 1);
3544 
3545   return (!strcmp (XSTR (XVECEXP (u0, 0, 0), 0),
3546 		   XSTR (XVECEXP (u1, 0, 0), 0))
3547 	  && rtx_equal_p (s01, s11));
3548 }
3549 
3550 /* Conditional execution support.
3551 
3552    This is based on the ARM port but for now is much simpler.
3553 
3554    A finite state machine takes care of noticing whether or not instructions
3555    can be conditionally executed, and thus decrease execution time and code
3556    size by deleting branch instructions.  The fsm is controlled by
3557    arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the
3558    actions of PRINT_OPERAND.  The patterns in the .md file for the branch
3559    insns also have a hand in this.  */
3560 /* The way we leave dealing with non-annulled or annul-false delay slot
3561    insns to the consumer is awkward.  */
3562 
3563 /* The states of the fsm controlling condition codes are:
3564    0: normal, do nothing special
3565    1: don't output this insn
3566    2: don't output this insn
3567    3: make insns conditional
3568    4: make insns conditional
3569    5: make insn conditional (only for outputting annulled delay slot insns)
3570 
3571    special value for cfun->machine->uid_ccfsm_state:
3572    6: return with but one insn before it since function start / call
3573 
3574    State transitions (state->state by whom, under what condition):
3575    0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over
3576           some instructions.
3577    0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed
3578           by zero or more non-jump insns and an unconditional branch with
3579 	  the same target label as the condbranch.
3580    1 -> 3 branch patterns, after having not output the conditional branch
3581    2 -> 4 branch patterns, after having not output the conditional branch
3582    0 -> 5 branch patterns, for annulled delay slot insn.
3583    3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached
3584           (the target label has CODE_LABEL_NUMBER equal to
3585 	  arc_ccfsm_target_label).
3586    4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached
3587    3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns.
3588    5 -> 0 when outputting the delay slot insn
3589 
3590    If the jump clobbers the conditions then we use states 2 and 4.
3591 
3592    A similar thing can be done with conditional return insns.
3593 
3594    We also handle separating branches from sets of the condition code.
3595    This is done here because knowledge of the ccfsm state is required;
3596    we may not be outputting the branch.  */
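
/* Minimal worked example of the common 0 -> 1 -> 3 -> 0 path (the label
   and register names are hypothetical, not taken from real output):

	bne	@.L1		; conditional branch skipping one insn
	add	r0,r0,1
   .L1:

   arc_ccfsm_advance notices the branch skips a single predicable insn and
   enters state 1; the branch pattern then omits the branch and moves to
   state 3; the add is printed under the inverse condition, roughly
   "add.eq r0,r0,1"; emitting .L1 resets the state to 0.  */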
3597 
3598 /* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current,
3599    before letting final output INSN.  */
3600 
3601 static void
3602 arc_ccfsm_advance (rtx_insn *insn, struct arc_ccfsm *state)
3603 {
3604   /* BODY will hold the body of INSN.  */
3605   register rtx body;
3606 
3607   /* This will be 1 if trying to repeat the trick (ie: do the `else' part of
3608      an if/then/else), and things need to be reversed.  */
3609   int reverse = 0;
3610 
3611   /* If we start with a return insn, we only succeed if we find another one.  */
3612   int seeking_return = 0;
3613 
3614   /* START_INSN will hold the insn from where we start looking.  This is the
3615      first insn after the following code_label if REVERSE is true.  */
3616   rtx_insn *start_insn = insn;
3617 
3618   /* Type of the jump_insn.  Brcc insns don't affect ccfsm changes,
3619      since they don't rely on a cmp preceding them.  */
3620   enum attr_type jump_insn_type;
3621 
3622   /* Allow -mdebug-ccfsm to turn this off so we can see how well it does.
3623      We can't do this in macro FINAL_PRESCAN_INSN because it's called from
3624      final_scan_insn which has `optimize' as a local.  */
3625   if (optimize < 2 || TARGET_NO_COND_EXEC)
3626     return;
3627 
3628   /* Ignore notes and labels.  */
3629   if (!INSN_P (insn))
3630     return;
3631   body = PATTERN (insn);
3632   /* If in state 4, check if the target branch is reached, in order to
3633      change back to state 0.  */
3634   if (state->state == 4)
3635     {
3636       if (insn == state->target_insn)
3637 	{
3638 	  state->target_insn = NULL;
3639 	  state->state = 0;
3640 	}
3641       return;
3642     }
3643 
3644   /* If in state 3, it is possible to repeat the trick, if this insn is an
3645      unconditional branch to a label, and immediately following this branch
3646      is the previous target label which is only used once, and the label this
3647      branch jumps to is not too far off.  Or in other words "we've done the
3648      `then' part, see if we can do the `else' part."  */
3649   if (state->state == 3)
3650     {
3651       if (simplejump_p (insn))
3652 	{
3653 	  start_insn = next_nonnote_insn (start_insn);
3654 	  if (GET_CODE (start_insn) == BARRIER)
3655 	    {
3656 	      /* ??? Isn't this always a barrier?  */
3657 	      start_insn = next_nonnote_insn (start_insn);
3658 	    }
3659 	  if (GET_CODE (start_insn) == CODE_LABEL
3660 	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
3661 	      && LABEL_NUSES (start_insn) == 1)
3662 	    reverse = TRUE;
3663 	  else
3664 	    return;
3665 	}
3666       else if (GET_CODE (body) == SIMPLE_RETURN)
3667 	{
3668 	  start_insn = next_nonnote_insn (start_insn);
3669 	  if (GET_CODE (start_insn) == BARRIER)
3670 	    start_insn = next_nonnote_insn (start_insn);
3671 	  if (GET_CODE (start_insn) == CODE_LABEL
3672 	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
3673 	      && LABEL_NUSES (start_insn) == 1)
3674 	    {
3675 	      reverse = TRUE;
3676 	      seeking_return = 1;
3677 	    }
3678 	  else
3679 	    return;
3680 	}
3681       else
3682 	return;
3683     }
3684 
3685   if (GET_CODE (insn) != JUMP_INSN
3686       || GET_CODE (PATTERN (insn)) == ADDR_VEC
3687       || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
3688     return;
3689 
3690  /* We can't predicate BRCC or loop ends.
3691     Also, when generating PIC code, and considering a medium range call,
3692     we can't predicate the call.  */
3693   jump_insn_type = get_attr_type (insn);
3694   if (jump_insn_type == TYPE_BRCC
3695       || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT
3696       || jump_insn_type == TYPE_LOOP_END
3697       || (jump_insn_type == TYPE_CALL && !get_attr_predicable (insn)))
3698     return;
3699 
3700   /* This jump might be paralleled with a clobber of the condition codes,
3701      the jump should always come first.  */
3702   if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
3703     body = XVECEXP (body, 0, 0);
3704 
3705   if (reverse
3706       || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
3707 	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
3708     {
3709       int insns_skipped = 0, fail = FALSE, succeed = FALSE;
3710       /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
3711       int then_not_else = TRUE;
3712       /* Nonzero if next insn must be the target label.  */
3713       int next_must_be_target_label_p;
3714       rtx_insn *this_insn = start_insn;
3715       rtx label = 0;
3716 
3717       /* Register the insn jumped to.  */
3718       if (reverse)
3719 	{
3720 	  if (!seeking_return)
3721 	    label = XEXP (SET_SRC (body), 0);
3722 	}
3723       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
3724 	label = XEXP (XEXP (SET_SRC (body), 1), 0);
3725       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
3726 	{
3727 	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
3728 	  then_not_else = FALSE;
3729 	}
3730       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == SIMPLE_RETURN)
3731 	seeking_return = 1;
3732       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == SIMPLE_RETURN)
3733 	{
3734 	  seeking_return = 1;
3735 	  then_not_else = FALSE;
3736 	}
3737       else
3738 	gcc_unreachable ();
3739 
3740       /* If this is a non-annulled branch with a delay slot, there is
3741 	 no need to conditionalize the delay slot.  */
3742       if (NEXT_INSN (PREV_INSN (insn)) != insn
3743 	  && state->state == 0 && !INSN_ANNULLED_BRANCH_P (insn))
3744 	{
3745 	  this_insn = NEXT_INSN (this_insn);
3746 	  gcc_assert (NEXT_INSN (NEXT_INSN (PREV_INSN (start_insn)))
3747 		      == NEXT_INSN (this_insn));
3748 	}
3749       /* See how many insns this branch skips, and what kind of insns.  If all
3750 	 insns are okay, and the label or unconditional branch to the same
3751 	 label is not too far away, succeed.  */
3752       for (insns_skipped = 0, next_must_be_target_label_p = FALSE;
3753 	   !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED;
3754 	   insns_skipped++)
3755 	{
3756 	  rtx scanbody;
3757 
3758 	  this_insn = next_nonnote_insn (this_insn);
3759 	  if (!this_insn)
3760 	    break;
3761 
3762 	  if (next_must_be_target_label_p)
3763 	    {
3764 	      if (GET_CODE (this_insn) == BARRIER)
3765 		continue;
3766 	      if (GET_CODE (this_insn) == CODE_LABEL
3767 		  && this_insn == label)
3768 		{
3769 		  state->state = 1;
3770 		  succeed = TRUE;
3771 		}
3772 	      else
3773 		fail = TRUE;
3774 	      break;
3775 	    }
3776 
3777 	  scanbody = PATTERN (this_insn);
3778 
3779 	  switch (GET_CODE (this_insn))
3780 	    {
3781 	    case CODE_LABEL:
3782 	      /* Succeed if it is the target label, otherwise fail since
3783 		 control falls in from somewhere else.  */
3784 	      if (this_insn == label)
3785 		{
3786 		  state->state = 1;
3787 		  succeed = TRUE;
3788 		}
3789 	      else
3790 		fail = TRUE;
3791 	      break;
3792 
3793 	    case BARRIER:
3794 	      /* Succeed if the following insn is the target label.
3795 		 Otherwise fail.
3796 		 If return insns are used then the last insn in a function
3797 		 will be a barrier.  */
3798 	      next_must_be_target_label_p = TRUE;
3799 	      break;
3800 
3801 	    case CALL_INSN:
3802 	      /* Can handle a call insn if there are no insns after it.
3803 		 IE: The next "insn" is the target label.  We don't have to
3804 		 worry about delay slots as such insns are SEQUENCE's inside
3805 		 INSN's.  ??? It is possible to handle such insns though.  */
3806 	      if (get_attr_cond (this_insn) == COND_CANUSE)
3807 		next_must_be_target_label_p = TRUE;
3808 	      else
3809 		fail = TRUE;
3810 	      break;
3811 
3812 	    case JUMP_INSN:
3813 	      /* If this is an unconditional branch to the same label, succeed.
3814 		 If it is to another label, do nothing.  If it is conditional,
3815 		 fail.  */
3816 	      /* ??? Probably, the test for the SET and the PC are
3817 		 unnecessary.  */
3818 
3819 	      if (GET_CODE (scanbody) == SET
3820 		  && GET_CODE (SET_DEST (scanbody)) == PC)
3821 		{
3822 		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
3823 		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
3824 		    {
3825 		      state->state = 2;
3826 		      succeed = TRUE;
3827 		    }
3828 		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
3829 		    fail = TRUE;
3830 		  else if (get_attr_cond (this_insn) != COND_CANUSE)
3831 		    fail = TRUE;
3832 		}
3833 	      else if (GET_CODE (scanbody) == SIMPLE_RETURN
3834 		       && seeking_return)
3835 		{
3836 		  state->state = 2;
3837 		  succeed = TRUE;
3838 		}
3839 	      else if (GET_CODE (scanbody) == PARALLEL)
3840 		{
3841 		  if (get_attr_cond (this_insn) != COND_CANUSE)
3842 		    fail = TRUE;
3843 		}
3844 	      break;
3845 
3846 	    case INSN:
3847 	      /* We can only do this with insns that can use the condition
3848 		 codes (and don't set them).  */
3849 	      if (GET_CODE (scanbody) == SET
3850 		  || GET_CODE (scanbody) == PARALLEL)
3851 		{
3852 		  if (get_attr_cond (this_insn) != COND_CANUSE)
3853 		    fail = TRUE;
3854 		}
3855 	      /* We can't handle other insns like sequences.  */
3856 	      else
3857 		fail = TRUE;
3858 	      break;
3859 
3860 	    default:
3861 	      break;
3862 	    }
3863 	}
3864 
3865       if (succeed)
3866 	{
3867 	  if ((!seeking_return) && (state->state == 1 || reverse))
3868 	    state->target_label = CODE_LABEL_NUMBER (label);
3869 	  else if (seeking_return || state->state == 2)
3870 	    {
3871 	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
3872 		{
3873 		  this_insn = next_nonnote_insn (this_insn);
3874 
3875 		  gcc_assert (!this_insn ||
3876 			      (GET_CODE (this_insn) != BARRIER
3877 			       && GET_CODE (this_insn) != CODE_LABEL));
3878 		}
3879 	      if (!this_insn)
3880 		{
3881 		  /* Oh dear! we ran off the end, give up.  */
3882 		  extract_insn_cached (insn);
3883 		  state->state = 0;
3884 		  state->target_insn = NULL;
3885 		  return;
3886 		}
3887 	      state->target_insn = this_insn;
3888 	    }
3889 	  else
3890 	    gcc_unreachable ();
3891 
3892 	  /* If REVERSE is true, the recorded condition code needs to be
3893 	     inverted from what it was.  */
3894 	  if (!reverse)
3895 	    {
3896 	      state->cond = XEXP (SET_SRC (body), 0);
3897 	      state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0));
3898 	    }
3899 
3900 	  if (reverse || then_not_else)
3901 	    state->cc = ARC_INVERSE_CONDITION_CODE (state->cc);
3902 	}
3903 
3904       /* Restore recog_operand.  Getting the attributes of other insns can
3905 	 destroy this array, but final.c assumes that it remains intact
3906 	 across this call; since the insn has been recognized already we
3907 	 call insn_extract direct.  */
3908       extract_insn_cached (insn);
3909     }
3910 }
3911 
3912 /* Record that we are currently outputting label NUM with prefix PREFIX.
3913    If it's the label we're looking for, reset the ccfsm machinery.
3914 
3915    Called from ASM_OUTPUT_INTERNAL_LABEL.  */
3916 
3917 static void
3918 arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state)
3919 {
3920   if (state->state == 3 && state->target_label == num
3921       && !strcmp (prefix, "L"))
3922     {
3923       state->state = 0;
3924       state->target_insn = NULL;
3925     }
3926 }
3927 
3928 /* We are considering a conditional branch with the condition COND.
3929    Check if we want to conditionalize a delay slot insn, and if so modify
3930    the ccfsm state accordingly.
3931    REVERSE says branch will branch when the condition is false.  */
3932 void
3933 arc_ccfsm_record_condition (rtx cond, bool reverse, rtx_insn *jump,
3934 			    struct arc_ccfsm *state)
3935 {
3936   rtx_insn *seq_insn = NEXT_INSN (PREV_INSN (jump));
3937   if (!state)
3938     state = &arc_ccfsm_current;
3939 
3940   gcc_assert (state->state == 0);
3941   if (seq_insn != jump)
3942     {
3943       rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1);
3944 
3945       if (!as_a<rtx_insn *> (insn)->deleted ()
3946 	  && INSN_ANNULLED_BRANCH_P (jump)
3947 	  && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn)))
3948 	{
3949 	  state->cond = cond;
3950 	  state->cc = get_arc_condition_code (cond);
3951 	  if (!reverse)
3952 	    arc_ccfsm_current.cc
3953 	      = ARC_INVERSE_CONDITION_CODE (state->cc);
3954 	  rtx pat = PATTERN (insn);
3955 	  if (GET_CODE (pat) == COND_EXEC)
3956 	    gcc_assert ((INSN_FROM_TARGET_P (insn)
3957 			 ? ARC_INVERSE_CONDITION_CODE (state->cc) : state->cc)
3958 			== get_arc_condition_code (XEXP (pat, 0)));
3959 	  else
3960 	    state->state = 5;
3961 	}
3962     }
3963 }
3964 
3965 /* Update *STATE as we would when we emit INSN.  */
3966 
3967 static void
3968 arc_ccfsm_post_advance (rtx_insn *insn, struct arc_ccfsm *state)
3969 {
3970   enum attr_type type;
3971 
3972   if (LABEL_P (insn))
3973     arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state);
3974   else if (JUMP_P (insn)
3975 	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
3976 	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
3977 	   && ((type = get_attr_type (insn)) == TYPE_BRANCH
3978 	       || (type == TYPE_UNCOND_BRANCH
3979 		   /* ??? Maybe should also handle TYPE_RETURN here,
3980 		      but we don't have a testcase for that.  */
3981 		   && ARC_CCFSM_BRANCH_DELETED_P (state))))
3982     {
3983       if (ARC_CCFSM_BRANCH_DELETED_P (state))
3984 	ARC_CCFSM_RECORD_BRANCH_DELETED (state);
3985       else
3986 	{
3987 	  rtx src = SET_SRC (PATTERN (insn));
3988 	  arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx,
3989 				      insn, state);
3990 	}
3991     }
3992   else if (arc_ccfsm_current.state == 5)
3993     arc_ccfsm_current.state = 0;
3994 }
3995 
3996 /* Return true if the current insn, which is a conditional branch, is to be
3997    deleted.  */
3998 
3999 bool
4000 arc_ccfsm_branch_deleted_p (void)
4001 {
4002   return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current);
4003 }
4004 
4005 /* Record that a branch isn't output because subsequent insns can be
4006    conditionalized.  */
4007 
4008 void
4009 arc_ccfsm_record_branch_deleted (void)
4010 {
4011   ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current);
4012 }
4013 
4014 /* During insn output, indicate if the current insn is predicated.  */
4015 
4016 bool
4017 arc_ccfsm_cond_exec_p (void)
4018 {
4019   return (cfun->machine->prescan_initialized
4020 	  && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current));
4021 }
4022 
4023 /* Like next_active_insn, but return NULL if we find an ADDR_(DIFF_)VEC,
4024    and look inside SEQUENCEs.  */
4025 
4026 static rtx_insn *
4027 arc_next_active_insn (rtx_insn *insn, struct arc_ccfsm *statep)
4028 {
4029   rtx pat;
4030 
4031   do
4032     {
4033       if (statep)
4034 	arc_ccfsm_post_advance (insn, statep);
4035       insn = NEXT_INSN (insn);
4036       if (!insn || BARRIER_P (insn))
4037 	return NULL;
4038       if (statep)
4039 	arc_ccfsm_advance (insn, statep);
4040     }
4041   while (NOTE_P (insn)
4042 	 || (cfun->machine->arc_reorg_started
4043 	     && LABEL_P (insn) && !label_to_alignment (insn))
4044 	 || (NONJUMP_INSN_P (insn)
4045 	     && (GET_CODE (PATTERN (insn)) == USE
4046 		 || GET_CODE (PATTERN (insn)) == CLOBBER)));
4047   if (!LABEL_P (insn))
4048     {
4049       gcc_assert (INSN_P (insn));
4050       pat = PATTERN (insn);
4051       if (GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC)
4052 	return NULL;
4053       if (GET_CODE (pat) == SEQUENCE)
4054 	return as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
4055     }
4056   return insn;
4057 }
4058 
4059 /* When deciding if an insn should be output short, we want to know something
4060    about the following insns:
4061    - if another insn follows which we know we can output as a short insn
4062      before an alignment-sensitive point, we can output this insn short:
4063      the decision about the eventual alignment can be postponed.
4064    - if a to-be-aligned label comes next, we should output this insn such
4065      as to get / preserve 4-byte alignment.
4066    - if a likely branch without delay slot insn, or a call with an immediately
4067      following short insn comes next, we should output this insn such as to
4068      get / preserve 2 mod 4 unalignment.
4069    - do the same for a not completely unlikely branch with a short insn
4070      following before any other branch / label.
4071    - in order to decide if we are actually looking at a branch, we need to
4072      call arc_ccfsm_advance.
4073    - in order to decide if we are looking at a short insn, we should know
4074      if it is conditionalized.  To a first order of approximation this is
4075      the case if the state from arc_ccfsm_advance from before this insn
4076      indicates the insn is conditionalized.  However, a further refinement
4077      could be to not conditionalize an insn if the destination register(s)
4078      is/are dead in the non-executed case.  */
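/* Hedged example of the bookkeeping this implies: with
   cfun->machine->unalign == 0 (the insn starts 4-byte aligned), emitting a
   2-byte short insn via output_short_suffix toggles unalign to 2, so a
   following to-be-aligned label would need another short insn or padding
   before it; emitting a 4-byte insn leaves unalign unchanged.  */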
4079 /* Return non-zero if INSN should be output as a short insn.  UNALIGN is
4080    zero if the current insn is aligned to a 4-byte-boundary, two otherwise.
4081    If CHECK_ATTR is greater than 0, check the iscompact attribute first.  */
4082 
4083 int
4084 arc_verify_short (rtx_insn *insn, int, int check_attr)
4085 {
4086   enum attr_iscompact iscompact;
4087   struct machine_function *machine;
4088 
4089   if (check_attr > 0)
4090     {
4091       iscompact = get_attr_iscompact (insn);
4092       if (iscompact == ISCOMPACT_FALSE)
4093 	return 0;
4094     }
4095   machine = cfun->machine;
4096 
4097   if (machine->force_short_suffix >= 0)
4098     return machine->force_short_suffix;
4099 
4100   return (get_attr_length (insn) & 2) != 0;
4101 }
4102 
4103 /* When outputting an instruction (alternative) that can potentially be short,
4104    output the short suffix if the insn is in fact short, and update
4105    cfun->machine->unalign accordingly.  */
4106 
4107 static void
4108 output_short_suffix (FILE *file)
4109 {
4110   rtx_insn *insn = current_output_insn;
4111 
4112   if (arc_verify_short (insn, cfun->machine->unalign, 1))
4113     {
4114       fprintf (file, "_s");
4115       cfun->machine->unalign ^= 2;
4116     }
4117   /* Restore recog_operand.  */
4118   extract_insn_cached (insn);
4119 }
4120 
4121 /* Implement FINAL_PRESCAN_INSN.  */
4122 
4123 void
4124 arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
4125 			int noperands ATTRIBUTE_UNUSED)
4126 {
4127   if (TARGET_DUMPISIZE)
4128     fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4129 
4130   /* Output a nop if necessary to prevent a hazard.
4131      Don't do this for delay slots: inserting a nop would
4132      alter semantics, and the only time we would find a hazard is for a
4133      call function result - and in that case, the hazard is spurious to
4134      start with.  */
4135   if (PREV_INSN (insn)
4136       && PREV_INSN (NEXT_INSN (insn)) == insn
4137       && arc_hazard (prev_real_insn (insn), insn))
4138     {
4139       current_output_insn =
4140 	emit_insn_before (gen_nop (), NEXT_INSN (PREV_INSN (insn)));
4141       final_scan_insn (current_output_insn, asm_out_file, optimize, 1, NULL);
4142       current_output_insn = insn;
4143     }
4144   /* Restore extraction data which might have been clobbered by arc_hazard.  */
4145   extract_constrain_insn_cached (insn);
4146 
4147   if (!cfun->machine->prescan_initialized)
4148     {
4149       /* Clear lingering state from branch shortening.  */
4150       memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current);
4151       cfun->machine->prescan_initialized = 1;
4152     }
4153   arc_ccfsm_advance (insn, &arc_ccfsm_current);
4154 
4155   cfun->machine->size_reason = 0;
4156 }
4157 
4158 /* Given FROM and TO register numbers, say whether this elimination is allowed.
4159    Frame pointer elimination is automatically handled.
4160 
4161    All eliminations are permissible. If we need a frame
4162    pointer, we must eliminate ARG_POINTER_REGNUM into
4163    FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
4164 
4165 static bool
4166 arc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
4167 {
4168   return to == FRAME_POINTER_REGNUM || !arc_frame_pointer_required ();
4169 }
4170 
4171 /* Define the offset between two registers, one to be eliminated, and
4172    the other its replacement, at the start of a routine.  */
4173 
4174 int
4175 arc_initial_elimination_offset (int from, int to)
4176 {
4177   if (! cfun->machine->frame_info.initialized)
4178      arc_compute_frame_size (get_frame_size ());
4179 
4180   if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
4181     {
4182       return (cfun->machine->frame_info.extra_size
4183 	      + cfun->machine->frame_info.reg_size);
4184     }
4185 
4186   if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4187     {
4188       return (cfun->machine->frame_info.total_size
4189 	      - cfun->machine->frame_info.pretend_size);
4190     }
4191 
4192   if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM))
4193     {
4194       return (cfun->machine->frame_info.total_size
4195 	      - (cfun->machine->frame_info.pretend_size
4196 	      + cfun->machine->frame_info.extra_size
4197 	      + cfun->machine->frame_info.reg_size));
4198     }
4199 
4200   gcc_unreachable ();
4201 }
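
/* Worked example with hypothetical frame numbers (not from a real
   compilation): for extra_size = 4, reg_size = 8, pretend_size = 0 and
   total_size = 32, the offsets above are

	ARG_POINTER   -> FRAME_POINTER : 4 + 8            = 12
	ARG_POINTER   -> STACK_POINTER : 32 - 0           = 32
	FRAME_POINTER -> STACK_POINTER : 32 - (0 + 4 + 8) = 20

   and, as a consistency check, the first and last offsets sum to the
   second.  */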
4202 
4203 static bool
4204 arc_frame_pointer_required (void)
4205 {
4206  return cfun->calls_alloca;
4207 }
4208 
4209 
4210 /* Return the destination address of a branch.  */
4211 
4212 int
4213 branch_dest (rtx branch)
4214 {
4215   rtx pat = PATTERN (branch);
4216   rtx dest = (GET_CODE (pat) == PARALLEL
4217 	      ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat));
4218   int dest_uid;
4219 
4220   if (GET_CODE (dest) == IF_THEN_ELSE)
4221     dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1);
4222 
4223   dest = XEXP (dest, 0);
4224   dest_uid = INSN_UID (dest);
4225 
4226   return INSN_ADDRESSES (dest_uid);
4227 }
4228 
4229 
4230 /* Implement TARGET_ENCODE_SECTION_INFO hook.  */
4231 
4232 static void
4233 arc_encode_section_info (tree decl, rtx rtl, int first)
4234 {
4235   /* For sdata, SYMBOL_FLAG_LOCAL and SYMBOL_FLAG_FUNCTION.
4236      This clears machine specific flags, so has to come first.  */
4237   default_encode_section_info (decl, rtl, first);
4238 
4239   /* Check if it is a function, and whether it has the
4240      [long/medium/short]_call attribute specified.  */
4241   if (TREE_CODE (decl) == FUNCTION_DECL)
4242     {
4243       rtx symbol = XEXP (rtl, 0);
4244       int flags = SYMBOL_REF_FLAGS (symbol);
4245 
4246       tree attr = (TREE_TYPE (decl) != error_mark_node
4247 		   ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE);
4248       tree long_call_attr = lookup_attribute ("long_call", attr);
4249       tree medium_call_attr = lookup_attribute ("medium_call", attr);
4250       tree short_call_attr = lookup_attribute ("short_call", attr);
4251 
4252       if (long_call_attr != NULL_TREE)
4253 	flags |= SYMBOL_FLAG_LONG_CALL;
4254       else if (medium_call_attr != NULL_TREE)
4255 	flags |= SYMBOL_FLAG_MEDIUM_CALL;
4256       else if (short_call_attr != NULL_TREE)
4257 	flags |= SYMBOL_FLAG_SHORT_CALL;
4258 
4259       SYMBOL_REF_FLAGS (symbol) = flags;
4260     }
4261 }
4262 
4263 /* This is how to output a definition of an internal numbered label where
4264    PREFIX is the class of label and NUM is the number within the class.  */
4265 
4266 static void arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
4267 {
4268   if (cfun)
4269     arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current);
4270   default_internal_label (stream, prefix, labelno);
4271 }
4272 
4273 /* Set the cpu type and print out other fancy things,
4274    at the top of the file.  */
4275 
4276 static void arc_file_start (void)
4277 {
4278   default_file_start ();
4279   fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string);
4280 }
4281 
4282 /* Cost functions.  */
4283 
4284 /* Compute a (partial) cost for rtx X.  Return true if the complete
4285    cost has been computed, and false if subexpressions should be
4286    scanned.  In either case, *TOTAL contains the cost result.  */
4287 
4288 static bool
4289 arc_rtx_costs (rtx x, machine_mode mode, int outer_code,
4290 	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
4291 {
4292   int code = GET_CODE (x);
4293 
4294   switch (code)
4295     {
4296       /* Small integers are as cheap as registers.  */
4297     case CONST_INT:
4298       {
4299 	bool nolimm = false; /* Can we do without long immediate?  */
4300 	bool fast = false; /* Is the result available immediately?  */
4301 	bool condexec = false; /* Does this allow conditional execution?  */
4302 	bool compact = false; /* Is a 16 bit opcode available?  */
4303 	/* CONDEXEC also implies that we can have an unconditional
4304 	   3-address operation.  */
4305 
4306 	nolimm = compact = condexec = false;
4307 	if (UNSIGNED_INT6 (INTVAL (x)))
4308 	  nolimm = condexec = compact = true;
4309 	else
4310 	  {
4311 	    if (SMALL_INT (INTVAL (x)))
4312 	      nolimm = fast = true;
4313 	    switch (outer_code)
4314 	      {
4315 	      case AND: /* bclr, bmsk, ext[bw] */
4316 		if (satisfies_constraint_Ccp (x) /* bclr */
4317 		    || satisfies_constraint_C1p (x) /* bmsk */)
4318 		  nolimm = fast = condexec = compact = true;
4319 		break;
4320 	      case IOR: /* bset */
4321 		if (satisfies_constraint_C0p (x)) /* bset */
4322 		  nolimm = fast = condexec = compact = true;
4323 		break;
4324 	      case XOR:
4325 		if (satisfies_constraint_C0p (x)) /* bxor */
4326 		  nolimm = fast = condexec = true;
4327 		break;
4328 	      case SET:
4329 		if (satisfies_constraint_Crr (x)) /* ror b,u6 */
4330 		  nolimm = true;
4331 	      default:
4332 		break;
4333 	      }
4334 	  }
4335 	/* FIXME: Add target options to attach a small cost if
4336 	   condexec / compact is not true.  */
4337 	if (nolimm)
4338 	  {
4339 	    *total = 0;
4340 	    return true;
4341 	  }
4342       }
4343       /* FALLTHRU */
4344 
4345       /*  4 byte values can be fetched as immediate constants -
4346 	  let's give that the cost of an extra insn.  */
4347     case CONST:
4348     case LABEL_REF:
4349     case SYMBOL_REF:
4350       *total = COSTS_N_INSNS (1);
4351       return true;
4352 
4353     case CONST_DOUBLE:
4354       {
4355 	rtx high, low;
4356 
4357 	if (TARGET_DPFP)
4358 	  {
4359 	    *total = COSTS_N_INSNS (1);
4360 	    return true;
4361 	  }
4362 	/* FIXME: correct the order of high,low */
4363 	split_double (x, &high, &low);
4364 	*total = COSTS_N_INSNS (!SMALL_INT (INTVAL (high))
4365 				+ !SMALL_INT (INTVAL (low)));
4366 	return true;
4367       }
4368 
4369     /* Encourage synth_mult to find a synthetic multiply when reasonable.
4370        If we need more than 12 insns to do a multiply, then go out-of-line,
4371        since the call overhead will be < 10% of the cost of the multiply.  */
4372     case ASHIFT:
4373     case ASHIFTRT:
4374     case LSHIFTRT:
4375       if (TARGET_BARREL_SHIFTER)
4376 	{
4377 	  /* If we want to shift a constant, we need a LIMM.  */
4378 	  /* ??? when the optimizers want to know if a constant should be
4379 	     hoisted, they ask for the cost of the constant.  OUTER_CODE is
4380 	     insufficient context for shifts since we don't know which operand
4381 	     we are looking at.  */
4382 	  if (CONSTANT_P (XEXP (x, 0)))
4383 	    {
4384 	      *total += (COSTS_N_INSNS (2)
4385 			 + rtx_cost (XEXP (x, 1), mode, (enum rtx_code) code,
4386 				     0, speed));
4387 	      return true;
4388 	    }
4389 	  *total = COSTS_N_INSNS (1);
4390 	}
4391       else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4392 	*total = COSTS_N_INSNS (16);
4393       else
4394 	{
4395 	  *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1)));
4396 	  /* ??? want_to_gcse_p can throw negative shift counts at us,
4397 	     and then panics when it gets a negative cost as result.
4398 	     Seen for gcc.c-torture/compile/20020710-1.c -Os .  */
4399 	  if (*total < 0)
4400 	    *total = 0;
4401 	}
4402       return false;
4403 
4404     case DIV:
4405     case UDIV:
4406       if (speed)
4407 	*total = COSTS_N_INSNS (30);
4408       else
4409 	*total = COSTS_N_INSNS (1);
4410       return false;
4411 
4412     case MULT:
4413       if ((TARGET_DPFP && GET_MODE (x) == DFmode))
4414 	*total = COSTS_N_INSNS (1);
4415       else if (speed)
4416 	*total = arc_multcost;
4417       /* We do not want synth_mult sequences when optimizing
4418 	 for size.  */
4419       else if (TARGET_MUL64_SET || TARGET_ARC700_MPY)
4420 	*total = COSTS_N_INSNS (1);
4421       else
4422 	*total = COSTS_N_INSNS (2);
4423       return false;
4424     case PLUS:
4425       if (GET_CODE (XEXP (x, 0)) == MULT
4426 	  && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
4427 	{
4428 	  *total += (rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed)
4429 		     + rtx_cost (XEXP (XEXP (x, 0), 0), mode, PLUS, 1, speed));
4430 	  return true;
4431 	}
4432       return false;
4433     case MINUS:
4434       if (GET_CODE (XEXP (x, 1)) == MULT
4435 	  && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode))
4436 	{
4437 	  *total += (rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed)
4438 		     + rtx_cost (XEXP (XEXP (x, 1), 0), mode, PLUS, 1, speed));
4439 	  return true;
4440 	}
4441       return false;
4442     case COMPARE:
4443       {
4444 	rtx op0 = XEXP (x, 0);
4445 	rtx op1 = XEXP (x, 1);
4446 
4447 	if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
4448 	    && XEXP (op0, 1) == const1_rtx)
4449 	  {
4450 	    /* btst / bbit0 / bbit1:
4451 	       Small integers and registers are free; everything else can
4452 	       be put in a register.  */
4453 	    mode = GET_MODE (XEXP (op0, 0));
4454 	    *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed)
4455 		      + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed));
4456 	    return true;
4457 	  }
4458 	if (GET_CODE (op0) == AND && op1 == const0_rtx
4459 	    && satisfies_constraint_C1p (XEXP (op0, 1)))
4460 	  {
4461 	    /* bmsk.f */
4462 	    *total = rtx_cost (XEXP (op0, 0), VOIDmode, SET, 1, speed);
4463 	    return true;
4464 	  }
4465 	/* add.f  */
4466 	if (GET_CODE (op1) == NEG)
4467 	  {
4468 	    /* op0 might be constant, the inside of op1 is rather
4469 	       unlikely to be so.  So swapping the operands might lower
4470 	       the cost.  */
4471 	    mode = GET_MODE (op0);
4472 	    *total = (rtx_cost (op0, mode, PLUS, 1, speed)
4473 		      + rtx_cost (XEXP (op1, 0), mode, PLUS, 0, speed));
4474 	  }
4475 	return false;
4476       }
4477     case EQ: case NE:
4478       if (outer_code == IF_THEN_ELSE
4479 	  && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
4480 	  && XEXP (x, 1) == const0_rtx
4481 	  && XEXP (XEXP (x, 0), 1) == const1_rtx)
4482 	{
4483 	  /* btst / bbit0 / bbit1:
4484 	     Small integers and registers are free; everything else can
4485 	     be put in a register.  */
4486 	  rtx op0 = XEXP (x, 0);
4487 
4488 	  mode = GET_MODE (XEXP (op0, 0));
4489 	  *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed)
4490 		    + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed));
4491 	  return true;
4492 	}
4493       /* Fall through.  */
4494     /* scc_insn expands into two insns.  */
4495     case GTU: case GEU: case LEU:
4496       if (mode == SImode)
4497 	*total += COSTS_N_INSNS (1);
4498       return false;
4499     case LTU: /* might use adc.  */
4500       if (mode == SImode)
4501 	*total += COSTS_N_INSNS (1) - 1;
4502       return false;
4503     default:
4504       return false;
4505     }
4506 }
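
/* Costing sketch (illustrative; not asserting the exact constraint
   ranges): a CONST_INT such as 63 that fits an unsigned 6-bit operand is
   reported as free, since it needs no long immediate and keeps short /
   conditional encodings available, while a large constant such as
   0x12345678 falls through to COSTS_N_INSNS (1) for the extra LIMM word.
   Likewise, without a barrel shifter a shift by a non-constant amount is
   priced at 16 insns, discouraging the optimizers from generating one.  */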
4507 
4508 /* Return true if ADDR is an address that needs to be expressed as an
4509    explicit sum of pcl + offset.  */
4510 
4511 bool
4512 arc_legitimate_pc_offset_p (rtx addr)
4513 {
4514   if (GET_CODE (addr) != CONST)
4515     return false;
4516   addr = XEXP (addr, 0);
4517   if (GET_CODE (addr) == PLUS)
4518     {
4519       if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
4520 	return false;
4521       addr = XEXP (addr, 0);
4522     }
4523   return (GET_CODE (addr) == UNSPEC
4524 	  && XVECLEN (addr, 0) == 1
4525 	  && XINT (addr, 1) == ARC_UNSPEC_GOT
4526 	  && GET_CODE (XVECEXP (addr, 0, 0)) == SYMBOL_REF);
4527 }
4528 
4529 /* Return true if ADDR is a valid pic address.
4530    A valid pic address on arc should look like
4531    const (unspec (SYMBOL_REF/LABEL) (ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT))  */
4532 
4533 bool
4534 arc_legitimate_pic_addr_p (rtx addr)
4535 {
4536   if (GET_CODE (addr) == LABEL_REF)
4537     return true;
4538   if (GET_CODE (addr) != CONST)
4539     return false;
4540 
4541   addr = XEXP (addr, 0);
4542 
4543 
4544   if (GET_CODE (addr) == PLUS)
4545     {
4546       if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
4547 	return false;
4548       addr = XEXP (addr, 0);
4549     }
4550 
4551   if (GET_CODE (addr) != UNSPEC
4552       || XVECLEN (addr, 0) != 1)
4553     return false;
4554 
4555   /* Must be @GOT or @GOTOFF.  */
4556   if (XINT (addr, 1) != ARC_UNSPEC_GOT
4557       && XINT (addr, 1) != ARC_UNSPEC_GOTOFF)
4558     return false;
4559 
4560   if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF
4561       && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF)
4562     return false;
4563 
4564   return true;
4565 }
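
/* Accepted shapes, sketched as RTL (symbol names and offset hypothetical):

     (const (unspec [(symbol_ref "foo")] ARC_UNSPEC_GOT))
     (const (plus (unspec [(symbol_ref "bar")] ARC_UNSPEC_GOTOFF)
		  (const_int 4)))

   i.e. the SYMBOL_REF or LABEL_REF must sit inside the unspec, optionally
   with a constant offset added inside the enclosing CONST.  */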
4566 
4567 
4568 
4569 /* Return true if OP contains a symbol reference.  */
4570 
4571 static bool
4572 symbolic_reference_mentioned_p (rtx op)
4573 {
4574   register const char *fmt;
4575   register int i;
4576 
4577   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4578     return true;
4579 
4580   fmt = GET_RTX_FORMAT (GET_CODE (op));
4581   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4582     {
4583       if (fmt[i] == 'E')
4584 	{
4585 	  register int j;
4586 
4587 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4588 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4589 	      return true;
4590 	}
4591 
4592       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4593 	return true;
4594     }
4595 
4596   return false;
4597 }
4598 
4599 /* Return true if OP contains a SYMBOL_REF that is not wrapped in an unspec.
4600    If SKIP_LOCAL is true, skip symbols that bind locally.
4601    This is used further down in this file, and, without SKIP_LOCAL,
4602    in the addsi3 / subsi3 expanders when generating PIC code.  */
4603 
4604 bool
4605 arc_raw_symbolic_reference_mentioned_p (rtx op, bool skip_local)
4606 {
4607   register const char *fmt;
4608   register int i;
4609 
4610   if (GET_CODE(op) == UNSPEC)
4611     return false;
4612 
4613   if (GET_CODE (op) == SYMBOL_REF)
4614     {
4615       tree decl = SYMBOL_REF_DECL (op);
4616       return !skip_local || !decl || !default_binds_local_p (decl);
4617     }
4618 
4619   fmt = GET_RTX_FORMAT (GET_CODE (op));
4620   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4621     {
4622       if (fmt[i] == 'E')
4623 	{
4624 	  register int j;
4625 
4626 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4627 	    if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j),
4628 							skip_local))
4629 	      return true;
4630 	}
4631 
4632       else if (fmt[i] == 'e'
4633 	       && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i),
4634 							  skip_local))
4635 	return true;
4636     }
4637 
4638   return false;
4639 }
4640 
4641 /* Legitimize a pic address reference in ORIG.
4642    The return value is the legitimated address.
4643    If OLDX is non-zero, it is the target to assign the address to first.  */
4644 
4645 rtx
4646 arc_legitimize_pic_address (rtx orig, rtx oldx)
4647 {
4648   rtx addr = orig;
4649   rtx pat = orig;
4650   rtx base;
4651 
4652   if (oldx == orig)
4653     oldx = NULL;
4654 
4655   if (GET_CODE (addr) == LABEL_REF)
4656     ; /* Do nothing.  */
4657   else if (GET_CODE (addr) == SYMBOL_REF
4658 	   && (CONSTANT_POOL_ADDRESS_P (addr)
4659 	       || SYMBOL_REF_LOCAL_P (addr)))
4660     {
4661       /* This symbol may be referenced via a displacement from the PIC
4662 	 base address (@GOTOFF).  */
4663 
4664       /* FIXME: if we had a way to emit pc-relative adds that don't
4665 	 create a GOT entry, we could do without the use of the gp register.  */
4666       crtl->uses_pic_offset_table = 1;
4667       pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOTOFF);
4668       pat = gen_rtx_CONST (Pmode, pat);
4669       pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat);
4670 
4671       if (oldx == NULL)
4672 	oldx = gen_reg_rtx (Pmode);
4673 
4674       if (oldx != 0)
4675 	{
4676 	  emit_move_insn (oldx, pat);
4677 	  pat = oldx;
4678 	}
4679 
4680     }
4681   else if (GET_CODE (addr) == SYMBOL_REF)
4682     {
4683       /* This symbol must be referenced via a load from the
4684 	 Global Offset Table (@GOTPC).  */
4685 
4686       pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOT);
4687       pat = gen_rtx_CONST (Pmode, pat);
4688       pat = gen_const_mem (Pmode, pat);
4689 
4690       if (oldx == 0)
4691 	oldx = gen_reg_rtx (Pmode);
4692 
4693       emit_move_insn (oldx, pat);
4694       pat = oldx;
4695     }
4696   else
4697     {
4698       if (GET_CODE (addr) == CONST)
4699 	{
4700 	  addr = XEXP (addr, 0);
4701 	  if (GET_CODE (addr) == UNSPEC)
4702 	    {
4703 	      /* Check that the unspec is one of the ones we generate?  */
4704 	    }
4705 	  else
4706 	    gcc_assert (GET_CODE (addr) == PLUS);
4707 	}
4708 
4709       if (GET_CODE (addr) == PLUS)
4710 	{
4711 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4712 
4713 	  /* Check first to see if this is a constant offset from a @GOTOFF
4714 	     symbol reference.  */
4715 	  if ((GET_CODE (op0) == LABEL_REF
4716 	       || (GET_CODE (op0) == SYMBOL_REF
4717 		   && (CONSTANT_POOL_ADDRESS_P (op0)
4718 		       || SYMBOL_REF_LOCAL_P (op0))))
4719 	      && GET_CODE (op1) == CONST_INT)
4720 	    {
4721 	      /* FIXME: like above, could do without gp reference.  */
4722 	      crtl->uses_pic_offset_table = 1;
4723 	      pat
4724 		= gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), ARC_UNSPEC_GOTOFF);
4725 	      pat = gen_rtx_PLUS (Pmode, pat, op1);
4726 	      pat = gen_rtx_CONST (Pmode, pat);
4727 	      pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat);
4728 
4729 	      if (oldx != 0)
4730 		{
4731 		  emit_move_insn (oldx, pat);
4732 		  pat = oldx;
4733 		}
4734 	    }
4735 	  else
4736 	    {
4737 	      base = arc_legitimize_pic_address (XEXP (addr, 0), oldx);
4738 	      pat  = arc_legitimize_pic_address (XEXP (addr, 1),
4739 					     base == oldx ? NULL_RTX : oldx);
4740 
4741 	      if (GET_CODE (pat) == CONST_INT)
4742 		pat = plus_constant (Pmode, base, INTVAL (pat));
4743 	      else
4744 		{
4745 		  if (GET_CODE (pat) == PLUS && CONSTANT_P (XEXP (pat, 1)))
4746 		    {
4747 		      base = gen_rtx_PLUS (Pmode, base, XEXP (pat, 0));
4748 		      pat = XEXP (pat, 1);
4749 		    }
4750 		  pat = gen_rtx_PLUS (Pmode, base, pat);
4751 		}
4752 	    }
4753 	}
4754     }
4755 
4756  return pat;
4757 }
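
/* Roughly, the RTL shapes produced above are (illustrative only; the symbol
   names are made up):

     local symbol / constant pool entry:
       (plus (reg PIC) (const (unspec [(symbol_ref "local")] GOTOFF)))
     other symbol:
       (mem (const (unspec [(symbol_ref "global")] GOT)))

   where PIC stands for pic_offset_table_rtx and GOTOFF / GOT abbreviate
   ARC_UNSPEC_GOTOFF / ARC_UNSPEC_GOT.  In both cases the result is normally
   copied into a fresh pseudo before being returned.  */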
4758 
4759 /* Output address constant X to FILE, taking PIC into account.  */
4760 
4761 void
4762 arc_output_pic_addr_const (FILE * file, rtx x, int code)
4763 {
4764   char buf[256];
4765 
4766  restart:
4767   switch (GET_CODE (x))
4768     {
4769     case PC:
4770       if (flag_pic)
4771 	putc ('.', file);
4772       else
4773 	gcc_unreachable ();
4774       break;
4775 
4776     case SYMBOL_REF:
4777       output_addr_const (file, x);
4778 
4779       /* Local functions do not get references through the PLT.  */
4780       if (code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
4781 	fputs ("@plt", file);
4782       break;
4783 
4784     case LABEL_REF:
4785       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0)));
4786       assemble_name (file, buf);
4787       break;
4788 
4789     case CODE_LABEL:
4790       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
4791       assemble_name (file, buf);
4792       break;
4793 
4794     case CONST_INT:
4795       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4796       break;
4797 
4798     case CONST:
4799       arc_output_pic_addr_const (file, XEXP (x, 0), code);
4800       break;
4801 
4802     case CONST_DOUBLE:
4803       if (GET_MODE (x) == VOIDmode)
4804 	{
4805 	  /* We can use %d if the number is one word and positive.  */
4806 	  if (CONST_DOUBLE_HIGH (x))
4807 	    fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
4808 		     CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x));
4809 	  else if  (CONST_DOUBLE_LOW (x) < 0)
4810 	    fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x));
4811 	  else
4812 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
4813 	}
4814       else
4815 	/* We can't handle floating point constants;
4816 	   PRINT_OPERAND must handle them.  */
4817 	output_operand_lossage ("floating constant misused");
4818       break;
4819 
4820     case PLUS:
4821       /* FIXME: Not needed here.  */
4822       /* Some assemblers need integer constants to appear last (e.g. masm).  */
4823       if (GET_CODE (XEXP (x, 0)) == CONST_INT)
4824 	{
4825 	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
4826 	  fprintf (file, "+");
4827 	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
4828 	}
4829       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4830 	{
4831 	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
4832 	  if (INTVAL (XEXP (x, 1)) >= 0)
4833 	    fprintf (file, "+");
4834 	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
4835 	}
4836       else
4837 	gcc_unreachable();
4838       break;
4839 
4840     case MINUS:
4841       /* Avoid outputting things like x-x or x+5-x,
4842 	 since some assemblers can't handle that.  */
4843       x = simplify_subtraction (x);
4844       if (GET_CODE (x) != MINUS)
4845 	goto restart;
4846 
4847       arc_output_pic_addr_const (file, XEXP (x, 0), code);
4848       fprintf (file, "-");
4849       if (GET_CODE (XEXP (x, 1)) == CONST_INT
4850 	  && INTVAL (XEXP (x, 1)) < 0)
4851 	{
4852 	  fprintf (file, "(");
4853 	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
4854 	  fprintf (file, ")");
4855 	}
4856       else
4857 	arc_output_pic_addr_const (file, XEXP (x, 1), code);
4858       break;
4859 
4860     case ZERO_EXTEND:
4861     case SIGN_EXTEND:
4862       arc_output_pic_addr_const (file, XEXP (x, 0), code);
4863       break;
4864 
4865 
4866     case UNSPEC:
4867       gcc_assert (XVECLEN (x, 0) == 1);
4868       if (XINT (x, 1) == ARC_UNSPEC_GOT)
4869 	fputs ("pcl,", file);
4870       arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
4871       switch (XINT (x, 1))
4872 	{
4873 	case ARC_UNSPEC_GOT:
4874 	  fputs ("@gotpc", file);
4875 	  break;
4876 	case ARC_UNSPEC_GOTOFF:
4877 	  fputs ("@gotoff", file);
4878 	  break;
4879 	case ARC_UNSPEC_PLT:
4880 	  fputs ("@plt", file);
4881 	  break;
4882 	default:
4883 	  output_operand_lossage ("invalid UNSPEC as operand: %d", XINT (x,1));
4884 	  break;
4885 	}
4886        break;
4887 
4888     default:
4889       output_operand_lossage ("invalid expression as operand");
4890     }
4891 }
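
/* For example (illustrative, not exhaustive): an ARC_UNSPEC_GOT reference to
   a symbol "bar" is printed as "pcl,bar@gotpc", an ARC_UNSPEC_GOTOFF
   reference as "bar@gotoff", and with the 'P' code a non-local SYMBOL_REF
   gets an "@plt" suffix.  The symbol name "bar" is made up.  */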
4892 
4893 #define SYMBOLIC_CONST(X)	\
4894 (GET_CODE (X) == SYMBOL_REF						\
4895  || GET_CODE (X) == LABEL_REF						\
4896  || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
4897 
4898 /* Emit insns to move operands[1] into operands[0].  */
4899 
4900 void
4901 emit_pic_move (rtx *operands, machine_mode)
4902 {
4903   rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
4904 
4905   if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
4906     operands[1] = force_reg (Pmode, operands[1]);
4907   else
4908     operands[1] = arc_legitimize_pic_address (operands[1], temp);
4909 }
4910 
4911 
4912 /* This function returns the number of words at the beginning of an
4913    argument that must be put in registers.  The returned value must be
4914    zero for arguments that are passed entirely in registers or that
4915    are entirely pushed on the stack.
4916 
4917    On some machines, certain arguments must be passed partially in
4918    registers and partially in memory.  On these machines, typically
4919    the first N words of arguments are passed in registers, and the
4920    rest on the stack.  If a multi-word argument (a `double' or a
4921    structure) crosses that boundary, its first few words must be
4922    passed in registers and the rest must be pushed.  This function
4923    tells the compiler when this occurs, and how many of the words
4924    should go in registers.
4925 
4926    `FUNCTION_ARG' for these arguments should return the first register
4927    to be used by the caller for this argument; likewise
4928    `FUNCTION_INCOMING_ARG', for the called function.
4929 
4930    The function is used to implement macro FUNCTION_ARG_PARTIAL_NREGS.  */
4931 
4932 /* If REGNO is the lowest-numbered arg reg still available, return the
4933    total number of arg regs that remain available.  */
4934 #define GPR_REST_ARG_REGS(REGNO) \
4935   ((REGNO) <= MAX_ARC_PARM_REGS ? MAX_ARC_PARM_REGS - (REGNO) : 0 )
4936 
4937 /* ARC parm regs are contiguous, so the next arg reg is simply REGNO + 1.  */
4938 #define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 )
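
/* A minimal sketch of how these two macros interact (assuming, for
   illustration, eight argument registers r0-r7, i.e. MAX_ARC_PARM_REGS == 8):
   if the next free arg reg is 6, GPR_REST_ARG_REGS (6) says two registers
   are still available, and ARC_NEXT_ARG_REG (6) is simply 7.  */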
4939 
4940 /* Implement TARGET_ARG_PARTIAL_BYTES.  */
4941 
4942 static int
4943 arc_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4944 		       tree type, bool named ATTRIBUTE_UNUSED)
4945 {
4946   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4947   int bytes = (mode == BLKmode
4948 	       ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
4949   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4950   int arg_num = *cum;
4951   int ret;
4952 
4953   arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
4954   ret = GPR_REST_ARG_REGS (arg_num);
4955 
4956   /* ICEd at function.c:2361, and ret is copied to data->partial.  */
4957   ret = (ret >= words ? 0 : ret * UNITS_PER_WORD);
4958 
4959   return ret;
4960 }
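
/* Worked example (a sketch, assuming 4-byte words and the arg regs above):
   a DImode argument needs two words.  If two or more arg regs remain, the
   function returns 0 and the argument is passed entirely in registers; if
   exactly one remains, it returns UNITS_PER_WORD, so 4 bytes go in the last
   reg and the rest is pushed; if none remain, it again returns 0 and the
   whole argument goes on the stack.  */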
4961 
4962 /* This function controls whether a function argument is passed in a
4963    register, and if so, which register.
4964 
4965    The arguments are CUM, of type CUMULATIVE_ARGS, which summarizes
4966    (in a way defined by INIT_CUMULATIVE_ARGS and FUNCTION_ARG_ADVANCE)
4967    all of the previous arguments so far passed in registers; MODE, the
4968    machine mode of the argument; TYPE, the data type of the argument
4969    as a tree node or 0 if that is not known (which happens for C
4970    support library functions); and NAMED, which is 1 for an ordinary
4971    argument and 0 for nameless arguments that correspond to `...' in
4972    the called function's prototype.
4973 
4974    The returned value should either be a `reg' RTX for the hard
4975    register in which to pass the argument, or zero to pass the
4976    argument on the stack.
4977 
4978    For machines like the Vax and 68000, where normally all arguments
4979    are pushed, zero suffices as a definition.
4980 
4981    The usual way to make the ANSI library `stdarg.h' work on a machine
4982    where some arguments are usually passed in registers, is to cause
4983    nameless arguments to be passed on the stack instead.  This is done
4984    by making the function return 0 whenever NAMED is 0.
4985 
4986    You may use the macro `MUST_PASS_IN_STACK (MODE, TYPE)' in the
4987    definition of this function to determine if this argument is of a
4988    type that must be passed in the stack.  If `REG_PARM_STACK_SPACE'
4989    is not defined and the function returns non-zero for such an
4990    argument, the compiler will abort.  If `REG_PARM_STACK_SPACE' is
4991    defined, the argument will be computed in the stack and then loaded
4992    into a register.
4993 
4994    The function is used to implement macro FUNCTION_ARG.  */
4995 /* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers
4996    and the rest are pushed.  */
4997 
4998 static rtx
4999 arc_function_arg (cumulative_args_t cum_v,
5000 		  machine_mode mode,
5001 		  const_tree type ATTRIBUTE_UNUSED,
5002 		  bool named ATTRIBUTE_UNUSED)
5003 {
5004   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5005   int arg_num = *cum;
5006   rtx ret;
5007   const char *debstr ATTRIBUTE_UNUSED;
5008 
5009   arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
5010   /* Return a marker for use in the call instruction.  */
5011   if (mode == VOIDmode)
5012     {
5013       ret = const0_rtx;
5014       debstr = "<0>";
5015     }
5016   else if (GPR_REST_ARG_REGS (arg_num) > 0)
5017     {
5018       ret = gen_rtx_REG (mode, arg_num);
5019       debstr = reg_names [arg_num];
5020     }
5021   else
5022     {
5023       ret = NULL_RTX;
5024       debstr = "memory";
5025     }
5026   return ret;
5027 }
5028 
5029 /* The function to update the summarizer variable *CUM to advance past
5030    an argument in the argument list.  The values MODE, TYPE and NAMED
5031    describe that argument.  Once this is done, the variable *CUM is
5032    suitable for analyzing the *following* argument with
5033    `FUNCTION_ARG', etc.
5034 
5035    This function need not do anything if the argument in question was
5036    passed on the stack.  The compiler knows how to track the amount of
5037    stack space used for arguments without any special help.
5038 
5039    The function is used to implement macro FUNCTION_ARG_ADVANCE.  */
5040 /* For the ARC: the cum set here is passed on to function_arg, where we
5041    look at its value and say which reg to use.  Strategy: advance the
5042    reg number here till we run out of arg regs, then set *cum past the
5043    last arg reg.  In function_arg, since *cum > last arg reg, we return 0
5044    and thus the arg ends up on the stack.  For straddling args, of
5045    course, function_arg_partial_nregs comes into play.  */
5046 
5047 static void
5048 arc_function_arg_advance (cumulative_args_t cum_v,
5049 			  machine_mode mode,
5050 			  const_tree type,
5051 			  bool named ATTRIBUTE_UNUSED)
5052 {
5053   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5054   int bytes = (mode == BLKmode
5055 	       ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
5056   int words = (bytes + UNITS_PER_WORD  - 1) / UNITS_PER_WORD;
5057   int i;
5058 
5059   if (words)
5060     *cum = ROUND_ADVANCE_CUM (*cum, mode, type);
5061   for (i = 0; i < words; i++)
5062     *cum = ARC_NEXT_ARG_REG (*cum);
5063 
5064 }
5065 
5066 /* Define how to find the value returned by a function.
5067    VALTYPE is the data type of the value (as a tree).
5068    If the precise function being called is known, FN_DECL_OR_TYPE is its
5069    FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type.  */
5070 
5071 static rtx
5072 arc_function_value (const_tree valtype,
5073 		    const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
5074 		    bool outgoing ATTRIBUTE_UNUSED)
5075 {
5076   machine_mode mode = TYPE_MODE (valtype);
5077   int unsignedp ATTRIBUTE_UNUSED;
5078 
5079   unsignedp = TYPE_UNSIGNED (valtype);
5080   if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE)
5081     PROMOTE_MODE (mode, unsignedp, valtype);
5082   return gen_rtx_REG (mode, 0);
5083 }
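
/* Sketch of the effect, assuming the usual ARC conventions: an integral
   value narrower than a word, e.g. a `short', is promoted via PROMOTE_MODE
   to SImode, and the value is returned in register 0 (r0).  */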
5084 
5085 /* Returns the return address that is used by builtin_return_address.  */
5086 
5087 rtx
5088 arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame)
5089 {
5090   if (count != 0)
5091     return const0_rtx;
5092 
5093   return get_hard_reg_initial_val (Pmode , RETURN_ADDR_REGNUM);
5094 }
5095 
5096 /* Nonzero if the constant value X is a legitimate general operand
5097    when generating PIC code.  It is given that flag_pic is on and
5098    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
5099 
5100 bool
5101 arc_legitimate_pic_operand_p (rtx x)
5102 {
5103   return !arc_raw_symbolic_reference_mentioned_p (x, true);
5104 }
5105 
5106 /* Determine if a given RTX is a valid constant.  We already know this
5107    satisfies CONSTANT_P.  */
5108 
5109 bool
5110 arc_legitimate_constant_p (machine_mode, rtx x)
5111 {
5112   if (!flag_pic)
5113     return true;
5114 
5115   switch (GET_CODE (x))
5116     {
5117     case CONST:
5118       x = XEXP (x, 0);
5119 
5120       if (GET_CODE (x) == PLUS)
5121 	{
5122 	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5123 	    return false;
5124 	  x = XEXP (x, 0);
5125 	}
5126 
5127       /* Only some unspecs are valid as "constants".  */
5128       if (GET_CODE (x) == UNSPEC)
5129 	switch (XINT (x, 1))
5130 	  {
5131 	  case ARC_UNSPEC_PLT:
5132 	  case ARC_UNSPEC_GOTOFF:
5133 	  case ARC_UNSPEC_GOT:
5134 	  case UNSPEC_PROF:
5135 	    return true;
5136 
5137 	  default:
5138 	    gcc_unreachable ();
5139 	  }
5140 
5141       /* We must have drilled down to a symbol.  */
5142       if (arc_raw_symbolic_reference_mentioned_p (x, false))
5143 	return false;
5144 
5145       /* Return true.  */
5146       break;
5147 
5148     case LABEL_REF:
5149     case SYMBOL_REF:
5150       return false;
5151 
5152     default:
5153       break;
5154     }
5155 
5156   /* Otherwise we handle everything else in the move patterns.  */
5157   return true;
5158 }
5159 
5160 static bool
5161 arc_legitimate_address_p (machine_mode mode, rtx x, bool strict)
5162 {
5163   if (RTX_OK_FOR_BASE_P (x, strict))
5164      return true;
5165   if (LEGITIMATE_OFFSET_ADDRESS_P (mode, x, TARGET_INDEXED_LOADS, strict))
5166      return true;
5167   if (LEGITIMATE_SCALED_ADDRESS_P (mode, x, strict))
5168     return true;
5169   if (LEGITIMATE_SMALL_DATA_ADDRESS_P (x))
5170      return true;
5171   if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x)))
5172      return true;
5173   if ((GET_MODE_SIZE (mode) != 16)
5174       && (GET_CODE (x) == SYMBOL_REF
5175 	  || GET_CODE (x) == LABEL_REF
5176 	  || GET_CODE (x) == CONST))
5177     {
5178       if (!flag_pic || arc_legitimate_pic_addr_p (x))
5179 	return true;
5180     }
5181   if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC
5182        || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC)
5183       && RTX_OK_FOR_BASE_P (XEXP (x, 0), strict))
5184     return true;
5185       /* We're restricted here by the `st' insn.  */
5186   if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY)
5187       && GET_CODE (XEXP ((x), 1)) == PLUS
5188       && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP (x, 1), 0))
5189       && LEGITIMATE_OFFSET_ADDRESS_P (QImode, XEXP (x, 1),
5190 				      TARGET_AUTO_MODIFY_REG, strict))
5191     return true;
5192   return false;
5193 }
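
/* Illustrative RTL shapes accepted above (not exhaustive): a plain base
   register, (plus base (const_int small_offset)), a scaled index such as
   (plus (mult index (const_int 4)) base) for 4-byte accesses, symbolic
   addresses when they are PIC-legitimate, and the PRE/POST INC/DEC/MODIFY
   forms whose base register is itself valid.  */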
5194 
5195 /* Return true iff ADDR (a legitimate address expression)
5196    has an effect that depends on the machine mode it is used for.  */
5197 
5198 static bool
5199 arc_mode_dependent_address_p (const_rtx addr, addr_space_t)
5200 {
5201   /* SYMBOL_REF is not mode dependent: it is either a small data reference,
5202      which is valid for loads and stores, or a limm offset, which is valid for
5203      loads.  */
5204   /* Scaled indices are scaled by the access mode; likewise for scaled
5205      offsets, which are needed for maximum offset stores.  */
5206   if (GET_CODE (addr) == PLUS
5207       && (GET_CODE (XEXP ((addr), 0)) == MULT
5208 	  || (CONST_INT_P (XEXP ((addr), 1))
5209 	      && !SMALL_INT (INTVAL (XEXP ((addr), 1))))))
5210     return true;
5211   return false;
5212 }
5213 
5214 /* Determine if it's legal to put X into the constant pool.  */
5215 
5216 static bool
5217 arc_cannot_force_const_mem (machine_mode mode, rtx x)
5218 {
5219   return !arc_legitimate_constant_p (mode, x);
5220 }
5221 
5222 /* IDs for all the ARC builtins.  */
5223 
5224 enum arc_builtin_id
5225   {
5226 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)	\
5227     ARC_BUILTIN_ ## NAME,
5228 #include "builtins.def"
5229 #undef DEF_BUILTIN
5230 
5231     ARC_BUILTIN_COUNT
5232   };
5233 
5234 struct GTY(()) arc_builtin_description
5235 {
5236   enum insn_code icode;
5237   int n_args;
5238   tree fndecl;
5239 };
5240 
5241 static GTY(()) struct arc_builtin_description
5242 arc_bdesc[ARC_BUILTIN_COUNT] =
5243 {
5244 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)		\
5245   { (enum insn_code) CODE_FOR_ ## ICODE, N_ARGS, NULL_TREE },
5246 #include "builtins.def"
5247 #undef DEF_BUILTIN
5248 };
5249 
5250 /* Transform UP into lowercase and write the result to LO.
5251    You must provide enough space for LO.  Return LO.  */
5252 
5253 static char*
5254 arc_tolower (char *lo, const char *up)
5255 {
5256   char *lo0 = lo;
5257 
5258   for (; *up; up++, lo++)
5259     *lo = TOLOWER (*up);
5260 
5261   *lo = '\0';
5262 
5263   return lo0;
5264 }
5265 
5266 /* Implement `TARGET_BUILTIN_DECL'.  */
5267 
5268 static tree
5269 arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
5270 {
5271   if (id < ARC_BUILTIN_COUNT)
5272     return arc_bdesc[id].fndecl;
5273 
5274   return error_mark_node;
5275 }
5276 
5277 static void
5278 arc_init_builtins (void)
5279 {
5280   tree pcvoid_type_node
5281     = build_pointer_type (build_qualified_type (void_type_node,
5282 						TYPE_QUAL_CONST));
5283   tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node,
5284 						    V8HImode);
5285 
5286   tree void_ftype_void
5287     = build_function_type_list (void_type_node, NULL_TREE);
5288   tree int_ftype_int
5289     = build_function_type_list (integer_type_node, integer_type_node,
5290 				NULL_TREE);
5291   tree int_ftype_pcvoid_int
5292     = build_function_type_list (integer_type_node, pcvoid_type_node,
5293 				integer_type_node, NULL_TREE);
5294   tree void_ftype_usint_usint
5295     = build_function_type_list (void_type_node, long_unsigned_type_node,
5296 				long_unsigned_type_node, NULL_TREE);
5297   tree int_ftype_int_int
5298     = build_function_type_list (integer_type_node, integer_type_node,
5299 				integer_type_node, NULL_TREE);
5300   tree usint_ftype_usint
5301     = build_function_type_list  (long_unsigned_type_node,
5302 				 long_unsigned_type_node, NULL_TREE);
5303   tree void_ftype_usint
5304     = build_function_type_list (void_type_node, long_unsigned_type_node,
5305 				NULL_TREE);
5306   tree int_ftype_void
5307     = build_function_type_list (integer_type_node, void_type_node,
5308 				NULL_TREE);
5309   tree void_ftype_int
5310     = build_function_type_list (void_type_node, integer_type_node,
5311 				NULL_TREE);
5312   tree int_ftype_short
5313     = build_function_type_list (integer_type_node, short_integer_type_node,
5314 				NULL_TREE);
5315 
5316   /* Old ARC SIMD types.  */
5317   tree v8hi_ftype_v8hi_v8hi
5318     = build_function_type_list (V8HI_type_node, V8HI_type_node,
5319 				V8HI_type_node, NULL_TREE);
5320   tree v8hi_ftype_v8hi_int
5321     = build_function_type_list (V8HI_type_node, V8HI_type_node,
5322 				integer_type_node, NULL_TREE);
5323   tree v8hi_ftype_v8hi_int_int
5324     = build_function_type_list (V8HI_type_node, V8HI_type_node,
5325 				integer_type_node, integer_type_node,
5326 				NULL_TREE);
5327   tree void_ftype_v8hi_int_int
5328     = build_function_type_list (void_type_node, V8HI_type_node,
5329 				integer_type_node, integer_type_node,
5330 				NULL_TREE);
5331   tree void_ftype_v8hi_int_int_int
5332     = build_function_type_list (void_type_node, V8HI_type_node,
5333 				integer_type_node, integer_type_node,
5334 				integer_type_node, NULL_TREE);
5335   tree v8hi_ftype_int_int
5336     = build_function_type_list (V8HI_type_node, integer_type_node,
5337 				integer_type_node, NULL_TREE);
5338   tree void_ftype_int_int
5339     = build_function_type_list (void_type_node, integer_type_node,
5340 				integer_type_node, NULL_TREE);
5341   tree v8hi_ftype_v8hi
5342     = build_function_type_list (V8HI_type_node, V8HI_type_node,
5343 				NULL_TREE);
5344 
5345   /* Add the builtins.  */
5346 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)			\
5347   {									\
5348     int id = ARC_BUILTIN_ ## NAME;					\
5349     const char *Name = "__builtin_arc_" #NAME;				\
5350     char *name = (char*) alloca (1 + strlen (Name));			\
5351 									\
5352     gcc_assert (id < ARC_BUILTIN_COUNT);				\
5353     if (MASK)								\
5354       arc_bdesc[id].fndecl						\
5355 	= add_builtin_function (arc_tolower(name, Name), TYPE, id,	\
5356 				BUILT_IN_MD, NULL, NULL_TREE);		\
5357   }
5358 #include "builtins.def"
5359 #undef DEF_BUILTIN
5360 }
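
/* For illustration only, a hypothetical builtins.def entry such as

     DEF_BUILTIN (NOP, 0, void_ftype_void, nopv, 1)

   would, through the macro above, register a function named
   "__builtin_arc_nop" (the NAME lowercased via arc_tolower) whenever its
   MASK condition holds, and the corresponding arc_bdesc slot would record
   CODE_FOR_nopv as its insn code.  The real entries live in builtins.def
   and may differ in detail.  */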
5361 
5362 /* Helper to expand __builtin_arc_aligned (void* val, int
5363   alignval).  */
5364 
5365 static rtx
5366 arc_expand_builtin_aligned (tree exp)
5367 {
5368   tree arg0 = CALL_EXPR_ARG (exp, 0);
5369   tree arg1 = CALL_EXPR_ARG (exp, 1);
5370   fold (arg1);
5371   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5372   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5373 
5374   if (!CONST_INT_P (op1))
5375     {
5376       /* If we can't fold the alignment to a constant integer
5377 	 whilst optimizing, this is probably a user error.  */
5378       if (optimize)
5379 	warning (0, "__builtin_arc_aligned with non-constant alignment");
5380     }
5381   else
5382     {
5383       HOST_WIDE_INT alignTest = INTVAL (op1);
5384       /* Check alignTest is positive, and a power of two.  */
5385       if (alignTest <= 0 || alignTest != (alignTest & -alignTest))
5386 	{
5387 	  error ("invalid alignment value for __builtin_arc_aligned");
5388 	  return NULL_RTX;
5389 	}
5390 
5391       if (CONST_INT_P (op0))
5392 	{
5393 	  HOST_WIDE_INT pnt = INTVAL (op0);
5394 
5395 	  if ((pnt & (alignTest - 1)) == 0)
5396 	    return const1_rtx;
5397 	}
5398       else
5399 	{
5400 	  unsigned  align = get_pointer_alignment (arg0);
5401 	  unsigned  numBits = alignTest * BITS_PER_UNIT;
5402 
5403 	  if (align && align >= numBits)
5404 	    return const1_rtx;
5405 	  /* Another attempt to ascertain alignment.  Check the type
5406 	     we are pointing to.  */
5407 	  if (POINTER_TYPE_P (TREE_TYPE (arg0))
5408 	      && TYPE_ALIGN (TREE_TYPE (TREE_TYPE (arg0))) >= numBits)
5409 	    return const1_rtx;
5410 	}
5411     }
5412 
5413   /* Default to false.  */
5414   return const0_rtx;
5415 }
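
/* Usage sketch (illustrative; the variable names are made up): in user code,

     int ok = __builtin_arc_aligned (p, 4);

   folds to 1 at compile time when P is known to be at least 4-byte aligned
   (or points to a type with at least that alignment), and to 0 otherwise;
   a non-power-of-two alignment argument is rejected with an error.  */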
5416 
5417 /* Helper arc_expand_builtin, generates a pattern for the given icode
5418    and arguments.  */
5419 
5420 static rtx_insn *
5421 apply_GEN_FCN (enum insn_code icode, rtx *arg)
5422 {
5423   switch (insn_data[icode].n_generator_args)
5424     {
5425     case 0:
5426       return GEN_FCN (icode) ();
5427     case 1:
5428       return GEN_FCN (icode) (arg[0]);
5429     case 2:
5430       return GEN_FCN (icode) (arg[0], arg[1]);
5431     case 3:
5432       return GEN_FCN (icode) (arg[0], arg[1], arg[2]);
5433     case 4:
5434       return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3]);
5435     case 5:
5436       return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3], arg[4]);
5437     default:
5438       gcc_unreachable ();
5439     }
5440 }
5441 
5442 /* Expand an expression EXP that calls a built-in function,
5443    with result going to TARGET if that's convenient
5444    (and in mode MODE if that's convenient).
5445    SUBTARGET may be used as the target for computing one of EXP's operands.
5446    IGNORE is nonzero if the value is to be ignored.  */
5447 
5448 static rtx
5449 arc_expand_builtin (tree exp,
5450 		    rtx target,
5451 		    rtx subtarget ATTRIBUTE_UNUSED,
5452 		    machine_mode mode ATTRIBUTE_UNUSED,
5453 		    int ignore ATTRIBUTE_UNUSED)
5454 {
5455   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
5456   unsigned int id = DECL_FUNCTION_CODE (fndecl);
5457   const struct arc_builtin_description *d = &arc_bdesc[id];
5458   int i, j, n_args = call_expr_nargs (exp);
5459   rtx pat = NULL_RTX;
5460   rtx xop[5];
5461   enum insn_code icode = d->icode;
5462   machine_mode tmode = insn_data[icode].operand[0].mode;
5463   int nonvoid;
5464   tree arg0;
5465   tree arg1;
5466   tree arg2;
5467   tree arg3;
5468   rtx op0;
5469   rtx op1;
5470   rtx op2;
5471   rtx op3;
5472   rtx op4;
5473   machine_mode mode0;
5474   machine_mode mode1;
5475   machine_mode mode2;
5476   machine_mode mode3;
5477   machine_mode mode4;
5478 
5479   if (id >= ARC_BUILTIN_COUNT)
5480     internal_error ("bad builtin fcode");
5481 
5482   /* 1st part: Expand special builtins.  */
5483   switch (id)
5484     {
5485     case ARC_BUILTIN_NOP:
5486       emit_insn (gen_nopv ());
5487       return NULL_RTX;
5488 
5489     case ARC_BUILTIN_RTIE:
5490     case ARC_BUILTIN_SYNC:
5491     case ARC_BUILTIN_BRK:
5492     case ARC_BUILTIN_SWI:
5493     case ARC_BUILTIN_UNIMP_S:
5494       gcc_assert (icode != 0);
5495       emit_insn (GEN_FCN (icode) (const1_rtx));
5496       return NULL_RTX;
5497 
5498     case ARC_BUILTIN_ALIGNED:
5499       return arc_expand_builtin_aligned (exp);
5500 
5501     case ARC_BUILTIN_CLRI:
5502       target = gen_reg_rtx (SImode);
5503       emit_insn (gen_clri (target, const1_rtx));
5504       return target;
5505 
5506     case ARC_BUILTIN_TRAP_S:
5507     case ARC_BUILTIN_SLEEP:
5508       arg0 = CALL_EXPR_ARG (exp, 0);
5509       fold (arg0);
5510       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5511 
5512       if  (!CONST_INT_P (op0) || !satisfies_constraint_L (op0))
5513 	{
5514 	  error ("builtin operand should be an unsigned 6-bit value");
5515 	  return NULL_RTX;
5516 	}
5517       gcc_assert (icode != 0);
5518       emit_insn (GEN_FCN (icode) (op0));
5519       return NULL_RTX;
5520 
5521     case ARC_BUILTIN_VDORUN:
5522     case ARC_BUILTIN_VDIRUN:
5523       arg0 = CALL_EXPR_ARG (exp, 0);
5524       arg1 = CALL_EXPR_ARG (exp, 1);
5525       op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
5526       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5527 
5528       target = gen_rtx_REG (SImode, (id == ARC_BUILTIN_VDIRUN) ? 131 : 139);
5529 
5530       mode0 =  insn_data[icode].operand[1].mode;
5531       mode1 =  insn_data[icode].operand[2].mode;
5532 
5533       if (!insn_data[icode].operand[1].predicate (op0, mode0))
5534 	op0 = copy_to_mode_reg (mode0, op0);
5535 
5536       if (!insn_data[icode].operand[2].predicate (op1, mode1))
5537 	op1 = copy_to_mode_reg (mode1, op1);
5538 
5539       pat = GEN_FCN (icode) (target, op0, op1);
5540       if (!pat)
5541 	return NULL_RTX;
5542 
5543       emit_insn (pat);
5544       return NULL_RTX;
5545 
5546     case ARC_BUILTIN_VDIWR:
5547     case ARC_BUILTIN_VDOWR:
5548       arg0 = CALL_EXPR_ARG (exp, 0);
5549       arg1 = CALL_EXPR_ARG (exp, 1);
5550       op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
5551       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5552 
5553       if (!CONST_INT_P (op0)
5554 	  || !(UNSIGNED_INT3 (INTVAL (op0))))
5555 	error ("operand 1 should be an unsigned 3-bit immediate");
5556 
5557       mode1 =  insn_data[icode].operand[1].mode;
5558 
5559       if (icode == CODE_FOR_vdiwr_insn)
5560 	target = gen_rtx_REG (SImode,
5561 			      ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0));
5562       else if (icode == CODE_FOR_vdowr_insn)
5563 	target = gen_rtx_REG (SImode,
5564 			      ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0));
5565       else
5566 	gcc_unreachable ();
5567 
5568       if (!insn_data[icode].operand[2].predicate (op1, mode1))
5569 	op1 = copy_to_mode_reg (mode1, op1);
5570 
5571       pat = GEN_FCN (icode) (target, op1);
5572       if (!pat)
5573 	return NULL_RTX;
5574 
5575       emit_insn (pat);
5576       return NULL_RTX;
5577 
5578     case ARC_BUILTIN_VASRW:
5579     case ARC_BUILTIN_VSR8:
5580     case ARC_BUILTIN_VSR8AW:
5581       arg0 = CALL_EXPR_ARG (exp, 0);
5582       arg1 = CALL_EXPR_ARG (exp, 1);
5583       op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
5584       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5585       op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5586 
5587       target = gen_reg_rtx (V8HImode);
5588       mode0 =  insn_data[icode].operand[1].mode;
5589       mode1 =  insn_data[icode].operand[2].mode;
5590 
5591       if (!insn_data[icode].operand[1].predicate (op0, mode0))
5592 	op0 = copy_to_mode_reg (mode0, op0);
5593 
5594       if ((!insn_data[icode].operand[2].predicate (op1, mode1))
5595 	  || !(UNSIGNED_INT3 (INTVAL (op1))))
5596 	error ("operand 2 should be an unsigned 3-bit value (I0-I7)");
5597 
5598       pat = GEN_FCN (icode) (target, op0, op1, op2);
5599       if (!pat)
5600 	return NULL_RTX;
5601 
5602       emit_insn (pat);
5603       return target;
5604 
5605     case ARC_BUILTIN_VLD32WH:
5606     case ARC_BUILTIN_VLD32WL:
5607     case ARC_BUILTIN_VLD64:
5608     case ARC_BUILTIN_VLD32:
5609       rtx src_vreg;
5610       icode = d->icode;
5611       arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg.  */
5612       arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
5613       arg2 = CALL_EXPR_ARG (exp, 2); /* u8.  */
5614 
5615       src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
5616       op0 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5617       op1 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
5618       op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5619 
5620       /* target <- src vreg.  */
5621       emit_insn (gen_move_insn (target, src_vreg));
5622 
5623       /* target <- vec_concat: target, mem (Ib, u8).  */
5624       mode0 =  insn_data[icode].operand[3].mode;
5625       mode1 =  insn_data[icode].operand[1].mode;
5626 
5627       if ((!insn_data[icode].operand[3].predicate (op0, mode0))
5628 	  || !(UNSIGNED_INT3 (INTVAL (op0))))
5629 	error ("operand 1 should be an unsigned 3-bit value (I0-I7)");
5630 
5631       if ((!insn_data[icode].operand[1].predicate (op1, mode1))
5632 	  || !(UNSIGNED_INT8 (INTVAL (op1))))
5633 	error ("operand 2 should be an unsigned 8-bit value");
5634 
5635       pat = GEN_FCN (icode) (target, op1, op2, op0);
5636       if (!pat)
5637 	return NULL_RTX;
5638 
5639       emit_insn (pat);
5640       return target;
5641 
5642     case ARC_BUILTIN_VLD64W:
5643     case ARC_BUILTIN_VLD128:
5644       arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg.  */
5645       arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
5646 
5647       op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5648       op1 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
5649       op2 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5650 
5651       /* target <- src vreg.  */
5652       target = gen_reg_rtx (V8HImode);
5653 
5654       /* target <- vec_concat: target, mem (Ib, u8).  */
5655       mode0 =  insn_data[icode].operand[1].mode;
5656       mode1 =  insn_data[icode].operand[2].mode;
5657       mode2 =  insn_data[icode].operand[3].mode;
5658 
5659       if ((!insn_data[icode].operand[2].predicate (op1, mode1))
5660 	  || !(UNSIGNED_INT3 (INTVAL (op1))))
5661 	error ("operand 1 should be an unsigned 3-bit value (I0-I7)");
5662 
5663       if ((!insn_data[icode].operand[3].predicate (op2, mode2))
5664 	  || !(UNSIGNED_INT8 (INTVAL (op2))))
5665 	error ("operand 2 should be an unsigned 8-bit value");
5666 
5667       pat = GEN_FCN (icode) (target, op0, op1, op2);
5668 
5669       if (!pat)
5670 	return NULL_RTX;
5671 
5672       emit_insn (pat);
5673       return target;
5674 
5675     case ARC_BUILTIN_VST128:
5676     case ARC_BUILTIN_VST64:
5677       arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg.  */
5678       arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
5679       arg2 = CALL_EXPR_ARG (exp, 2); /* u8.  */
5680 
5681       op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5682       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5683       op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
5684       op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
5685 
5686       mode0 = insn_data[icode].operand[0].mode;
5687       mode1 = insn_data[icode].operand[1].mode;
5688       mode2 = insn_data[icode].operand[2].mode;
5689       mode3 = insn_data[icode].operand[3].mode;
5690 
5691       if ((!insn_data[icode].operand[1].predicate (op1, mode1))
5692 	  || !(UNSIGNED_INT3 (INTVAL (op1))))
5693 	error ("operand 2 should be an unsigned 3-bit value (I0-I7)");
5694 
5695       if ((!insn_data[icode].operand[2].predicate (op2, mode2))
5696 	  || !(UNSIGNED_INT8 (INTVAL (op2))))
5697 	error ("operand 3 should be an unsigned 8-bit value");
5698 
5699       if (!insn_data[icode].operand[3].predicate (op3, mode3))
5700 	op3 = copy_to_mode_reg (mode3, op3);
5701 
5702       pat = GEN_FCN (icode) (op0, op1, op2, op3);
5703       if (!pat)
5704 	return NULL_RTX;
5705 
5706       emit_insn (pat);
5707       return NULL_RTX;
5708 
5709     case ARC_BUILTIN_VST16_N:
5710     case ARC_BUILTIN_VST32_N:
5711       arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg.  */
5712       arg1 = CALL_EXPR_ARG (exp, 1); /* u3.  */
5713       arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7.  */
5714       arg3 = CALL_EXPR_ARG (exp, 3); /* u8.  */
5715 
5716       op0 = expand_expr (arg3, NULL_RTX, SImode, EXPAND_NORMAL);
5717       op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5718       op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
5719       op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
5720       op4 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5721 
5722       mode0 = insn_data[icode].operand[0].mode;
5723       mode2 = insn_data[icode].operand[2].mode;
5724       mode3 = insn_data[icode].operand[3].mode;
5725       mode4 = insn_data[icode].operand[4].mode;
5726 
5727       /* Do some correctness checks for the operands.  */
5728       if ((!insn_data[icode].operand[0].predicate (op0, mode0))
5729 	  || !(UNSIGNED_INT8 (INTVAL (op0))))
5730 	error ("operand 4 should be an unsigned 8-bit value (0-255)");
5731 
5732       if ((!insn_data[icode].operand[2].predicate (op2, mode2))
5733 	  || !(UNSIGNED_INT3 (INTVAL (op2))))
5734 	error ("operand 3 should be an unsigned 3-bit value (I0-I7)");
5735 
5736       if (!insn_data[icode].operand[3].predicate (op3, mode3))
5737 	op3 = copy_to_mode_reg (mode3, op3);
5738 
5739       if ((!insn_data[icode].operand[4].predicate (op4, mode4))
5740 	   || !(UNSIGNED_INT3 (INTVAL (op4))))
5741 	error ("operand 2 should be an unsigned 3-bit value (subreg 0-7)");
5742       else if (icode == CODE_FOR_vst32_n_insn
5743 	       && ((INTVAL (op4) % 2) != 0))
5744 	error ("operand 2 should be an even 3-bit value (subreg 0,2,4,6)");
5745 
5746       pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
5747       if (!pat)
5748 	return NULL_RTX;
5749 
5750       emit_insn (pat);
5751       return NULL_RTX;
5752 
5753     default:
5754       break;
5755     }
5756 
5757   /* 2nd part: Expand regular builtins.  */
5758   if (icode == 0)
5759     internal_error ("bad builtin fcode");
5760 
5761   nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
5762   j = 0;
5763 
5764   if (nonvoid)
5765     {
5766       if (target == NULL_RTX
5767 	  || GET_MODE (target) != tmode
5768 	  || !insn_data[icode].operand[0].predicate (target, tmode))
5769 	{
5770 	  target = gen_reg_rtx (tmode);
5771 	}
5772       xop[j++] = target;
5773     }
5774 
5775   gcc_assert (n_args <= 4);
5776   for (i = 0; i < n_args; i++, j++)
5777     {
5778       tree arg = CALL_EXPR_ARG (exp, i);
5779       machine_mode mode = insn_data[icode].operand[j].mode;
5780       rtx op = expand_expr (arg, NULL_RTX, mode, EXPAND_NORMAL);
5781       machine_mode opmode = GET_MODE (op);
5782       char c = insn_data[icode].operand[j].constraint[0];
5783 
5784       /* SIMD extension requires exact immediate operand match.  */
5785       if ((id > ARC_BUILTIN_SIMD_BEGIN)
5786 	  && (id < ARC_BUILTIN_SIMD_END)
5787 	  && (c != 'v')
5788 	  && (c != 'r'))
5789 	{
5790 	  if (!CONST_INT_P (op))
5791 	    error ("builtin requires an immediate for operand %d", j);
5792 	  switch (c)
5793 	    {
5794 	    case 'L':
5795 	      if (!satisfies_constraint_L (op))
5796 		error ("operand %d should be a 6 bit unsigned immediate", j);
5797 	      break;
5798 	    case 'P':
5799 	      if (!satisfies_constraint_P (op))
5800 		error ("operand %d should be an 8 bit unsigned immediate", j);
5801 	      break;
5802 	    case 'K':
5803 	      if (!satisfies_constraint_K (op))
5804 		error ("operand %d should be a 3 bit unsigned immediate", j);
5805 	      break;
5806 	    default:
5807 	      error ("unknown builtin immediate operand type for operand %d",
5808 		     j);
5809 	    }
5810 	}
5811 
5812       if (CONST_INT_P (op))
5813 	opmode = mode;
5814 
5815       if ((opmode == SImode) && (mode == HImode))
5816 	{
5817 	  opmode = HImode;
5818 	  op = gen_lowpart (HImode, op);
5819 	}
5820 
5821       /* In case the insn wants input operands in modes different from
5822 	 the result, abort.  */
5823       gcc_assert (opmode == mode || opmode == VOIDmode);
5824 
5825       if (!insn_data[icode].operand[i + nonvoid].predicate (op, mode))
5826 	op = copy_to_mode_reg (mode, op);
5827 
5828       xop[j] = op;
5829     }
5830 
5831   pat = apply_GEN_FCN (icode, xop);
5832   if (pat == NULL_RTX)
5833     return NULL_RTX;
5834 
5835   emit_insn (pat);
5836 
5837   if (nonvoid)
5838     return target;
5839   else
5840     return const0_rtx;
5841 }
5842 
5843 /* Return true if operands[opno] is a valid compile-time constant to be
5844    used as a register number in the code for builtins.  Otherwise flag an
5845    error and return false.  */
5846 
5847 bool
5848 check_if_valid_regno_const (rtx *operands, int opno)
5849 {
5850 
5851   switch (GET_CODE (operands[opno]))
5852     {
5853     case SYMBOL_REF :
5854     case CONST :
5855     case CONST_INT :
5856       return true;
5857     default:
5858 	error ("register number must be a compile-time constant. Try giving higher optimization levels");
5859 	break;
5860     }
5861   return false;
5862 }
5863 
5864 /* Check whether, after all the constant folding, the operand to
5865    __builtin_arc_sleep is an unsigned 6-bit integer.  If not, flag an error.  */
5866 
5867 bool
5868 check_if_valid_sleep_operand (rtx *operands, int opno)
5869 {
5870   switch (GET_CODE (operands[opno]))
5871     {
5872     case CONST :
5873     case CONST_INT :
5874 	if (UNSIGNED_INT6 (INTVAL (operands[opno])))
5875 	    return true;
5876     default:
5877 	fatal_error (input_location,
5878 		     "operand for sleep instruction must be an unsigned 6 bit compile-time constant");
5879 	break;
5880     }
5881   return false;
5882 }
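
/* For example, __builtin_arc_sleep (10) is accepted because 10 fits in an
   unsigned 6-bit value (0-63), whereas a non-constant argument or a value
   of 64 or more reaches the fatal_error above.  */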
5883 
5884 /* Return true if it is ok to make a tail-call to DECL.  */
5885 
5886 static bool
5887 arc_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
5888 			     tree exp ATTRIBUTE_UNUSED)
5889 {
5890   /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
5891   if (ARC_INTERRUPT_P (arc_compute_function_type (cfun)))
5892     return false;
5893 
5894   /* Everything else is ok.  */
5895   return true;
5896 }
5897 
5898 /* Output code to add DELTA to the first argument, and then jump
5899    to FUNCTION.  Used for C++ multiple inheritance.  */
5900 
5901 static void
5902 arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
5903 		     HOST_WIDE_INT delta,
5904 		     HOST_WIDE_INT vcall_offset,
5905 		     tree function)
5906 {
5907   int mi_delta = delta;
5908   const char *const mi_op = mi_delta < 0 ? "sub" : "add";
5909   int shift = 0;
5910   int this_regno
5911     = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0;
5912   rtx fnaddr;
5913 
5914   if (mi_delta < 0)
5915     mi_delta = - mi_delta;
5916 
5917   /* Add DELTA.  When possible use a plain add, otherwise load it into
5918      a register first.  */
5919 
5920   while (mi_delta != 0)
5921     {
5922       if ((mi_delta & (3 << shift)) == 0)
5923 	shift += 2;
5924       else
5925 	{
5926 	  asm_fprintf (file, "\t%s\t%s, %s, %d\n",
5927 		       mi_op, reg_names[this_regno], reg_names[this_regno],
5928 		       mi_delta & (0xff << shift));
5929 	  mi_delta &= ~(0xff << shift);
5930 	  shift += 8;
5931 	}
5932     }
5933 
5934   /* If needed, add *(*THIS + VCALL_OFFSET) to THIS.  */
5935   if (vcall_offset != 0)
5936     {
5937       /* ld  r12,[this]           --> temp = *this
5938 	 add r12,r12,vcall_offset --> temp = *(*this + vcall_offset)
5939 	 ld r12,[r12]
5940 	 add this,this,r12        --> this+ = *(*this + vcall_offset) */
5941       asm_fprintf (file, "\tld\t%s, [%s]\n",
5942 		   ARC_TEMP_SCRATCH_REG, reg_names[this_regno]);
5943       asm_fprintf (file, "\tadd\t%s, %s, " HOST_WIDE_INT_PRINT_DEC "\n",
5944 		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset);
5945       asm_fprintf (file, "\tld\t%s, [%s]\n",
5946 		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG);
5947       asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno],
5948 		   reg_names[this_regno], ARC_TEMP_SCRATCH_REG);
5949     }
5950 
5951   fnaddr = XEXP (DECL_RTL (function), 0);
5952 
5953   if (arc_is_longcall_p (fnaddr))
5954     fputs ("\tj\t", file);
5955   else
5956     fputs ("\tb\t", file);
5957   assemble_name (file, XSTR (fnaddr, 0));
5958   fputc ('\n', file);
5959 }
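
/* Sketch of the generated output, assuming THIS lives in r0: a delta of
   0x1234 is added in 8-bit-wide slices, roughly

       add     r0, r0, 564      ; 0x234
       add     r0, r0, 4096     ; 0x1000
       b       function         ; or "j" when a long call is required

   with the vcall_offset load sequence from the comment above inserted
   before the branch when VCALL_OFFSET is nonzero.  */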
5960 
5961 /* Return true if a 32 bit "long_call" should be generated for
5962    a call to this SYM_REF.  We generate a long_call if the function:
5963 
5964         a.  has an __attribute__((long_call))
5965      or b.  the -mlong-calls command line switch has been specified
5966 
5967    However we do not generate a long call if the function has an
5968    __attribute__ ((short_call)) or __attribute__ ((medium_call))
5969 
5970    This function will be called by C fragments contained in the machine
5971    description file.  */
5972 
5973 bool
5974 arc_is_longcall_p (rtx sym_ref)
5975 {
5976   if (GET_CODE (sym_ref) != SYMBOL_REF)
5977     return false;
5978 
5979   return (SYMBOL_REF_LONG_CALL_P (sym_ref)
5980 	  || (TARGET_LONG_CALLS_SET
5981 	      && !SYMBOL_REF_SHORT_CALL_P (sym_ref)
5982 	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
5983 
5984 }
5985 
5986 /* Likewise for short calls.  */
5987 
5988 bool
5989 arc_is_shortcall_p (rtx sym_ref)
5990 {
5991   if (GET_CODE (sym_ref) != SYMBOL_REF)
5992     return false;
5993 
5994   return (SYMBOL_REF_SHORT_CALL_P (sym_ref)
5995 	  || (!TARGET_LONG_CALLS_SET && !TARGET_MEDIUM_CALLS
5996 	      && !SYMBOL_REF_LONG_CALL_P (sym_ref)
5997 	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
5998 
5999 }
6000 
6001 /* Emit profiling code for calling CALLEE.  Return true if a special
6002    call pattern needs to be generated.  */
6003 
6004 bool
6005 arc_profile_call (rtx callee)
6006 {
6007   rtx from = XEXP (DECL_RTL (current_function_decl), 0);
6008 
6009   if (TARGET_UCB_MCOUNT)
6010     /* Profiling is done by instrumenting the callee.  */
6011     return false;
6012 
6013   if (CONSTANT_P (callee))
6014     {
6015       rtx count_ptr
6016 	= gen_rtx_CONST (Pmode,
6017 			 gen_rtx_UNSPEC (Pmode,
6018 					 gen_rtvec (3, from, callee,
6019 						    CONST0_RTX (Pmode)),
6020 					 UNSPEC_PROF));
6021       rtx counter = gen_rtx_MEM (SImode, count_ptr);
6022       /* ??? The increment would better be done atomically, but as there is
6023 	 no proper hardware support, that would be too expensive.  */
6024       emit_move_insn (counter,
6025 		      force_reg (SImode, plus_constant (SImode, counter, 1)));
6026       return false;
6027     }
6028   else
6029     {
6030       rtx count_list_ptr
6031 	= gen_rtx_CONST (Pmode,
6032 			 gen_rtx_UNSPEC (Pmode,
6033 					 gen_rtvec (3, from, CONST0_RTX (Pmode),
6034 						    CONST0_RTX (Pmode)),
6035 					 UNSPEC_PROF));
6036       emit_move_insn (gen_rtx_REG (Pmode, 8), count_list_ptr);
6037       emit_move_insn (gen_rtx_REG (Pmode, 9), callee);
6038       return true;
6039     }
6040 }
6041 
6042 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
6043 
6044 static bool
6045 arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6046 {
6047   if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type))
6048     return true;
6049   else
6050     {
6051       HOST_WIDE_INT size = int_size_in_bytes (type);
6052       return (size == -1 || size > (TARGET_V2 ? 16 : 8));
6053     }
6054 }
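
/* In other words (a simplification): aggregates, addressable types, and
   values larger than 8 bytes (16 bytes when TARGET_V2) are returned in
   memory; smaller scalar values come back in registers.  */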
6055 
6056 
6057 /* This was in rtlanal.c, and can go in there when we decide we want
6058    to submit the change for inclusion in the GCC tree.  */
6059 /* Like note_stores, but allow the callback to have side effects on the rtl
6060    (like the note_stores of yore):
6061    Call FUN on each register or MEM that is stored into or clobbered by X.
6062    (X would be the pattern of an insn).  DATA is an arbitrary pointer,
6063    ignored by note_stores, but passed to FUN.
6064    FUN may alter parts of the RTL.
6065 
6066    FUN receives three arguments:
6067    1. the REG, MEM, CC0 or PC being stored in or clobbered,
6068    2. the SET or CLOBBER rtx that does the store,
6069    3. the pointer DATA provided to note_stores.
6070 
6071   If the item being stored in or clobbered is a SUBREG of a hard register,
6072   the SUBREG will be passed.  */
6073 
6074 /* For now.  */ static
6075 void
6076 walk_stores (rtx x, void (*fun) (rtx, rtx, void *), void *data)
6077 {
6078   int i;
6079 
6080   if (GET_CODE (x) == COND_EXEC)
6081     x = COND_EXEC_CODE (x);
6082 
6083   if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
6084     {
6085       rtx dest = SET_DEST (x);
6086 
6087       while ((GET_CODE (dest) == SUBREG
6088 	      && (!REG_P (SUBREG_REG (dest))
6089 		  || REGNO (SUBREG_REG (dest)) >= FIRST_PSEUDO_REGISTER))
6090 	     || GET_CODE (dest) == ZERO_EXTRACT
6091 	     || GET_CODE (dest) == STRICT_LOW_PART)
6092 	dest = XEXP (dest, 0);
6093 
6094       /* If we have a PARALLEL, SET_DEST is a list of EXPR_LIST expressions,
6095 	 each of whose first operand is a register.  */
6096       if (GET_CODE (dest) == PARALLEL)
6097 	{
6098 	  for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
6099 	    if (XEXP (XVECEXP (dest, 0, i), 0) != 0)
6100 	      (*fun) (XEXP (XVECEXP (dest, 0, i), 0), x, data);
6101 	}
6102       else
6103 	(*fun) (dest, x, data);
6104     }
6105 
6106   else if (GET_CODE (x) == PARALLEL)
6107     for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
6108       walk_stores (XVECEXP (x, 0, i), fun, data);
6109 }
6110 
6111 static bool
6112 arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED,
6113 		       machine_mode mode ATTRIBUTE_UNUSED,
6114 		       const_tree type,
6115 		       bool named ATTRIBUTE_UNUSED)
6116 {
6117   return (type != 0
6118 	  && (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
6119 	      || TREE_ADDRESSABLE (type)));
6120 }
6121 
6122 /* Implement TARGET_CAN_USE_DOLOOP_P.  */
6123 
6124 static bool
6125 arc_can_use_doloop_p (const widest_int &iterations, const widest_int &,
6126 		      unsigned int loop_depth, bool entered_at_top)
6127 {
6128   if (loop_depth > 1)
6129     return false;
6130   /* Setting up the loop with two sr instructions costs 6 cycles.  */
6131   if (TARGET_ARC700
6132       && !entered_at_top
6133       && wi::gtu_p (iterations, 0)
6134       && wi::leu_p (iterations, flag_pic ? 6 : 3))
6135     return false;
6136   return true;
6137 }
6138 
6139 /* Return NULL if INSN is valid within a low-overhead loop.
6140    Otherwise return why doloop cannot be applied.  */
6141 
6142 static const char *
6143 arc_invalid_within_doloop (const rtx_insn *insn)
6144 {
6145   if (CALL_P (insn))
6146     return "Function call in the loop.";
6147   return NULL;
6148 }
6149 
6150 /* Same functionality as arc_hazard, but called in machine reorg before
6151    any other optimization.  Hence, the size of any NOP inserted here is
6152    taken into account when doing branch shortening.  */
6153 
6154 static void
6155 workaround_arc_anomaly (void)
6156 {
6157   rtx_insn *insn, *succ0;
6158 
6159   /* For any architecture: call arc_hazard here.  */
6160   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6161     {
6162       succ0 = next_real_insn (insn);
6163       if (arc_hazard (insn, succ0))
6164 	{
6165 	  emit_insn_before (gen_nopv (), succ0);
6166 	}
6167     }
6168 }
6169 
6170 static int arc_reorg_in_progress = 0;
6171 
6172 /* ARC's machine-specific reorg function.  */
6173 
6174 static void
6175 arc_reorg (void)
6176 {
6177   rtx_insn *insn;
6178   rtx pattern;
6179   rtx pc_target;
6180   long offset;
6181   int changed;
6182 
6183   workaround_arc_anomaly ();
6184 
6185   cfun->machine->arc_reorg_started = 1;
6186   arc_reorg_in_progress = 1;
6187 
6188   /* Emit special sections for profiling.  */
6189   if (crtl->profile)
6190     {
6191       section *save_text_section;
6192       rtx_insn *insn;
6193       int size = get_max_uid () >> 4;
6194       htab_t htab = htab_create (size, unspec_prof_hash, unspec_prof_htab_eq,
6195 				 NULL);
6196 
6197       save_text_section = in_section;
6198       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6199 	if (NONJUMP_INSN_P (insn))
6200 	  walk_stores (PATTERN (insn), write_profile_sections, htab);
6201       if (htab_elements (htab))
6202 	in_section = 0;
6203       switch_to_section (save_text_section);
6204       htab_delete (htab);
6205     }
6206 
6207   /* Link up loop ends with their loop start.  */
6208   {
6209     for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6210       if (GET_CODE (insn) == JUMP_INSN
6211 	  && recog_memoized (insn) == CODE_FOR_doloop_end_i)
6212 	{
6213 	  rtx_insn *top_label
6214 	    = as_a <rtx_insn *> (XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0));
6215 	  rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label));
6216 	  rtx_insn *lp, *prev = prev_nonnote_insn (top_label);
6217 	  rtx_insn *lp_simple = NULL;
6218 	  rtx_insn *next = NULL;
6219 	  rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0);
6220 	  HOST_WIDE_INT loop_end_id
6221 	    = -INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0));
6222 	  int seen_label = 0;
6223 
6224 	  for (lp = prev;
6225 	       (lp && NONJUMP_INSN_P (lp)
6226 		&& recog_memoized (lp) != CODE_FOR_doloop_begin_i);
6227 	       lp = prev_nonnote_insn (lp))
6228 	    ;
6229 	  if (!lp || !NONJUMP_INSN_P (lp)
6230 	      || dead_or_set_regno_p (lp, LP_COUNT))
6231 	    {
6232 	      for (prev = next = insn, lp = NULL ; prev || next;)
6233 		{
6234 		  if (prev)
6235 		    {
6236 		      if (NONJUMP_INSN_P (prev)
6237 			  && recog_memoized (prev) == CODE_FOR_doloop_begin_i
6238 			  && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0))
6239 			      == loop_end_id))
6240 			{
6241 			  lp = prev;
6242 			  break;
6243 			}
6244 		      else if (LABEL_P (prev))
6245 			seen_label = 1;
6246 		      prev = prev_nonnote_insn (prev);
6247 		    }
6248 		  if (next)
6249 		    {
6250 		      if (NONJUMP_INSN_P (next)
6251 			  && recog_memoized (next) == CODE_FOR_doloop_begin_i
6252 			  && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0))
6253 			      == loop_end_id))
6254 			{
6255 			  lp = next;
6256 			  break;
6257 			}
6258 		      next = next_nonnote_insn (next);
6259 		    }
6260 		}
6261 	      prev = NULL;
6262 	    }
6263 	  else
6264 	    lp_simple = lp;
6265 	  if (lp && !dead_or_set_regno_p (lp, LP_COUNT))
6266 	    {
6267 	      rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0);
6268 	      if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0)))
6269 		/* The loop end insn has been duplicated.  That can happen
6270 		   when there is a conditional block at the very end of
6271 		   the loop.  */
6272 		goto failure;
6273 	      /* If register allocation failed to allocate to the right
6274 		 register, there is no point in teaching reload to
6275 		 fix this up with reloads, as that would cost more
6276 		 than using an ordinary core register with the
6277 		 doloop_fallback pattern.  */
6278 	      if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt))
6279 	      /* Likewise, if the loop setup is evidently inside the loop,
6280 		 we lose.  */
6281 		  || (!lp_simple && lp != next && !seen_label))
6282 		{
6283 		  remove_insn (lp);
6284 		  goto failure;
6285 		}
6286 	      /* It is common that the optimizers copy the loop count from
6287 		 another register, and doloop_begin_i is stuck with the
6288 		 source of the move.  Making doloop_begin_i only accept "l"
6289 		 is nonsensical, as this then makes reload evict the pseudo
6290 		 used for the loop end.  The underlying cause is that the
6291 		 optimizers don't understand that the register allocation for
6292 		 doloop_begin_i should be treated as part of the loop.
6293 		 Try to work around this problem by verifying the previous
6294 		 move exists.  */
6295 	      if (true_regnum (begin_cnt) != LP_COUNT)
6296 		{
6297 		  rtx_insn *mov;
6298 		  rtx set, note;
6299 
6300 		  for (mov = prev_nonnote_insn (lp); mov;
6301 		       mov = prev_nonnote_insn (mov))
6302 		    {
6303 		      if (!NONJUMP_INSN_P (mov))
6304 			mov = 0;
6305 		      else if ((set = single_set (mov))
6306 			  && rtx_equal_p (SET_SRC (set), begin_cnt)
6307 			  && rtx_equal_p (SET_DEST (set), op0))
6308 			break;
6309 		    }
6310 		  if (mov)
6311 		    {
6312 		      XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0) = op0;
6313 		      note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt));
6314 		      if (note)
6315 			remove_note (lp, note);
6316 		    }
6317 		  else
6318 		    {
6319 		      remove_insn (lp);
6320 		      goto failure;
6321 		    }
6322 		}
6323 	      XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num;
6324 	      XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num;
6325 	      if (next == lp)
6326 		XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx;
6327 	      else if (!lp_simple)
6328 		XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx;
6329 	      else if (prev != lp)
6330 		{
6331 		  remove_insn (lp);
6332 		  add_insn_after (lp, prev, NULL);
6333 		}
6334 	      if (!lp_simple)
6335 		{
6336 		  XEXP (XVECEXP (PATTERN (lp), 0, 7), 0)
6337 		    = gen_rtx_LABEL_REF (Pmode, top_label);
6338 		  add_reg_note (lp, REG_LABEL_OPERAND, top_label);
6339 		  LABEL_NUSES (top_label)++;
6340 		}
6341 	      /* We can avoid tedious loop start / end setting for empty loops
6342 		 by merely setting the loop count to its final value.  */
6343 	      if (next_active_insn (top_label) == insn)
6344 		{
6345 		  rtx lc_set
6346 		    = gen_rtx_SET (XEXP (XVECEXP (PATTERN (lp), 0, 3), 0),
6347 				   const0_rtx);
6348 
6349 		  rtx_insn *lc_set_insn = emit_insn_before (lc_set, insn);
6350 		  delete_insn (lp);
6351 		  delete_insn (insn);
6352 		  insn = lc_set_insn;
6353 		}
6354 	      /* If the loop is non-empty with zero length, we can't make it
6355 		 a zero-overhead loop.  That can happen for empty asms.  */
6356 	      else
6357 		{
6358 		  rtx_insn *scan;
6359 
6360 		  for (scan = top_label;
6361 		       (scan && scan != insn
6362 			&& (!NONJUMP_INSN_P (scan) || !get_attr_length (scan)));
6363 		       scan = NEXT_INSN (scan));
6364 		  if (scan == insn)
6365 		    {
6366 		      remove_insn (lp);
6367 		      goto failure;
6368 		    }
6369 		}
6370 	    }
6371 	  else
6372 	    {
6373 	      /* Sometimes the loop optimizer makes a complete hash of the
6374 		 loop.  If it were only that the loop is not entered at the
6375 		 top, we could fix this up by setting LP_START with SR.
6376 		 However, if we can't find the loop begin where it should be,
6377 		 chances are that it does not even dominate the loop, but is
6378 		 inside the loop instead.  Using SR there would kill
6379 		 performance.
6380 		 We use the doloop_fallback pattern here, which executes
6381 		 in two cycles on the ARC700 when predicted correctly.  */
6382 	    failure:
6383 	      if (!REG_P (op0))
6384 		{
6385 		  rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0);
6386 
6387 		  emit_insn_before (gen_move_insn (op3, op0), insn);
6388 		  PATTERN (insn)
6389 		    = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0);
6390 		}
6391 	      else
6392 		XVEC (PATTERN (insn), 0)
6393 		  = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0),
6394 			       XVECEXP (PATTERN (insn), 0, 1));
6395 	      INSN_CODE (insn) = -1;
6396 	    }
6397 	}
6398     }
6399 
6400 /* FIXME: should anticipate ccfsm action, generate special patterns for
6401    to-be-deleted branches that have no delay slot and have at least the
6402    length of the size increase forced on other insns that are conditionalized.
6403    This can also have an insn_list inside that enumerates insns which are
6404    not actually conditionalized because the destinations are dead in the
6405    not-execute case.
6406    Could also tag branches that we want to be unaligned if they get no delay
6407    slot, or even ones that we don't want to do delay slot scheduling for
6408    because we can unalign them.
6409 
6410    However, there are cases when conditional execution is only possible after
6411    delay slot scheduling:
6412 
6413    - If a delay slot is filled with a nocond/set insn from above, the previous
6414      basic block can become eligible for conditional execution.
6415    - If a delay slot is filled with a nocond insn from the fall-through path,
6416      the branch with that delay slot can become eligible for conditional
6417      execution (however, with the same sort of data flow analysis that dbr
6418      does, we could have figured out before that we don't need to
6419      conditionalize this insn.)
6420    - If a delay slot insn is filled with an insn from the target, the
6421      target label gets its uses decremented (even deleted if falling to zero),
6422      thus possibly creating more condexec opportunities there.
6423    Therefore, we should still be prepared to apply condexec optimization on
6424    non-prepared branches if the size increase of conditionalized insns is no
6425    more than the size saved from eliminating the branch.  An invocation option
6426    could also be used to reserve a bit of extra size for condbranches so that
6427    this'll work more often (could also test in arc_reorg if the block is
6428    'close enough' to be eligible for condexec to make this likely, and
6429    estimate required size increase).  */
6430   /* Generate BRcc insns, by combining cmp and Bcc insns wherever possible.  */
6431   if (TARGET_NO_BRCC_SET)
6432     return;
6433 
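  /* Editor's note - an illustrative sketch, not part of the original
     sources: the loop below looks for a compare followed by a short-range
     conditional branch, e.g.

	 cmp   r2,r3
	 blt   @.L1

     and, when the branch target is within the signed 9-bit BRcc range
     checked further down, rewrites the pair as a single compare-and-branch

	 brlt  r2,r3,@.L1

     The register numbers and label are made up for the example.  */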
6434   do
6435     {
6436       init_insn_lengths();
6437       changed = 0;
6438 
6439       if (optimize > 1 && !TARGET_NO_COND_EXEC)
6440 	{
6441 	  arc_ifcvt ();
6442 	  unsigned int flags = pass_data_arc_ifcvt.todo_flags_finish;
6443 	  df_finish_pass ((flags & TODO_df_verify) != 0);
6444 	}
6445 
6446       /* Call shorten_branches to calculate the insn lengths.  */
6447       shorten_branches (get_insns());
6448       cfun->machine->ccfsm_current_insn = NULL_RTX;
6449 
6450       if (!INSN_ADDRESSES_SET_P())
6451 	  fatal_error (input_location, "insn addresses not set after shorten_branches");
6452 
6453       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6454 	{
6455 	  rtx label;
6456 	  enum attr_type insn_type;
6457 
6458 	  /* If a non-jump insn (or a casesi jump table), continue.  */
6459 	  if (GET_CODE (insn) != JUMP_INSN ||
6460 	      GET_CODE (PATTERN (insn)) == ADDR_VEC
6461 	      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
6462 	    continue;
6463 
6464 	  /* If we already have a brcc, note if it is suitable for brcc_s.
6465 	     Be a bit generous with the brcc_s range so that we can take
6466 	     advantage of any code shortening from delay slot scheduling.  */
6467 	  if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch)
6468 	    {
6469 	      rtx pat = PATTERN (insn);
6470 	      rtx op = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0);
6471 	      rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0);
6472 
6473 	      offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
6474 	      if ((offset >= -140 && offset < 140)
6475 		  && rtx_equal_p (XEXP (op, 1), const0_rtx)
6476 		  && compact_register_operand (XEXP (op, 0), VOIDmode)
6477 		  && equality_comparison_operator (op, VOIDmode))
6478 		PUT_MODE (*ccp, CC_Zmode);
6479 	      else if (GET_MODE (*ccp) == CC_Zmode)
6480 		PUT_MODE (*ccp, CC_ZNmode);
6481 	      continue;
6482 	    }
6483 	  if ((insn_type =  get_attr_type (insn)) == TYPE_BRCC
6484 	      || insn_type == TYPE_BRCC_NO_DELAY_SLOT)
6485 	    continue;
6486 
6487 	  /* OK, so we have a jump insn.  */
6488 	  /* We need to check that it is a bcc.  */
6489 	  /* Bcc => set (pc) (if_then_else ) */
6490 	  pattern = PATTERN (insn);
6491 	  if (GET_CODE (pattern) != SET
6492 	      || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
6493 	      || ANY_RETURN_P (XEXP (SET_SRC (pattern), 1)))
6494 	    continue;
6495 
6496 	  /* Now check if the jump is beyond the s9 range.  */
6497 	  if (CROSSING_JUMP_P (insn))
6498 	    continue;
6499 	  offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
6500 
6501 	  if (offset > 253 || offset < -254)
6502 	    continue;
6503 
6504 	  pc_target = SET_SRC (pattern);
6505 
6506 	  /* Avoid FPU instructions.  */
6507 	  if ((GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPUmode)
6508 	      || (GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPU_UNEQmode))
6509 	    continue;
6510 
6511 	  /* Now go back and search for the set cc insn.  */
6512 
6513 	  label = XEXP (pc_target, 1);
6514 
6515 	    {
6516 	      rtx pat;
6517 	      rtx_insn *scan, *link_insn = NULL;
6518 
6519 	      for (scan = PREV_INSN (insn);
6520 		   scan && GET_CODE (scan) != CODE_LABEL;
6521 		   scan = PREV_INSN (scan))
6522 		{
6523 		  if (! INSN_P (scan))
6524 		    continue;
6525 		  pat = PATTERN (scan);
6526 		  if (GET_CODE (pat) == SET
6527 		      && cc_register (SET_DEST (pat), VOIDmode))
6528 		    {
6529 		      link_insn = scan;
6530 		      break;
6531 		    }
6532 		}
6533 	      if (!link_insn)
6534 		continue;
6535 	      else
6536 		/* Check if this is a data dependency.  */
6537 		{
6538 		  rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
6539 		  rtx cmp0, cmp1;
6540 
6541 		  /* OK, this is the set cc.  Copy args here.  */
6542 		  op = XEXP (pc_target, 0);
6543 
6544 		  op0 = cmp0 = XEXP (SET_SRC (pat), 0);
6545 		  op1 = cmp1 = XEXP (SET_SRC (pat), 1);
6546 		  if (GET_CODE (op0) == ZERO_EXTRACT
6547 		      && XEXP (op0, 1) == const1_rtx
6548 		      && (GET_CODE (op) == EQ
6549 			  || GET_CODE (op) == NE))
6550 		    {
6551 		      /* btst / b{eq,ne} -> bbit{0,1} */
6552 		      op0 = XEXP (cmp0, 0);
6553 		      op1 = XEXP (cmp0, 2);
6554 		    }
6555 		  else if (!register_operand (op0, VOIDmode)
6556 			  || !general_operand (op1, VOIDmode))
6557 		    continue;
6558 		  /* Be careful not to break what cmpsfpx_raw is
6559 		     trying to create for checking equality of
6560 		     single-precision floats.  */
6561 		  else if (TARGET_SPFP
6562 			   && GET_MODE (op0) == SFmode
6563 			   && GET_MODE (op1) == SFmode)
6564 		    continue;
6565 
6566 		  /* None of the two cmp operands should be set between the
6567 		     cmp and the branch.  */
6568 		  if (reg_set_between_p (op0, link_insn, insn))
6569 		    continue;
6570 
6571 		  if (reg_set_between_p (op1, link_insn, insn))
6572 		    continue;
6573 
6574 		  /* Since the MODE check does not work, check that this is
6575 		     CC reg's last set location before insn, and also no
6576 		     instruction between the cmp and branch uses the
6577 		     condition codes.  */
6578 		  if ((reg_set_between_p (SET_DEST (pat), link_insn, insn))
6579 		      || (reg_used_between_p (SET_DEST (pat), link_insn, insn)))
6580 		    continue;
6581 
6582 		  /* CC reg should be dead after insn.  */
6583 		  if (!find_regno_note (insn, REG_DEAD, CC_REG))
6584 		    continue;
6585 
6586 		  op = gen_rtx_fmt_ee (GET_CODE (op),
6587 				       GET_MODE (op), cmp0, cmp1);
6588 		  /* If we create a LIMM where there was none before,
6589 		     we only benefit if we can avoid a scheduling bubble
6590 		     for the ARC600.  Otherwise, we'd only forgo chances
6591 		     at short insn generation, and risk out-of-range
6592 		     branches.  */
6593 		  if (!brcc_nolimm_operator (op, VOIDmode)
6594 		      && !long_immediate_operand (op1, VOIDmode)
6595 		      && (TARGET_ARC700
6596 			  || next_active_insn (link_insn) != insn))
6597 		    continue;
6598 
6599 		  /* Emit bbit / brcc (or brcc_s if possible).
6600 		     CC_Zmode indicates that brcc_s is possible.  */
6601 
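		  /* Illustrative examples (editor's addition; the registers
		     and labels are hypothetical):
			 btst r1,5  ; bne @.L2   ->  bbit1  r1,5,@.L2
			 cmp  r0,0  ; bne @.L3   ->  brne_s r0,0,@.L3
		     where the second form relies on CC_Zmode signalling
		     that the short brcc_s encoding is acceptable.  */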
6602 		  if (op0 != cmp0)
6603 		    cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG);
6604 		  else if ((offset >= -140 && offset < 140)
6605 			   && rtx_equal_p (op1, const0_rtx)
6606 			   && compact_register_operand (op0, VOIDmode)
6607 			   && (GET_CODE (op) == EQ
6608 			       || GET_CODE (op) == NE))
6609 		    cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG);
6610 		  else
6611 		    cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG);
6612 
6613 		  brcc_insn
6614 		    = gen_rtx_IF_THEN_ELSE (VOIDmode, op, label, pc_rtx);
6615 		  brcc_insn = gen_rtx_SET (pc_rtx, brcc_insn);
6616 		  cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx);
6617 		  brcc_insn
6618 		    = gen_rtx_PARALLEL
6619 			(VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx));
6620 		  brcc_insn = emit_jump_insn_before (brcc_insn, insn);
6621 
6622 		  JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn);
6623 		  note = find_reg_note (insn, REG_BR_PROB, 0);
6624 		  if (note)
6625 		    {
6626 		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6627 		      REG_NOTES (brcc_insn) = note;
6628 		    }
6629 		  note = find_reg_note (link_insn, REG_DEAD, op0);
6630 		  if (note)
6631 		    {
6632 		      remove_note (link_insn, note);
6633 		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6634 		      REG_NOTES (brcc_insn) = note;
6635 		    }
6636 		  note = find_reg_note (link_insn, REG_DEAD, op1);
6637 		  if (note)
6638 		    {
6639 		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6640 		      REG_NOTES (brcc_insn) = note;
6641 		    }
6642 
6643 		  changed = 1;
6644 
6645 		  /* Delete the bcc insn.  */
6646 		  set_insn_deleted (insn);
6647 
6648 		  /* Delete the cmp insn.  */
6649 		  set_insn_deleted (link_insn);
6650 
6651 		}
6652 	    }
6653 	}
6654       /* Clear out insn_addresses.  */
6655       INSN_ADDRESSES_FREE ();
6656 
6657     } while (changed);
6658 
6659   if (INSN_ADDRESSES_SET_P())
6660     fatal_error (input_location, "insn addresses not freed");
6661 
6662   arc_reorg_in_progress = 0;
6663 }
6664 
6665  /* Check if the operands are valid for BRcc.d generation
6666     Valid Brcc.d patterns are
6667         Brcc.d b, c, s9
6668         Brcc.d b, u6, s9
6669 
6670         For cc={GT, LE, GTU, LEU}, u6=63 can not be allowed,
6671       since they are encoded by the assembler as {GE, LT, HS, LS} 64, which
6672       does not have a delay slot
6673 
6674   Assumed precondition: Second operand is either a register or a u6 value.  */
6675 
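/* Restating the restriction above with a concrete (illustrative) example:
   the assembler would turn
       brgt.d  r1,63,@target
   into
       brge    r1,64,@target
   and that encoding has no delay-slot form, so the filled delay slot
   would be lost.  The register and label names are made up.  */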
6676 bool
6677 valid_brcc_with_delay_p (rtx *operands)
6678 {
6679   if (optimize_size && GET_MODE (operands[4]) == CC_Zmode)
6680     return false;
6681   return brcc_nolimm_operator (operands[0], VOIDmode);
6682 }
6683 
6684 /* ??? Hack.  This should not really be here.  See PR32143.  */
6685 static bool
6686 arc_decl_anon_ns_mem_p (const_tree decl)
6687 {
6688   while (1)
6689     {
6690       if (decl == NULL_TREE || decl == error_mark_node)
6691 	return false;
6692       if (TREE_CODE (decl) == NAMESPACE_DECL
6693 	  && DECL_NAME (decl) == NULL_TREE)
6694 	return true;
6695       /* Classes and namespaces inside anonymous namespaces have
6696 	 TREE_PUBLIC == 0, so we can shortcut the search.  */
6697       else if (TYPE_P (decl))
6698 	return (TREE_PUBLIC (TYPE_NAME (decl)) == 0);
6699       else if (TREE_CODE (decl) == NAMESPACE_DECL)
6700 	return (TREE_PUBLIC (decl) == 0);
6701       else
6702 	decl = DECL_CONTEXT (decl);
6703     }
6704 }
6705 
6706 /* Implement TARGET_IN_SMALL_DATA_P.  Return true if it would be safe to
6707    access DECL using %gp_rel(...)($gp).  */
6708 
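/* For illustration (editor's addition; names are hypothetical): a 4-byte
   global such as
       int counter;
   placed in .sdata can be accessed gp-relative, roughly as
       ld  r0,[gp,@counter@sda]
   whereas objects larger than 4 bytes are rejected by the size check at
   the end of this function.  */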
6709 static bool
6710 arc_in_small_data_p (const_tree decl)
6711 {
6712   HOST_WIDE_INT size;
6713 
6714   if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
6715     return false;
6716 
6717 
6718   /* We don't yet generate small-data references for -mabicalls.  See related
6719      -G handling in override_options.  */
6720   if (TARGET_NO_SDATA_SET)
6721     return false;
6722 
6723   if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0)
6724     {
6725       const char *name;
6726 
6727       /* Reject anything that isn't in a known small-data section.  */
6728       name = DECL_SECTION_NAME (decl);
6729       if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
6730 	return false;
6731 
6732       /* If a symbol is defined externally, the assembler will use the
6733 	 usual -G rules when deciding how to implement macros.  */
6734       if (!DECL_EXTERNAL (decl))
6735 	  return true;
6736     }
6737   /* Only global variables go into sdata section for now.  */
6738   else if (1)
6739     {
6740       /* Don't put constants into the small data section: we want them
6741 	 to be in ROM rather than RAM.  */
6742       if (TREE_CODE (decl) != VAR_DECL)
6743 	return false;
6744 
6745       if (TREE_READONLY (decl)
6746 	  && !TREE_SIDE_EFFECTS (decl)
6747 	  && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl))))
6748 	return false;
6749 
6750       /* TREE_PUBLIC might change after the first call, because of the patch
6751 	 for PR19238.  */
6752       if (default_binds_local_p_1 (decl, 1)
6753 	  || arc_decl_anon_ns_mem_p (decl))
6754 	return false;
6755 
6756       /* To ensure -mvolatile-cache works
6757 	 ld.di does not have a gp-relative variant.  */
6758       if (TREE_THIS_VOLATILE (decl))
6759 	return false;
6760     }
6761 
6762   /* Disable sdata references to weak variables.  */
6763   if (DECL_WEAK (decl))
6764     return false;
6765 
6766   size = int_size_in_bytes (TREE_TYPE (decl));
6767 
6768 /*   if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) */
6769 /*     return false; */
6770 
6771   /* Allow only <=4B long data types into sdata.  */
6772   return (size > 0 && size <= 4);
6773 }
6774 
6775 /* Return true if X is a small data address that can be rewritten
6776    as a gp+symref.  */
6777 
6778 static bool
6779 arc_rewrite_small_data_p (const_rtx x)
6780 {
6781   if (GET_CODE (x) == CONST)
6782     x = XEXP (x, 0);
6783 
6784   if (GET_CODE (x) == PLUS)
6785     {
6786       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6787 	x = XEXP (x, 0);
6788     }
6789 
6790   return (GET_CODE (x) ==  SYMBOL_REF
6791 	  && SYMBOL_REF_SMALL_P(x));
6792 }
6793 
6794 /* If possible, rewrite OP so that it refers to small data using
6795    explicit relocations.  */
6796 
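/* A minimal sketch of the rewrite (editor's addition; the symbol name is
   hypothetical): a reference like
       (mem (symbol_ref "counter"))          ; SYMBOL_REF_SMALL_P set
   becomes
       (mem (plus (reg gp) (symbol_ref "counter")))
   which is then printed as a gp-relative @sda access.  */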
6797 rtx
6798 arc_rewrite_small_data (rtx op)
6799 {
6800   op = copy_insn (op);
6801   subrtx_ptr_iterator::array_type array;
6802   FOR_EACH_SUBRTX_PTR (iter, array, &op, ALL)
6803     {
6804       rtx *loc = *iter;
6805       if (arc_rewrite_small_data_p (*loc))
6806 	{
6807 	  gcc_assert (SDATA_BASE_REGNUM == PIC_OFFSET_TABLE_REGNUM);
6808 	  *loc = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, *loc);
6809 	  if (loc != &op)
6810 	    {
6811 	      if (GET_CODE (op) == MEM && &XEXP (op, 0) == loc)
6812 		; /* OK.  */
6813 	      else if (GET_CODE (op) == MEM
6814 		       && GET_CODE (XEXP (op, 0)) == PLUS
6815 		       && GET_CODE (XEXP (XEXP (op, 0), 0)) == MULT)
6816 		*loc = force_reg (Pmode, *loc);
6817 	      else
6818 		gcc_unreachable ();
6819 	    }
6820 	  iter.skip_subrtxes ();
6821 	}
6822       else if (GET_CODE (*loc) == PLUS
6823 	       && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx))
6824 	iter.skip_subrtxes ();
6825     }
6826   return op;
6827 }
6828 
6829 /* Return true if OP refers to small data symbols directly, not through
6830    a PLUS.  */
6831 
6832 bool
6833 small_data_pattern (rtx op, machine_mode)
6834 {
6835   if (GET_CODE (op) == SEQUENCE)
6836     return false;
6837   subrtx_iterator::array_type array;
6838   FOR_EACH_SUBRTX (iter, array, op, ALL)
6839     {
6840       const_rtx x = *iter;
6841       if (GET_CODE (x) == PLUS
6842 	  && rtx_equal_p (XEXP (x, 0), pic_offset_table_rtx))
6843 	iter.skip_subrtxes ();
6844       else if (arc_rewrite_small_data_p (x))
6845 	return true;
6846     }
6847   return false;
6848 }
6849 
6850 /* Return true if OP is an acceptable memory operand for ARCompact
6851    16-bit gp-relative load instructions.
6852    op should look like : [r26, symref@sda]
6853    i.e. (mem (plus (reg 26) (symref with smalldata flag set)))
6854   */
6855 /* volatile cache option still to be handled.  */
6856 
6857 bool
6858 compact_sda_memory_operand (rtx op, machine_mode mode)
6859 {
6860   rtx addr;
6861   int size;
6862 
6863   /* Eliminate non-memory operations.  */
6864   if (GET_CODE (op) != MEM)
6865     return false;
6866 
6867   if (mode == VOIDmode)
6868     mode = GET_MODE (op);
6869 
6870   size = GET_MODE_SIZE (mode);
6871 
6872   /* dword operations really put out 2 instructions, so eliminate them.  */
6873   if (size > UNITS_PER_WORD)
6874     return false;
6875 
6876   /* Decode the address now.  */
6877   addr = XEXP (op, 0);
6878 
6879   return LEGITIMATE_SMALL_DATA_ADDRESS_P  (addr);
6880 }
6881 
6882 /* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL.  */
6883 
6884 void
6885 arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name,
6886 				   unsigned HOST_WIDE_INT size,
6887 				   unsigned HOST_WIDE_INT align,
6888 				   unsigned HOST_WIDE_INT globalize_p)
6889 {
6890   int in_small_data =   arc_in_small_data_p (decl);
6891 
6892   if (in_small_data)
6893     switch_to_section (get_named_section (NULL, ".sbss", 0));
6894   /*    named_section (0,".sbss",0); */
6895   else
6896     switch_to_section (bss_section);
6897 
6898   if (globalize_p)
6899     (*targetm.asm_out.globalize_label) (stream, name);
6900 
6901   ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT));
6902   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
6903   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
6904   ASM_OUTPUT_LABEL (stream, name);
6905 
6906   if (size != 0)
6907     ASM_OUTPUT_SKIP (stream, size);
6908 }
6909 
6910 static bool
6911 arc_preserve_reload_p (rtx in)
6912 {
6913   return (GET_CODE (in) == PLUS
6914 	  && RTX_OK_FOR_BASE_P (XEXP (in, 0), true)
6915 	  && CONST_INT_P (XEXP (in, 1))
6916 	  && !((INTVAL (XEXP (in, 1)) & 511)));
6917 }
6918 
6919 int
6920 arc_register_move_cost (machine_mode,
6921 			enum reg_class from_class, enum reg_class to_class)
6922 {
6923   /* The ARC600 has no bypass for extension registers, hence a nop might be
6924      needed to be inserted after a write so that reads are safe.  */
6925   if (TARGET_ARC600)
6926     {
6927       if (to_class == MPY_WRITABLE_CORE_REGS)
6928 	return 3;
6929      /* Instructions modifying LP_COUNT need 4 additional cycles before
6930 	the register will actually contain the value.  */
6931       else if (to_class == LPCOUNT_REG)
6932 	return 6;
6933       else if (to_class == WRITABLE_CORE_REGS)
6934 	return 6;
6935     }
6936 
6937   /* The ARC700 stalls for 3 cycles when *reading* from lp_count.  */
6938   if (TARGET_ARC700
6939       && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS
6940 	  || from_class == WRITABLE_CORE_REGS))
6941     return 8;
6942 
6943   /* Force an attempt to 'mov Dy,Dx' to spill.  */
6944   if (TARGET_ARC700 && TARGET_DPFP
6945       && from_class == DOUBLE_REGS && to_class == DOUBLE_REGS)
6946     return 100;
6947 
6948   return 2;
6949 }
6950 
6951 /* Emit code for an addsi3 instruction with OPERANDS.
6952    COND_P indicates if this will use conditional execution.
6953    Return the length of the instruction.
6954    If OUTPUT_P is false, don't actually output the instruction, just return
6955    its length.  */
6956 int
6957 arc_output_addsi (rtx *operands, bool cond_p, bool output_p)
6958 {
6959   char format[32];
6960 
6961   int match = operands_match_p (operands[0], operands[1]);
6962   int match2 = operands_match_p (operands[0], operands[2]);
6963   int intval = (REG_P (operands[2]) ? 1
6964 		: CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057);
6965   int neg_intval = -intval;
6966   int short_0 = satisfies_constraint_Rcq (operands[0]);
6967   int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1]));
6968   int ret = 0;
6969 
6970 #define ADDSI_OUTPUT1(FORMAT) do {\
6971   if (output_p) \
6972     output_asm_insn (FORMAT, operands);\
6973   return ret; \
6974 } while (0)
6975 #define ADDSI_OUTPUT(LIST) do {\
6976   if (output_p) \
6977     sprintf LIST;\
6978   ADDSI_OUTPUT1 (format);\
6979   return ret; \
6980 } while (0)
6981 
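  /* Rough size selection, for illustration only (editor's addition; the
     register operands are assumed to satisfy the Rcq short-register
     constraint where a 2-byte form is shown):
	 add_s  r0,r0,7        ; 2 bytes, small constant, operands match
	 add    r0,r0,1000     ; 4 bytes, s12 immediate, dest matches source
	 add    r0,r1,60       ; 4 bytes, u6 immediate
	 add    r0,r1,0x12345  ; 8 bytes, needs a long immediate (LIMM)  */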
6982   /* First try to emit a 16 bit insn.  */
6983   ret = 2;
6984   if (!cond_p
6985       /* If we are actually about to output this insn, don't try a 16 bit
6986 	 variant if we already decided that we don't want that
6987 	 (I.e. we upsized this insn to align some following insn.)
6988 	 E.g. add_s r0,sp,70 is 16 bit, but add r0,sp,70 requires a LIMM -
6989 	 but add1 r0,sp,35 doesn't.  */
6990       && (!output_p || (get_attr_length (current_output_insn) & 2)))
6991     {
6992       if (short_p
6993 	  && (REG_P (operands[2])
6994 	      ? (match || satisfies_constraint_Rcq (operands[2]))
6995 	      : (unsigned) intval <= (match ? 127 : 7)))
6996 	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
6997       if (short_0 && REG_P (operands[1]) && match2)
6998 	ADDSI_OUTPUT1 ("add%? %0,%2,%1");
6999       if ((short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM)
7000 	  && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124))
7001 	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7002 
7003       if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7))
7004 	  || (REGNO (operands[0]) == STACK_POINTER_REGNUM
7005 	      && match && !(neg_intval & ~124)))
7006 	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
7007     }
7008 
7009   /* Now try to emit a 32 bit insn without long immediate.  */
7010   ret = 4;
7011   if (!match && match2 && REG_P (operands[1]))
7012     ADDSI_OUTPUT1 ("add%? %0,%2,%1");
7013   if (match || !cond_p)
7014     {
7015       int limit = (match && !cond_p) ? 0x7ff : 0x3f;
7016       int range_factor = neg_intval & intval;
7017       int shift;
7018 
7019       if (intval == (HOST_WIDE_INT) (HOST_WIDE_INT_M1U << 31))
7020 	ADDSI_OUTPUT1 ("bxor%? %0,%1,31");
7021 
7022       /* If we can use a straight add / sub instead of a {add,sub}[123] of
7023 	 the same size, do so - the insn latency is lower.  */
7024       /* -0x800 is a 12-bit constant for add /add3 / sub / sub3, but
7025 	 0x800 is not.  */
7026       if ((intval >= 0 && intval <= limit)
7027 	       || (intval == -0x800 && limit == 0x7ff))
7028 	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7029       else if ((intval < 0 && neg_intval <= limit)
7030 	       || (intval == 0x800 && limit == 0x7ff))
7031 	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
7032       shift = range_factor >= 8 ? 3 : (range_factor >> 1);
7033       gcc_assert (shift == 0 || shift == 1 || shift == 2 || shift == 3);
7034       gcc_assert ((((1 << shift) - 1) & intval) == 0);
7035       if (((intval < 0 && intval != -0x4000)
7036 	   /* sub[123] is slower than add_s / sub, only use it if it
7037 	      avoids a long immediate.  */
7038 	   && neg_intval <= limit << shift)
7039 	  || (intval == 0x4000 && limit == 0x7ff))
7040 	ADDSI_OUTPUT ((format, "sub%d%%? %%0,%%1,%d",
7041 		       shift, neg_intval >> shift));
7042       else if ((intval >= 0 && intval <= limit << shift)
7043 	       || (intval == -0x4000 && limit == 0x7ff))
7044 	ADDSI_OUTPUT ((format, "add%d%%? %%0,%%1,%d", shift, intval >> shift));
7045     }
7046   /* Try to emit a 16 bit opcode with long immediate.  */
7047   ret = 6;
7048   if (short_p && match)
7049     ADDSI_OUTPUT1 ("add%? %0,%1,%S2");
7050 
7051   /* We have to use a 32 bit opcode, and with a long immediate.  */
7052   ret = 8;
7053   ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%S2");
7054 }
7055 
7056 /* Emit code for a commutative_cond_exec instruction with OPERANDS.
7057    Return the length of the instruction.
7058    If OUTPUT_P is false, don't actually output the instruction, just return
7059    its length.  */
7060 int
7061 arc_output_commutative_cond_exec (rtx *operands, bool output_p)
7062 {
7063   enum rtx_code commutative_op = GET_CODE (operands[3]);
7064   const char *pat = NULL;
7065 
7066   /* Canonical rtl should not have a constant in the first operand position.  */
7067   gcc_assert (!CONSTANT_P (operands[1]));
7068 
7069   switch (commutative_op)
7070     {
7071       case AND:
7072 	if (satisfies_constraint_C1p (operands[2]))
7073 	  pat = "bmsk%? %0,%1,%Z2";
7074 	else if (satisfies_constraint_Ccp (operands[2]))
7075 	  pat = "bclr%? %0,%1,%M2";
7076 	else if (satisfies_constraint_CnL (operands[2]))
7077 	  pat = "bic%? %0,%1,%n2-1";
7078 	break;
7079       case IOR:
7080 	if (satisfies_constraint_C0p (operands[2]))
7081 	  pat = "bset%? %0,%1,%z2";
7082 	break;
7083       case XOR:
7084 	if (satisfies_constraint_C0p (operands[2]))
7085 	  pat = "bxor%? %0,%1,%z2";
7086 	break;
7087       case PLUS:
7088 	return arc_output_addsi (operands, true, output_p);
7089       default: break;
7090     }
7091   if (output_p)
7092     output_asm_insn (pat ? pat : "%O3.%d5 %0,%1,%2", operands);
7093   if (pat || REG_P (operands[2]) || satisfies_constraint_L (operands[2]))
7094     return 4;
7095   return 8;
7096 }
7097 
7098 /* Helper function of arc_expand_movmem.  ADDR points to a chunk of memory.
7099    Emit code and return a potentially modified address such that offsets
7100    up to SIZE can be added to yield a legitimate address.
7101    If REUSE is set, ADDR is a register that may be modified.  */
7102 
7103 static rtx
7104 force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse)
7105 {
7106   rtx base = addr;
7107   rtx offs = const0_rtx;
7108 
7109   if (GET_CODE (base) == PLUS)
7110     {
7111       offs = XEXP (base, 1);
7112       base = XEXP (base, 0);
7113     }
7114   if (!REG_P (base)
7115       || (REGNO (base) != STACK_POINTER_REGNUM
7116 	  && REGNO_PTR_FRAME_P (REGNO (addr)))
7117       || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs))
7118       || !SMALL_INT (INTVAL (offs) + size))
7119     {
7120       if (reuse)
7121 	emit_insn (gen_add2_insn (addr, offs));
7122       else
7123 	addr = copy_to_mode_reg (Pmode, addr);
7124     }
7125   return addr;
7126 }
7127 
7128 /* Like move_by_pieces, but take account of load latency, and actual
7129    offset ranges.  Return true on success.  */
7130 
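/* Worked example (editor's addition): copying 10 bytes with 4-byte
   alignment and no LL64 gives
       n_pieces = (10 + 2) / 4 + (10 & 1) = 3
   i.e. two word moves plus one halfword move, which is below the
   optimize-for-speed limit of 15, so the copy is open-coded.  */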
7131 bool
7132 arc_expand_movmem (rtx *operands)
7133 {
7134   rtx dst = operands[0];
7135   rtx src = operands[1];
7136   rtx dst_addr, src_addr;
7137   HOST_WIDE_INT size;
7138   int align = INTVAL (operands[3]);
7139   unsigned n_pieces;
7140   int piece = align;
7141   rtx store[2];
7142   rtx tmpx[2];
7143   int i;
7144 
7145   if (!CONST_INT_P (operands[2]))
7146     return false;
7147   size = INTVAL (operands[2]);
7148   /* move_by_pieces_ninsns is static, so we can't use it.  */
7149   if (align >= 4)
7150     {
7151       if (TARGET_LL64)
7152 	n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1);
7153       else
7154 	n_pieces = (size + 2) / 4U + (size & 1);
7155     }
7156   else if (align == 2)
7157     n_pieces = (size + 1) / 2U;
7158   else
7159     n_pieces = size;
7160   if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
7161     return false;
7162   /* Force 32 bit aligned and larger datum to use 64 bit transfers, if
7163      possible.  */
7164   if (TARGET_LL64 && (piece >= 4) && (size >= 8))
7165     piece = 8;
7166   else if (piece > 4)
7167     piece = 4;
7168   dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
7169   src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
7170   store[0] = store[1] = NULL_RTX;
7171   tmpx[0] = tmpx[1] = NULL_RTX;
7172   for (i = 0; size > 0; i ^= 1, size -= piece)
7173     {
7174       rtx tmp;
7175       machine_mode mode;
7176 
7177       while (piece > size)
7178 	piece >>= 1;
7179       mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT);
7180       /* If we don't re-use temporaries, the scheduler gets carried away,
7181 	 and the register pressure gets unnecessarily high.  */
7182       if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode)
7183 	tmp = tmpx[i];
7184       else
7185 	tmpx[i] = tmp = gen_reg_rtx (mode);
7186       dst_addr = force_offsettable (dst_addr, piece, 1);
7187       src_addr = force_offsettable (src_addr, piece, 1);
7188       if (store[i])
7189 	emit_insn (store[i]);
7190       emit_move_insn (tmp, change_address (src, mode, src_addr));
7191       store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp);
7192       dst_addr = plus_constant (Pmode, dst_addr, piece);
7193       src_addr = plus_constant (Pmode, src_addr, piece);
7194     }
7195   if (store[i])
7196     emit_insn (store[i]);
7197   if (store[i^1])
7198     emit_insn (store[i^1]);
7199   return true;
7200 }
7201 
7202 /* Prepare operands for move in MODE.  Return true iff the move has
7203    been emitted.  */
7204 
7205 bool
7206 prepare_move_operands (rtx *operands, machine_mode mode)
7207 {
7208   /* We used to do this only for MODE_INT Modes, but addresses to floating
7209      point variables may well be in the small data section.  */
7210   if (1)
7211     {
7212       if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[0], Pmode))
7213 	operands[0] = arc_rewrite_small_data (operands[0]);
7214       else if (mode == SImode && flag_pic && SYMBOLIC_CONST (operands[1]))
7215 	{
7216 	  emit_pic_move (operands, SImode);
7217 
7218 	  /* Disable any REG_EQUALs associated with the symref
7219 	     otherwise the optimization pass undoes the work done
7220 	     here and references the variable directly.  */
7221 	}
7222       else if (GET_CODE (operands[0]) != MEM
7223 	       && !TARGET_NO_SDATA_SET
7224 	       && small_data_pattern (operands[1], Pmode))
7225        {
7226 	  /* This is to take care of address calculations involving sdata
7227 	     variables.  */
7228 	  operands[1] = arc_rewrite_small_data (operands[1]);
7229 
7230 	  emit_insn (gen_rtx_SET (operands[0],operands[1]));
7231 	  /* ??? This note is useless, since it only restates the set itself.
7232 	     We should rather use the original SYMBOL_REF.  However, there is
7233 	     the problem that we are lying to the compiler about these
7234 	     SYMBOL_REFs to start with.  symbol@sda should be encoded specially
7235 	     so that we can tell it apart from an actual symbol.  */
7236 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
7237 
7238 	  /* Take care of the REG_EQUAL note that will be attached to mark the
7239 	     output reg equal to the initial symbol_ref after this code is
7240 	     executed.  */
7241 	  emit_move_insn (operands[0], operands[0]);
7242 	  return true;
7243 	}
7244     }
7245 
7246   if (MEM_P (operands[0])
7247       && !(reload_in_progress || reload_completed))
7248     {
7249       operands[1] = force_reg (mode, operands[1]);
7250       if (!move_dest_operand (operands[0], mode))
7251 	{
7252 	  rtx addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
7253 	  /* This is like change_address_1 (operands[0], mode, 0, 1),
7254 	     except that we can't use that function because it is static.  */
7255 	  rtx pat = change_address (operands[0], mode, addr);
7256 	  MEM_COPY_ATTRIBUTES (pat, operands[0]);
7257 	  operands[0] = pat;
7258 	}
7259       if (!cse_not_expected)
7260 	{
7261 	  rtx pat = XEXP (operands[0], 0);
7262 
7263 	  pat = arc_legitimize_address_0 (pat, pat, mode);
7264 	  if (pat)
7265 	    {
7266 	      pat = change_address (operands[0], mode, pat);
7267 	      MEM_COPY_ATTRIBUTES (pat, operands[0]);
7268 	      operands[0] = pat;
7269 	    }
7270 	}
7271     }
7272 
7273   if (MEM_P (operands[1]) && !cse_not_expected)
7274     {
7275       rtx pat = XEXP (operands[1], 0);
7276 
7277       pat = arc_legitimize_address_0 (pat, pat, mode);
7278       if (pat)
7279 	{
7280 	  pat = change_address (operands[1], mode, pat);
7281 	  MEM_COPY_ATTRIBUTES (pat, operands[1]);
7282 	  operands[1] = pat;
7283 	}
7284     }
7285 
7286   return false;
7287 }
7288 
7289 /* Prepare OPERANDS for an extension using CODE to OMODE.
7290    Return true iff the move has been emitted.  */
7291 
7292 bool
7293 prepare_extend_operands (rtx *operands, enum rtx_code code,
7294 			 machine_mode omode)
7295 {
7296   if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode))
7297     {
7298       /* This is to take care of address calculations involving sdata
7299 	 variables.  */
7300       operands[1]
7301 	= gen_rtx_fmt_e (code, omode, arc_rewrite_small_data (operands[1]));
7302       emit_insn (gen_rtx_SET (operands[0], operands[1]));
7303       set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
7304 
7305       /* Take care of the REG_EQUAL note that will be attached to mark the
7306 	 output reg equal to the initial extension after this code is
7307 	 executed.  */
7308       emit_move_insn (operands[0], operands[0]);
7309       return true;
7310     }
7311   return false;
7312 }
7313 
7314 /* Output a library call to a function called FNAME that has been arranged
7315    to be local to any dso.  */
7316 
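/* Example outputs (editor's addition; "__foo" is a made-up name): with
   -mlong-calls and no PIC this returns the template
       "jl%! @__foo"
   and in the default case
       "bl%!%* @__foo"
   where the %-sequences are ARC-specific output punctuation expanded
   later, when the template is printed.  */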
7317 const char *
7318 arc_output_libcall (const char *fname)
7319 {
7320   unsigned len = strlen (fname);
7321   static char buf[64];
7322 
7323   gcc_assert (len < sizeof buf - 35);
7324   if (TARGET_LONG_CALLS_SET
7325      || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ()))
7326     {
7327       if (flag_pic)
7328 	sprintf (buf, "add r12,pcl,@%s-(.&-4)\n\tjl%%!%%* [r12]", fname);
7329       else
7330 	sprintf (buf, "jl%%! @%s", fname);
7331     }
7332   else
7333     sprintf (buf, "bl%%!%%* @%s", fname);
7334   return buf;
7335 }
7336 
7337 /* Return the SImode highpart of the DImode value IN.  */
7338 
7339 rtx
7340 disi_highpart (rtx in)
7341 {
7342   return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4);
7343 }
7344 
7345 /* Return length adjustment for INSN.
7346    For ARC600:
7347    A write to a core reg greater or equal to 32 must not be immediately
7348    followed by a use.  Anticipate the length requirement to insert a nop
7349    between PRED and SUCC to prevent a hazard.  */
7350 
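/* Illustrative hazard (editor's addition; assumes r32 is a configured
   extension register):
       add  r32,r1,r2
       add  r3,r32,1
   On ARC600 a nop is needed between the two instructions, which the
   4-byte length adjustment returned below makes room for.  */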
7351 static int
7352 arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
7353 {
7354   if (!TARGET_ARC600)
7355     return 0;
7356   /* If SUCC is a doloop_end_i with a preceding label, we must output a nop
7357      in front of SUCC anyway, so there will be separation between PRED and
7358      SUCC.  */
7359   if (recog_memoized (succ) == CODE_FOR_doloop_end_i
7360       && LABEL_P (prev_nonnote_insn (succ)))
7361     return 0;
7362   if (recog_memoized (succ) == CODE_FOR_doloop_begin_i)
7363     return 0;
7364   if (GET_CODE (PATTERN (pred)) == SEQUENCE)
7365     pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
7366   if (GET_CODE (PATTERN (succ)) == SEQUENCE)
7367     succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
7368   if (recog_memoized (pred) == CODE_FOR_mulsi_600
7369       || recog_memoized (pred) == CODE_FOR_umul_600
7370       || recog_memoized (pred) == CODE_FOR_mac_600
7371       || recog_memoized (pred) == CODE_FOR_mul64_600
7372       || recog_memoized (pred) == CODE_FOR_mac64_600
7373       || recog_memoized (pred) == CODE_FOR_umul64_600
7374       || recog_memoized (pred) == CODE_FOR_umac64_600)
7375     return 0;
7376   subrtx_iterator::array_type array;
7377   FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
7378     {
7379       const_rtx x = *iter;
7380       switch (GET_CODE (x))
7381 	{
7382 	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
7383 	  break;
7384 	default:
7385 	  /* This is also fine for PRE/POST_MODIFY, because they
7386 	     contain a SET.  */
7387 	  continue;
7388 	}
7389       rtx dest = XEXP (x, 0);
7390       /* Check if this sets an extension register.  N.B. we use 61 for the
7391 	 condition codes, which is definitely not an extension register.  */
7392       if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
7393 	  /* Check if the same register is used by the PAT.  */
7394 	  && (refers_to_regno_p
7395 	      (REGNO (dest),
7396 	       REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
7397 	       PATTERN (succ), 0)))
7398 	return 4;
7399     }
7400   return 0;
7401 }
7402 
7403 /* Given an rtx, check if it is an assembly instruction or not.  */
7404 
7405 static int
7406 arc_asm_insn_p (rtx x)
7407 {
7408   int i, j;
7409 
7410   if (x == 0)
7411     return 0;
7412 
7413   switch (GET_CODE (x))
7414     {
7415     case ASM_OPERANDS:
7416     case ASM_INPUT:
7417       return 1;
7418 
7419     case SET:
7420       return arc_asm_insn_p (SET_SRC (x));
7421 
7422     case PARALLEL:
7423       j = 0;
7424       for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
7425 	j += arc_asm_insn_p (XVECEXP (x, 0, i));
7426       if ( j > 0)
7427 	return 1;
7428       break;
7429 
7430     default:
7431       break;
7432     }
7433 
7434   return 0;
7435 }
7436 
7437 /* We might have a CALL to a non-returning function before a loop end.
7438    ??? Although the manual says that's OK (the target is outside the
7439    loop, and the loop counter unused there), the assembler barfs on
7440    this for ARC600, so we must insert a nop before such a call too.
7441    For ARC700 and ARCv2, the last ZOL instruction must not be a jump
7442    to a location where lp_count is modified.  */
7443 
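/* Hypothetical example (editor's addition): a zero-overhead loop whose
   last instruction is a call to a noreturn function,
       lp    @.Lend
       ...
       bl    @abort      ; last insn before .Lend
   .Lend:
   is rejected by the ARC600 assembler, so a nop must be emitted before
   the loop end; returning true here makes arc_hazard reserve 4 bytes
   for it.  */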
7444 static bool
7445 arc_loop_hazard (rtx_insn *pred, rtx_insn *succ)
7446 {
7447   rtx_insn *jump  = NULL;
7448   rtx label_rtx = NULL_RTX;
7449   rtx_insn *label = NULL;
7450   basic_block succ_bb;
7451 
7452   if (recog_memoized (succ) != CODE_FOR_doloop_end_i)
7453     return false;
7454 
7455   /* Phase 1: ARC600 and ARCv2HS don't allow any control instruction
7456      (i.e., jump/call) as the last instruction of a ZOL.  */
7457   if (TARGET_ARC600 || TARGET_HS)
7458     if (JUMP_P (pred) || CALL_P (pred)
7459 	|| arc_asm_insn_p (PATTERN (pred))
7460 	|| GET_CODE (PATTERN (pred)) == SEQUENCE)
7461       return true;
7462 
7463   /* Phase 2: On any architecture, the last ZOL instruction must not be
7464      a jump to a location where lp_count is modified.  */
7465 
7466   /* Phase 2a: Dig for the jump instruction.  */
7467   if (JUMP_P (pred))
7468     jump = pred;
7469   else if (GET_CODE (PATTERN (pred)) == SEQUENCE
7470 	   && JUMP_P (XVECEXP (PATTERN (pred), 0, 0)))
7471     jump = as_a <rtx_insn *> XVECEXP (PATTERN (pred), 0, 0);
7472   else
7473     return false;
7474 
7475   /* Phase 2b: Make sure it is not a millicode jump.  */
7476   if ((GET_CODE (PATTERN (jump)) == PARALLEL)
7477       && (XVECEXP (PATTERN (jump), 0, 0) == ret_rtx))
7478     return false;
7479 
7480   label_rtx = JUMP_LABEL (jump);
7481   if (!label_rtx)
7482     return false;
7483 
7484   /* Phase 2c: Make sure it is not a return.  */
7485   if (ANY_RETURN_P (label_rtx))
7486     return false;
7487 
7488   /* Phase 2d: Go to the target of the jump and check for aliveness of
7489      LP_COUNT register.  */
7490   label = safe_as_a <rtx_insn *> (label_rtx);
7491   succ_bb = BLOCK_FOR_INSN (label);
7492   if (!succ_bb)
7493     {
7494       gcc_assert (NEXT_INSN (label));
7495       if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (label)))
7496 	succ_bb = NOTE_BASIC_BLOCK (NEXT_INSN (label));
7497       else
7498 	succ_bb = BLOCK_FOR_INSN (NEXT_INSN (label));
7499     }
7500 
7501   if (succ_bb && REGNO_REG_SET_P (df_get_live_out (succ_bb), LP_COUNT))
7502     return true;
7503 
7504   return false;
7505 }
7506 
7507 /* For ARC600:
7508    A write to a core reg greater or equal to 32 must not be immediately
7509    followed by a use.  Anticipate the length requirement to insert a nop
7510    between PRED and SUCC to prevent a hazard.  */
7511 
7512 int
7513 arc_hazard (rtx_insn *pred, rtx_insn *succ)
7514 {
7515   if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
7516     return 0;
7517 
7518   if (arc_loop_hazard (pred, succ))
7519     return 4;
7520 
7521   if (TARGET_ARC600)
7522     return arc600_corereg_hazard (pred, succ);
7523 
7524   return 0;
7525 }
7526 
7527 /* Return length adjustment for INSN.  */
7528 
7529 int
7530 arc_adjust_insn_length (rtx_insn *insn, int len, bool)
7531 {
7532   if (!INSN_P (insn))
7533     return len;
7534   /* We already handle sequences by ignoring the delay sequence flag.  */
7535   if (GET_CODE (PATTERN (insn)) == SEQUENCE)
7536     return len;
7537 
7538   /* It is impossible to jump to the very end of a Zero-Overhead Loop, as
7539      the ZOL mechanism only triggers when advancing to the end address,
7540      so if there's a label at the end of a ZOL, we need to insert a nop.
7541      The ARC600 ZOL also has extra restrictions on jumps at the end of a
7542      loop.  */
7543   if (recog_memoized (insn) == CODE_FOR_doloop_end_i)
7544     {
7545       rtx_insn *prev = prev_nonnote_insn (insn);
7546 
7547       return ((LABEL_P (prev)
7548 	       || (TARGET_ARC600
7549 		   && (JUMP_P (prev)
7550 		       || CALL_P (prev) /* Could be a noreturn call.  */
7551 		       || (NONJUMP_INSN_P (prev)
7552 			   && GET_CODE (PATTERN (prev)) == SEQUENCE))))
7553 	      ? len + 4 : len);
7554     }
7555 
7556   /* Check for return with but one preceding insn since function
7557      start / call.  */
7558   if (TARGET_PAD_RETURN
7559       && JUMP_P (insn)
7560       && GET_CODE (PATTERN (insn)) != ADDR_VEC
7561       && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7562       && get_attr_type (insn) == TYPE_RETURN)
7563     {
7564       rtx_insn *prev = prev_active_insn (insn);
7565 
7566       if (!prev || !(prev = prev_active_insn (prev))
7567 	  || ((NONJUMP_INSN_P (prev)
7568 	       && GET_CODE (PATTERN (prev)) == SEQUENCE)
7569 	      ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
7570 			   NON_SIBCALL)
7571 	      : CALL_ATTR (prev, NON_SIBCALL)))
7572 	return len + 4;
7573     }
7574   if (TARGET_ARC600)
7575     {
7576       rtx_insn *succ = next_real_insn (insn);
7577 
7578       /* On the ARC600, a write to an extension register must be separated
7579 	 from a read.  */
7580       if (succ && INSN_P (succ))
7581 	len += arc600_corereg_hazard (insn, succ);
7582     }
7583 
7584   /* Restore extracted operands - otherwise splitters like the addsi3_mixed one
7585      can go awry.  */
7586   extract_constrain_insn_cached (insn);
7587 
7588   return len;
7589 }
7590 
7591 /* Values for length_sensitive.  */
7592 enum
7593 {
7594   ARC_LS_NONE,// Jcc
7595   ARC_LS_25, // 25 bit offset, B
7596   ARC_LS_21, // 21 bit offset, Bcc
7597   ARC_LS_U13,// 13 bit unsigned offset, LP
7598   ARC_LS_10, // 10 bit offset, B_s, Beq_s, Bne_s
7599   ARC_LS_9,  //  9 bit offset, BRcc
7600   ARC_LS_8,  //  8 bit offset, BRcc_s
7601   ARC_LS_U7, //  7 bit unsigned offset, LPcc
7602   ARC_LS_7   //  7 bit offset, Bcc_s
7603 };
7604 
7605 /* While the infrastructure patch is waiting for review, duplicate the
7606    struct definitions, to allow this file to compile.  */
7607 #if 1
7608 typedef struct
7609 {
7610   unsigned align_set;
7611   /* Cost as a branch / call target or call return address.  */
7612   int target_cost;
7613   int fallthrough_cost;
7614   int branch_cost;
7615   int length;
7616   /* 0 for not length sensitive, 1 for largest offset range,
7617      2 for next smaller etc.  */
7618   unsigned length_sensitive : 8;
7619   bool enabled;
7620 } insn_length_variant_t;
7621 
7622 typedef struct insn_length_parameters_s
7623 {
7624   int align_unit_log;
7625   int align_base_log;
7626   int max_variants;
7627   int (*get_variants) (rtx_insn *, int, bool, bool, insn_length_variant_t *);
7628 } insn_length_parameters_t;
7629 
7630 static void
7631 arc_insn_length_parameters (insn_length_parameters_t *ilp) ATTRIBUTE_UNUSED;
7632 #endif
7633 
7634 static int
7635 arc_get_insn_variants (rtx_insn *insn, int len, bool, bool target_p,
7636 		       insn_length_variant_t *ilv)
7637 {
7638   if (!NONDEBUG_INSN_P (insn))
7639     return 0;
7640   enum attr_type type;
7641   /* shorten_branches doesn't take optimize_size into account yet for the
7642      get_variants mechanism, so turn this off for now.  */
7643   if (optimize_size)
7644     return 0;
7645   if (rtx_sequence *pat = dyn_cast <rtx_sequence *> (PATTERN (insn)))
7646     {
7647       /* The interaction of a short delay slot insn with a short branch is
7648 	 too weird for shorten_branches to piece together, so describe the
7649 	 entire SEQUENCE.  */
7650       rtx_insn *inner;
7651       if (TARGET_UPSIZE_DBR
7652 	  && get_attr_length (pat->insn (1)) <= 2
7653 	  && (((type = get_attr_type (inner = pat->insn (0)))
7654 	       == TYPE_UNCOND_BRANCH)
7655 	      || type == TYPE_BRANCH)
7656 	  && get_attr_delay_slot_filled (inner) == DELAY_SLOT_FILLED_YES)
7657 	{
7658 	  int n_variants
7659 	    = arc_get_insn_variants (inner, get_attr_length (inner), true,
7660 				     target_p, ilv+1);
7661 	  /* The short variant gets split into a higher-cost aligned
7662 	     and a lower cost unaligned variant.  */
7663 	  gcc_assert (n_variants);
7664 	  gcc_assert (ilv[1].length_sensitive == ARC_LS_7
7665 		      || ilv[1].length_sensitive == ARC_LS_10);
7666 	  gcc_assert (ilv[1].align_set == 3);
7667 	  ilv[0] = ilv[1];
7668 	  ilv[0].align_set = 1;
7669 	  ilv[0].branch_cost += 1;
7670 	  ilv[1].align_set = 2;
7671 	  n_variants++;
7672 	  for (int i = 0; i < n_variants; i++)
7673 	    ilv[i].length += 2;
7674 	  /* In case an instruction with aligned size is wanted, and
7675 	     the short variants are unavailable / too expensive, add
7676 	     versions of long branch + long delay slot.  */
7677 	  for (int i = 2, end = n_variants; i < end; i++, n_variants++)
7678 	    {
7679 	      ilv[n_variants] = ilv[i];
7680 	      ilv[n_variants].length += 2;
7681 	    }
7682 	  return n_variants;
7683 	}
7684       return 0;
7685     }
7686   insn_length_variant_t *first_ilv = ilv;
7687   type = get_attr_type (insn);
7688   bool delay_filled
7689     = (get_attr_delay_slot_filled (insn) == DELAY_SLOT_FILLED_YES);
7690   int branch_align_cost = delay_filled ? 0 : 1;
7691   int branch_unalign_cost = delay_filled ? 0 : TARGET_UNALIGN_BRANCH ? 0 : 1;
7692   /* If the previous instruction is an sfunc call, this insn is always
7693      a target, even though the middle-end is unaware of this.  */
7694   bool force_target = false;
7695   rtx_insn *prev = prev_active_insn (insn);
7696   if (prev && arc_next_active_insn (prev, 0) == insn
7697       && ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
7698 	  ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
7699 		       NON_SIBCALL)
7700 	  : (CALL_ATTR (prev, NON_SIBCALL)
7701 	     && NEXT_INSN (PREV_INSN (prev)) == prev)))
7702     force_target = true;
7703 
7704   switch (type)
7705     {
7706     case TYPE_BRCC:
7707       /* Short BRCC only comes in a no-delay-slot version, and without limm.  */
7708       if (!delay_filled)
7709 	{
7710 	  ilv->align_set = 3;
7711 	  ilv->length = 2;
7712 	  ilv->branch_cost = 1;
7713 	  ilv->enabled = (len == 2);
7714 	  ilv->length_sensitive = ARC_LS_8;
7715 	  ilv++;
7716 	}
7717       /* Fall through.  */
7718     case TYPE_BRCC_NO_DELAY_SLOT:
7719       /* doloop_fallback* patterns are TYPE_BRCC_NO_DELAY_SLOT for
7720 	 (delay slot) scheduling purposes, but they are longer.  */
7721       if (GET_CODE (PATTERN (insn)) == PARALLEL
7722 	  && GET_CODE (XVECEXP (PATTERN (insn), 0, 1)) == SET)
7723 	return 0;
7724       /* Standard BRCC: 4 bytes, or 8 bytes with limm.  */
7725       ilv->length = ((type == TYPE_BRCC) ? 4 : 8);
7726       ilv->align_set = 3;
7727       ilv->branch_cost = branch_align_cost;
7728       ilv->enabled = (len <= ilv->length);
7729       ilv->length_sensitive = ARC_LS_9;
7730       if ((target_p || force_target)
7731 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
7732 	{
7733 	  ilv[1] = *ilv;
7734 	  ilv->align_set = 1;
7735 	  ilv++;
7736 	  ilv->align_set = 2;
7737 	  ilv->target_cost = 1;
7738 	  ilv->branch_cost = branch_unalign_cost;
7739 	}
7740       ilv++;
7741 
7742       rtx op, op0;
7743       op = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0);
7744       op0 = XEXP (op, 0);
7745 
7746       if (GET_CODE (op0) == ZERO_EXTRACT
7747 	  && satisfies_constraint_L (XEXP (op0, 2)))
7748 	op0 = XEXP (op0, 0);
7749       if (satisfies_constraint_Rcq (op0))
7750 	{
7751 	  ilv->length = ((type == TYPE_BRCC) ? 6 : 10);
7752 	  ilv->align_set = 3;
7753 	  ilv->branch_cost = 1 + branch_align_cost;
7754 	  ilv->fallthrough_cost = 1;
7755 	  ilv->enabled = true;
7756 	  ilv->length_sensitive = ARC_LS_21;
7757 	  if (!delay_filled && TARGET_UNALIGN_BRANCH)
7758 	    {
7759 	      ilv[1] = *ilv;
7760 	      ilv->align_set = 1;
7761 	      ilv++;
7762 	      ilv->align_set = 2;
7763 	      ilv->branch_cost = 1 + branch_unalign_cost;
7764 	    }
7765 	  ilv++;
7766 	}
7767       ilv->length = ((type == TYPE_BRCC) ? 8 : 12);
7768       ilv->align_set = 3;
7769       ilv->branch_cost = 1 + branch_align_cost;
7770       ilv->fallthrough_cost = 1;
7771       ilv->enabled = true;
7772       ilv->length_sensitive = ARC_LS_21;
7773       if ((target_p || force_target)
7774 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
7775 	{
7776 	  ilv[1] = *ilv;
7777 	  ilv->align_set = 1;
7778 	  ilv++;
7779 	  ilv->align_set = 2;
7780 	  ilv->target_cost = 1;
7781 	  ilv->branch_cost = 1 + branch_unalign_cost;
7782 	}
7783       ilv++;
7784       break;
7785 
7786     case TYPE_SFUNC:
7787       ilv->length = 12;
7788       goto do_call;
7789     case TYPE_CALL_NO_DELAY_SLOT:
7790       ilv->length = 8;
7791       goto do_call;
7792     case TYPE_CALL:
7793       ilv->length = 4;
7794       ilv->length_sensitive
7795 	= GET_CODE (PATTERN (insn)) == COND_EXEC ? ARC_LS_21 : ARC_LS_25;
7796     do_call:
7797       ilv->align_set = 3;
7798       ilv->fallthrough_cost = branch_align_cost;
7799       ilv->enabled = true;
7800       if ((target_p || force_target)
7801 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
7802 	{
7803 	  ilv[1] = *ilv;
7804 	  ilv->align_set = 1;
7805 	  ilv++;
7806 	  ilv->align_set = 2;
7807 	  ilv->target_cost = 1;
7808 	  ilv->fallthrough_cost = branch_unalign_cost;
7809 	}
7810       ilv++;
7811       break;
7812     case TYPE_UNCOND_BRANCH:
7813       /* Strictly speaking, this should be ARC_LS_10 for equality comparisons,
7814 	 but that makes no difference at the moment.  */
7815       ilv->length_sensitive = ARC_LS_7;
7816       ilv[1].length_sensitive = ARC_LS_25;
7817       goto do_branch;
7818     case TYPE_BRANCH:
7819       ilv->length_sensitive = ARC_LS_10;
7820       ilv[1].length_sensitive = ARC_LS_21;
7821     do_branch:
7822       ilv->align_set = 3;
7823       ilv->length = 2;
7824       ilv->branch_cost = branch_align_cost;
7825       ilv->enabled = (len == ilv->length);
7826       ilv++;
7827       ilv->length = 4;
7828       ilv->align_set = 3;
7829       ilv->branch_cost = branch_align_cost;
7830       ilv->enabled = true;
7831       if ((target_p || force_target)
7832 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
7833 	{
7834 	  ilv[1] = *ilv;
7835 	  ilv->align_set = 1;
7836 	  ilv++;
7837 	  ilv->align_set = 2;
7838 	  ilv->target_cost = 1;
7839 	  ilv->branch_cost = branch_unalign_cost;
7840 	}
7841       ilv++;
7842       break;
7843     case TYPE_JUMP:
7844       return 0;
7845     default:
7846       /* For every short insn, there is generally also a long insn.
7847 	 trap_s is an exception.  */
7848       if ((len & 2) == 0 || recog_memoized (insn) == CODE_FOR_trap_s)
7849 	return 0;
7850       ilv->align_set = 3;
7851       ilv->length = len;
7852       ilv->enabled = 1;
7853       ilv++;
7854       ilv->align_set = 3;
7855       ilv->length = len + 2;
7856       ilv->enabled = 1;
7857       if (target_p || force_target)
7858 	{
7859 	  ilv[1] = *ilv;
7860 	  ilv->align_set = 1;
7861 	  ilv++;
7862 	  ilv->align_set = 2;
7863 	  ilv->target_cost = 1;
7864 	}
7865       ilv++;
7866     }
7867   /* If the previous instruction is an sfunc call, this insn is always
7868      a target, even though the middle-end is unaware of this.
7869      Therefore, if we have a call predecessor, transfer the target cost
7870      to the fallthrough and branch costs.  */
7871   if (force_target)
7872     {
7873       for (insn_length_variant_t *p = first_ilv; p < ilv; p++)
7874 	{
7875 	  p->fallthrough_cost += p->target_cost;
7876 	  p->branch_cost += p->target_cost;
7877 	  p->target_cost = 0;
7878 	}
7879     }
7880 
7881   return ilv - first_ilv;
7882 }
7883 
7884 static void
7885 arc_insn_length_parameters (insn_length_parameters_t *ilp)
7886 {
7887   ilp->align_unit_log = 1;
7888   ilp->align_base_log = 1;
7889   ilp->max_variants = 7;
7890   ilp->get_variants = arc_get_insn_variants;
7891 }
7892 
7893 /* Return a copy of COND from *STATEP, inverted if that is indicated by the
7894    CC field of *STATEP.  */
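
/* Illustrative example (rtx shown schematically): if COND is
   (eq (reg:CC_Z CC_REG) (const_int 0)) but the CC field of *STATEP calls
   for the opposite sense, the rtx returned is
   (ne (reg:CC_Z CC_REG) (const_int 0)); for the CC_FP_GT / CC_FP_GE modes
   the unordered-aware reversal below is used instead.  */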
7895 
7896 static rtx
7897 arc_get_ccfsm_cond (struct arc_ccfsm *statep, bool reverse)
7898 {
7899   rtx cond = statep->cond;
7900   int raw_cc = get_arc_condition_code (cond);
7901   if (reverse)
7902     raw_cc = ARC_INVERSE_CONDITION_CODE (raw_cc);
7903 
7904   if (statep->cc == raw_cc)
7905     return copy_rtx (cond);
7906 
7907   gcc_assert (ARC_INVERSE_CONDITION_CODE (raw_cc) == statep->cc);
7908 
7909   machine_mode ccm = GET_MODE (XEXP (cond, 0));
7910   enum rtx_code code = reverse_condition (GET_CODE (cond));
7911   if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
7912     code = reverse_condition_maybe_unordered (GET_CODE (cond));
7913 
7914   return gen_rtx_fmt_ee (code, GET_MODE (cond),
7915 			 copy_rtx (XEXP (cond, 0)), copy_rtx (XEXP (cond, 1)));
7916 }
7917 
7918 /* Return version of PAT conditionalized with COND, which is part of INSN.
7919    ANNULLED indicates if INSN is an annulled delay-slot insn.
7920    Register further changes if necessary.  */
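
/* Illustrative shape of the result (register numbers hypothetical):
   predicating (set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r0))) on
   (ne (reg:CC CC_REG) (const_int 0)) yields
     (cond_exec (ne (reg:CC CC_REG) (const_int 0))
		(set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1))))
   with the commutative operands swapped so that the first source matches
   the destination.  */
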
7921 static rtx
7922 conditionalize_nonjump (rtx pat, rtx cond, rtx insn, bool annulled)
7923 {
7924   /* For commutative operators, we generally prefer to have
7925      the first source match the destination.  */
7926   if (GET_CODE (pat) == SET)
7927     {
7928       rtx src = SET_SRC (pat);
7929 
7930       if (COMMUTATIVE_P (src))
7931 	{
7932 	  rtx src0 = XEXP (src, 0);
7933 	  rtx src1 = XEXP (src, 1);
7934 	  rtx dst = SET_DEST (pat);
7935 
7936 	  if (rtx_equal_p (src1, dst) && !rtx_equal_p (src0, dst)
7937 	      /* Leave add_n alone - the canonical form is to
7938 		 have the complex summand first.  */
7939 	      && REG_P (src0))
7940 	    pat = gen_rtx_SET (dst,
7941 			       gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src),
7942 					       src1, src0));
7943 	}
7944     }
7945 
7946   /* dwarf2out.c:dwarf2out_frame_debug_expr doesn't know
7947      what to do with COND_EXEC.  */
7948   if (RTX_FRAME_RELATED_P (insn))
7949     {
7950       /* If this is the delay slot insn of an annulled branch,
7951 	 dwarf2out.c:scan_trace understands the annulling semantics
7952 	 without the COND_EXEC.  */
7953       gcc_assert (annulled);
7954       rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat,
7955 				 REG_NOTES (insn));
7956       validate_change (insn, &REG_NOTES (insn), note, 1);
7957     }
7958   pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
7959   return pat;
7960 }
7961 
7962 /* Use the ccfsm machinery to do if conversion.  */
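
/* Schematically (assembly purely illustrative, not literal output), a
   branch over a single insn such as

	bne	.L1
	sub	r0,r0,r2
   .L1:

   becomes the predicated insn "sub.eq r0,r0,r2", and the branch and, once
   it is no longer referenced, the label are deleted.  */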
7963 
7964 static unsigned
7965 arc_ifcvt (void)
7966 {
7967   struct arc_ccfsm *statep = &cfun->machine->ccfsm_current;
7968   basic_block merge_bb = 0;
7969 
7970   memset (statep, 0, sizeof *statep);
7971   for (rtx_insn *insn = get_insns (); insn; insn = next_insn (insn))
7972     {
7973       arc_ccfsm_advance (insn, statep);
7974 
7975       switch (statep->state)
7976 	{
7977 	case 0:
7978 	  if (JUMP_P (insn))
7979 	    merge_bb = 0;
7980 	  break;
7981 	case 1: case 2:
7982 	  {
7983 	    /* Deleted branch.  */
7984 	    gcc_assert (!merge_bb);
7985 	    merge_bb = BLOCK_FOR_INSN (insn);
7986 	    basic_block succ_bb
7987 	      = BLOCK_FOR_INSN (NEXT_INSN (NEXT_INSN (PREV_INSN (insn))));
7988 	    arc_ccfsm_post_advance (insn, statep);
7989 	    gcc_assert (!IN_RANGE (statep->state, 1, 2));
7990 	    rtx_insn *seq = NEXT_INSN (PREV_INSN (insn));
7991 	    if (seq != insn)
7992 	      {
7993 		rtx slot = XVECEXP (PATTERN (seq), 0, 1);
7994 		rtx pat = PATTERN (slot);
7995 		if (INSN_ANNULLED_BRANCH_P (insn))
7996 		  {
7997 		    rtx cond
7998 		      = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (slot));
7999 		    pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
8000 		  }
8001 		if (!validate_change (seq, &PATTERN (seq), pat, 0))
8002 		  gcc_unreachable ();
8003 		PUT_CODE (slot, NOTE);
8004 		NOTE_KIND (slot) = NOTE_INSN_DELETED;
8005 		if (merge_bb && succ_bb)
8006 		  merge_blocks (merge_bb, succ_bb);
8007 	      }
8008 	    else if (merge_bb && succ_bb)
8009 	      {
8010 		set_insn_deleted (insn);
8011 		merge_blocks (merge_bb, succ_bb);
8012 	      }
8013 	    else
8014 	      {
8015 		PUT_CODE (insn, NOTE);
8016 		NOTE_KIND (insn) = NOTE_INSN_DELETED;
8017 	      }
8018 	    continue;
8019 	  }
8020 	case 3:
8021 	  if (LABEL_P (insn)
8022 	      && statep->target_label == CODE_LABEL_NUMBER (insn))
8023 	    {
8024 	      arc_ccfsm_post_advance (insn, statep);
8025 	      basic_block succ_bb = BLOCK_FOR_INSN (insn);
8026 	      if (merge_bb && succ_bb)
8027 		merge_blocks (merge_bb, succ_bb);
8028 	      else if (--LABEL_NUSES (insn) == 0)
8029 		{
8030 		  const char *name = LABEL_NAME (insn);
8031 		  PUT_CODE (insn, NOTE);
8032 		  NOTE_KIND (insn) = NOTE_INSN_DELETED_LABEL;
8033 		  NOTE_DELETED_LABEL_NAME (insn) = name;
8034 		}
8035 	      merge_bb = 0;
8036 	      continue;
8037 	    }
8038 	  /* Fall through.  */
8039 	case 4: case 5:
8040 	  if (!NONDEBUG_INSN_P (insn))
8041 	    break;
8042 
8043 	  /* Conditionalized insn.  */
8044 
8045 	  rtx_insn *prev, *pprev;
8046 	  rtx *patp, pat, cond;
8047 	  bool annulled; annulled = false;
8048 
8049 	  /* If this is a delay slot insn in a non-annulled branch,
8050 	     don't conditionalize it.  N.B., this should be fine for
8051 	     conditional return too.  However, don't do this for
8052 	     unconditional branches, as these would be encountered when
8053 	     processing an 'else' part.  */
8054 	  prev = PREV_INSN (insn);
8055 	  pprev = PREV_INSN (prev);
8056 	  if (pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn)
8057 	      && JUMP_P (prev) && get_attr_cond (prev) == COND_USE)
8058 	    {
8059 	      if (!INSN_ANNULLED_BRANCH_P (prev))
8060 		break;
8061 	      annulled = true;
8062 	    }
8063 
8064 	  patp = &PATTERN (insn);
8065 	  pat = *patp;
8066 	  cond = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (insn));
8067 	  if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8068 	    {
8069 	      /* ??? don't conditionalize if all side effects are dead
8070 		 in the not-execute case.  */
8071 
8072 	      pat = conditionalize_nonjump (pat, cond, insn, annulled);
8073 	    }
8074 	  else if (simplejump_p (insn))
8075 	    {
8076 	      patp = &SET_SRC (pat);
8077 	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, *patp, pc_rtx);
8078 	    }
8079 	  else if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
8080 	    {
8081 	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, pat, pc_rtx);
8082 	      pat = gen_rtx_SET (pc_rtx, pat);
8083 	    }
8084 	  else
8085 	    gcc_unreachable ();
8086 	  validate_change (insn, patp, pat, 1);
8087 	  if (!apply_change_group ())
8088 	    gcc_unreachable ();
8089 	  if (JUMP_P (insn))
8090 	    {
8091 	      rtx_insn *next = next_nonnote_insn (insn);
8092 	      if (GET_CODE (next) == BARRIER)
8093 		delete_insn (next);
8094 	      if (statep->state == 3)
8095 		continue;
8096 	    }
8097 	  break;
8098 	default:
8099 	  gcc_unreachable ();
8100 	}
8101       arc_ccfsm_post_advance (insn, statep);
8102     }
8103   return 0;
8104 }
8105 
8106 /* Find annulled delay insns and convert them to use the appropriate predicate.
8107    This allows branch shortening to size up these insns properly.  */
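
/* E.g. (illustrative rtx): the delay-slot insn of an annulled branch that
   only executes on the taken path, say (set (reg:SI r3) (reg:SI r4)),
   is rewritten as
     (cond_exec (eq ...) (set (reg:SI r3) (reg:SI r4)))
   so that branch shortening sizes it as a (long) predicated insn.  */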
8108 
8109 static unsigned
8110 arc_predicate_delay_insns (void)
8111 {
8112   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8113     {
8114       rtx pat, jump, dlay, src, cond, *patp;
8115       int reverse;
8116 
8117       if (!NONJUMP_INSN_P (insn)
8118 	  || GET_CODE (pat = PATTERN (insn)) != SEQUENCE)
8119 	continue;
8120       jump = XVECEXP (pat, 0, 0);
8121       dlay = XVECEXP (pat, 0, 1);
8122       if (!JUMP_P (jump) || !INSN_ANNULLED_BRANCH_P (jump))
8123 	continue;
8124       /* If the branch insn does the annulling, leave the delay insn alone.  */
8125       if (!TARGET_AT_DBR_CONDEXEC && !INSN_FROM_TARGET_P (dlay))
8126 	continue;
8127       /* ??? Could also leave DLAY un-conditionalized if its target is dead
8128 	 on the other path.  */
8129       gcc_assert (GET_CODE (PATTERN (jump)) == SET);
8130       gcc_assert (SET_DEST (PATTERN (jump)) == pc_rtx);
8131       src = SET_SRC (PATTERN (jump));
8132       gcc_assert (GET_CODE (src) == IF_THEN_ELSE);
8133       cond = XEXP (src, 0);
8134       if (XEXP (src, 2) == pc_rtx)
8135 	reverse = 0;
8136       else if (XEXP (src, 1) == pc_rtx)
8137 	reverse = 1;
8138       else
8139 	gcc_unreachable ();
8140       if (reverse != !INSN_FROM_TARGET_P (dlay))
8141 	{
8142 	  machine_mode ccm = GET_MODE (XEXP (cond, 0));
8143 	  enum rtx_code code = reverse_condition (GET_CODE (cond));
8144 	  if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
8145 	    code = reverse_condition_maybe_unordered (GET_CODE (cond));
8146 
8147 	  cond = gen_rtx_fmt_ee (code, GET_MODE (cond),
8148 				 copy_rtx (XEXP (cond, 0)),
8149 				 copy_rtx (XEXP (cond, 1)));
8150 	}
8151       else
8152 	cond = copy_rtx (cond);
8153       patp = &PATTERN (dlay);
8154       pat = *patp;
8155       pat = conditionalize_nonjump (pat, cond, dlay, true);
8156       validate_change (dlay, patp, pat, 1);
8157       if (!apply_change_group ())
8158 	gcc_unreachable ();
8159     }
8160   return 0;
8161 }
8162 
8163 /* For ARC600: If a write to a core reg >=32 appears in a delay slot
8164   (other than of a forward brcc), it creates a hazard when there is a read
8165   of the same register at the branch target.  We can't know what is at the
8166   branch target of calls, and for branches, we don't really know before the
8167   end of delay slot scheduling, either.  Not only can individual instructions
8168   be hoisted out into a delay slot, a basic block can also be emptied this
8169   way, and branch and/or fall through targets be redirected.  Hence we don't
8170   want such writes in a delay slot.  */
8171 
8172 /* Return nonzero iff INSN writes to an extension core register.  */
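
/* E.g. (register number illustrative) a delay-slot insn containing
   (set (reg:SI 40) (reg:SI 0)) - a write to extension core register r40 -
   makes this return 1; writes to r0-r31 or to registers 61 and above
   do not.  */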
8173 
8174 int
8175 arc_write_ext_corereg (rtx insn)
8176 {
8177   subrtx_iterator::array_type array;
8178   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
8179     {
8180       const_rtx x = *iter;
8181       switch (GET_CODE (x))
8182 	{
8183 	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
8184 	  break;
8185 	default:
8186 	  /* This is also fine for PRE/POST_MODIFY, because they
8187 	     contain a SET.  */
8188 	  continue;
8189 	}
8190       const_rtx dest = XEXP (x, 0);
8191       if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61)
8192 	return 1;
8193     }
8194   return 0;
8195 }
8196 
8197 /* This is like the hook, but returns NULL when it can't / won't generate
8198    a legitimate address.  */
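
/* For example (numbers intentionally unspecified): an SImode access to
   (plus (reg) (const_int <large>)) whose offset does not fit the scaled
   9-bit range is split below into a rounded "upper" part forced into a
   register plus a small residual offset that the load/store itself can
   encode.  */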
8199 
8200 static rtx
8201 arc_legitimize_address_0 (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8202 			  machine_mode mode)
8203 {
8204   rtx addr, inner;
8205 
8206   if (flag_pic && SYMBOLIC_CONST (x))
8207     x = arc_legitimize_pic_address (x, 0);
8208   addr = x;
8209   if (GET_CODE (addr) == CONST)
8210     addr = XEXP (addr, 0);
8211   if (GET_CODE (addr) == PLUS
8212       && CONST_INT_P (XEXP (addr, 1))
8213       && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
8214 	   && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0)))
8215 	  || (REG_P (XEXP (addr, 0))
8216 	      && (INTVAL (XEXP (addr, 1)) & 252))))
8217     {
8218       HOST_WIDE_INT offs, upper;
8219       int size = GET_MODE_SIZE (mode);
8220 
8221       offs = INTVAL (XEXP (addr, 1));
8222       upper = (offs + 256 * size) & ~511 * size;
8223       inner = plus_constant (Pmode, XEXP (addr, 0), upper);
8224 #if 0 /* ??? this produces worse code for EEMBC idctrn01  */
8225       if (GET_CODE (x) == CONST)
8226 	inner = gen_rtx_CONST (Pmode, inner);
8227 #endif
8228       addr = plus_constant (Pmode, force_reg (Pmode, inner), offs - upper);
8229       x = addr;
8230     }
8231   else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr))
8232     x = force_reg (Pmode, x);
8233   if (memory_address_p ((machine_mode) mode, x))
8234      return x;
8235   return NULL_RTX;
8236 }
8237 
8238 static rtx
8239 arc_legitimize_address (rtx orig_x, rtx oldx, machine_mode mode)
8240 {
8241   rtx new_x = arc_legitimize_address_0 (orig_x, oldx, mode);
8242 
8243   if (new_x)
8244     return new_x;
8245   return orig_x;
8246 }
8247 
8248 static rtx
8249 arc_delegitimize_address_0 (rtx x)
8250 {
8251   rtx u, gp;
8252 
8253   if (GET_CODE (x) == CONST && GET_CODE (u = XEXP (x, 0)) == UNSPEC)
8254     {
8255       if (XINT (u, 1) == ARC_UNSPEC_GOT)
8256 	return XVECEXP (u, 0, 0);
8257     }
8258   else if (GET_CODE (x) == PLUS
8259 	   && ((REG_P (gp = XEXP (x, 0))
8260 		&& REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
8261 	       || (GET_CODE (gp) == CONST
8262 		   && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
8263 		   && XINT (u, 1) == ARC_UNSPEC_GOT
8264 		   && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
8265 		   && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
8266 	   && GET_CODE (XEXP (x, 1)) == CONST
8267 	   && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
8268 	   && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
8269     return XVECEXP (u, 0, 0);
8270   else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8271 	   && ((REG_P (gp = XEXP (XEXP (x, 0), 1))
8272 		&& REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
8273 	       || (GET_CODE (gp) == CONST
8274 		   && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
8275 		   && XINT (u, 1) == ARC_UNSPEC_GOT
8276 		   && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
8277 		   && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
8278 	   && GET_CODE (XEXP (x, 1)) == CONST
8279 	   && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
8280 	   && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
8281     return gen_rtx_PLUS (GET_MODE (x), XEXP (XEXP (x, 0), 0),
8282 			 XVECEXP (u, 0, 0));
8283   else if (GET_CODE (x) == PLUS
8284 	   && (u = arc_delegitimize_address_0 (XEXP (x, 1))))
8285     return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), u);
8286   return NULL_RTX;
8287 }
8288 
8289 static rtx
8290 arc_delegitimize_address (rtx x)
8291 {
8292   rtx orig_x = x = delegitimize_mem_from_attrs (x);
8293   if (GET_CODE (x) == MEM)
8294     x = XEXP (x, 0);
8295   x = arc_delegitimize_address_0 (x);
8296   if (x)
8297     {
8298       if (MEM_P (orig_x))
8299 	x = replace_equiv_address_nv (orig_x, x);
8300       return x;
8301     }
8302   return orig_x;
8303 }
8304 
8305 /* Return a REG rtx for acc1.  N.B. the gcc-internal representation may
8306    differ from the hardware register number in order to allow the generic
8307    code to correctly split the concatenation of acc1 and acc2.  */
8308 
8309 rtx
8310 gen_acc1 (void)
8311 {
8312   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57);
8313 }
8314 
8315 /* Return a REG rtx for acc2.  N.B. the gcc-internal representation may
8316    differ from the hardware register number in order to allow the generic
8317    code to correctly split the concatenation of acc1 and acc2.  */
8318 
8319 rtx
8320 gen_acc2 (void)
8321 {
8322   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56);
8323 }
8324 
8325 /* Return a REG rtx for mlo.  N.B. the gcc-internal representation may
8326    differ from the hardware register number in order to allow the generic
8327    code to correctly split the concatenation of mhi and mlo.  */
8328 
8329 rtx
8330 gen_mlo (void)
8331 {
8332   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 59: 58);
8333 }
8334 
8335 /* Return a REG rtx for mhi.  N.B. the gcc-internal representation may
8336    differ from the hardware register number in order to allow the generic
8337    code to correctly split the concatenation of mhi and mlo.  */
8338 
8339 rtx
8340 gen_mhi (void)
8341 {
8342   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 58: 59);
8343 }
8344 
8345 /* FIXME: a parameter should be added, and code added to final.c,
8346    to reproduce this functionality in shorten_branches.  */
8347 #if 0
8348 /* Return nonzero iff BRANCH should be unaligned if possible by upsizing
8349    a previous instruction.  */
8350 int
8351 arc_unalign_branch_p (rtx branch)
8352 {
8353   rtx note;
8354 
8355   if (!TARGET_UNALIGN_BRANCH)
8356     return 0;
8357   /* Do not do this if we have a filled delay slot.  */
8358   if (get_attr_delay_slot_filled (branch) == DELAY_SLOT_FILLED_YES
8359       && !NEXT_INSN (branch)->deleted ())
8360     return 0;
8361   note = find_reg_note (branch, REG_BR_PROB, 0);
8362   return (!note
8363 	  || (arc_unalign_prob_threshold && !br_prob_note_reliable_p (note))
8364 	  || INTVAL (XEXP (note, 0)) < arc_unalign_prob_threshold);
8365 }
8366 #endif
8367 
8368 /* When estimating sizes during arc_reorg, when optimizing for speed, there
8369    are three reasons why we need to consider branches to be length 6:
8370    - annul-false delay slot insns are implemented using conditional execution,
8371      thus preventing short insn formation where used.
8372    - for ARC600: annul-true delay slot insns are implemented where possible
8373      using conditional execution, preventing short insn formation where used.
8374    - for ARC700: likely or somewhat likely taken branches are made long and
8375      unaligned if possible to avoid branch penalty.  */
8376 
8377 bool
8378 arc_branch_size_unknown_p (void)
8379 {
8380   return !optimize_size && arc_reorg_in_progress;
8381 }
8382 
8383 /* We are about to output a return insn.  Add padding if necessary to avoid
8384    a mispredict.  A return could happen immediately after the function
8385    start, but after a call we know that there will be at least a blink
8386    restore.  */
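
/* Sketch of the effect (not literal output): when the active insn before
   the return is short and itself follows a call, we either pull the
   return's delay-slot insn out in front of it or emit a nop / nop_s, so
   that the call and the return end up at least 6 bytes apart.  */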
8387 
8388 void
8389 arc_pad_return (void)
8390 {
8391   rtx_insn *insn = current_output_insn;
8392   rtx_insn *prev = prev_active_insn (insn);
8393   int want_long;
8394 
8395   if (!prev)
8396     {
8397       fputs ("\tnop_s\n", asm_out_file);
8398       cfun->machine->unalign ^= 2;
8399       want_long = 1;
8400     }
8401   /* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
8402      because after a call, we'd have to restore blink first.  */
8403   else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
8404     return;
8405   else
8406     {
8407       want_long = (get_attr_length (prev) == 2);
8408       prev = prev_active_insn (prev);
8409     }
8410   if (!prev
8411       || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
8412 	  ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
8413 		       NON_SIBCALL)
8414 	  : CALL_ATTR (prev, NON_SIBCALL)))
8415     {
8416       if (want_long)
8417 	cfun->machine->size_reason
8418 	  = "call/return and return/return must be 6 bytes apart to avoid mispredict";
8419       else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
8420 	{
8421 	  cfun->machine->size_reason
8422 	    = "Long unaligned jump avoids non-delay slot penalty";
8423 	  want_long = 1;
8424 	}
8425       /* Disgorge the delay insn, if there is one and it may be moved.  */
8426       if (final_sequence
8427 	  /* ??? Annulled would be OK if we can and do conditionalize
8428 	     the delay slot insn accordingly.  */
8429 	  && !INSN_ANNULLED_BRANCH_P (insn)
8430 	  && (get_attr_cond (insn) != COND_USE
8431 	      || !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
8432 			     XVECEXP (final_sequence, 0, 1))))
8433 	{
8434 	  prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1));
8435 	  gcc_assert (!prev_real_insn (insn)
8436 		      || !arc_hazard (prev_real_insn (insn), prev));
8437 	  cfun->machine->force_short_suffix = !want_long;
8438 	  rtx save_pred = current_insn_predicate;
8439 	  final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
8440 	  cfun->machine->force_short_suffix = -1;
8441 	  prev->set_deleted ();
8442 	  current_output_insn = insn;
8443 	  current_insn_predicate = save_pred;
8444 	}
8445       else if (want_long)
8446 	fputs ("\tnop\n", asm_out_file);
8447       else
8448 	{
8449 	  fputs ("\tnop_s\n", asm_out_file);
8450 	  cfun->machine->unalign ^= 2;
8451 	}
8452     }
8453   return;
8454 }
8455 
8456 /* The usual; we set up our machine_function data.  */
8457 
8458 static struct machine_function *
8459 arc_init_machine_status (void)
8460 {
8461   struct machine_function *machine;
8462   machine = ggc_cleared_alloc<machine_function> ();
8463   machine->fn_type = ARC_FUNCTION_UNKNOWN;
8464   machine->force_short_suffix = -1;
8465 
8466   return machine;
8467 }
8468 
8469 /* Implements INIT_EXPANDERS.  We just set up to call the above
8470    function.  */
8471 
8472 void
8473 arc_init_expanders (void)
8474 {
8475   init_machine_status = arc_init_machine_status;
8476 }
8477 
8478 /* Check if OP is a proper parallel of a millicode call pattern.  OFFSET
8479    indicates a number of elements to ignore - that allows us to have a
8480    sibcall pattern that starts with (return).  LOAD_P is zero for store
8481    multiple (for prologues), and one for load multiples (for epilogues),
8482    and two for load multiples where no final clobber of blink is required.
8483    We also skip the first load / store element since this is supposed to
8484    be checked in the instruction pattern.  */
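
/* Illustrative shape of an accepted store-multiple (LOAD_P == 0) PARALLEL;
   the element count here is purely hypothetical:

     (parallel [(set (mem (reg sp)) (reg 13))
		(set (mem (plus (reg sp) (const_int 4))) (reg 14))
		(set (mem (plus (reg sp) (const_int 8))) (reg 15))
		(clobber (reg blink))])

   Element 0 is validated by the insn pattern itself; the loop below checks
   that element I stores / loads r13+I at sp+4*I.  */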
8485 
8486 int
8487 arc_check_millicode (rtx op, int offset, int load_p)
8488 {
8489   int len = XVECLEN (op, 0) - offset;
8490   int i;
8491 
8492   if (load_p == 2)
8493     {
8494       if (len < 2 || len > 13)
8495 	return 0;
8496       load_p = 1;
8497     }
8498   else
8499     {
8500       rtx elt = XVECEXP (op, 0, --len);
8501 
8502       if (GET_CODE (elt) != CLOBBER
8503 	  || !REG_P (XEXP (elt, 0))
8504 	  || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM
8505 	  || len < 3 || len > 13)
8506 	return 0;
8507     }
8508   for (i = 1; i < len; i++)
8509     {
8510       rtx elt = XVECEXP (op, 0, i + offset);
8511       rtx reg, mem, addr;
8512 
8513       if (GET_CODE (elt) != SET)
8514 	return 0;
8515       mem = XEXP (elt, load_p);
8516       reg = XEXP (elt, 1-load_p);
8517       if (!REG_P (reg) || REGNO (reg) != 13U+i || !MEM_P (mem))
8518 	return 0;
8519       addr = XEXP (mem, 0);
8520       if (GET_CODE (addr) != PLUS
8521 	  || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
8522 	  || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4)
8523 	return 0;
8524     }
8525   return 1;
8526 }
8527 
8528 /* Accessor functions for cfun->machine->unalign.  */
8529 
8530 int
8531 arc_get_unalign (void)
8532 {
8533   return cfun->machine->unalign;
8534 }
8535 
8536 void
8537 arc_clear_unalign (void)
8538 {
8539   if (cfun)
8540     cfun->machine->unalign = 0;
8541 }
8542 
8543 void
8544 arc_toggle_unalign (void)
8545 {
8546   cfun->machine->unalign ^= 2;
8547 }
8548 
8549 /* Operands 0..2 are the operands of an addsi which uses a 12 bit
8550    constant in operand 2, but which would require a LIMM because of
8551    operand mismatch.
8552    Operands 3 and 4 are new SET_SRCs for operand 0.  */
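
/* E.g. (purely illustrative) r0 = r1 + 200 with r0 a compact register
   splits into "mov_s r0,200" followed by "add_s r0,r0,r1"; otherwise we
   emit a register move first and then add the constant.  */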
8553 
8554 void
8555 split_addsi (rtx *operands)
8556 {
8557   int val = INTVAL (operands[2]);
8558 
8559   /* Try for two short insns first.  Lengths being equal, we prefer
8560      expansions with shorter register lifetimes.  */
8561   if (val > 127 && val <= 255
8562       && satisfies_constraint_Rcq (operands[0]))
8563     {
8564       operands[3] = operands[2];
8565       operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
8566     }
8567   else
8568     {
8569       operands[3] = operands[1];
8570       operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]);
8571     }
8572 }
8573 
8574 /* Operands 0..2 are the operands of a subsi which uses a 12 bit
8575    constant in operand 1, but which would require a LIMM because of
8576    operand mismatch.
8577    Operands 3 and 4 are new SET_SRCs for operand 0.  */
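
/* E.g. (purely illustrative) r0 = 10 - r1 with r0 and r1 compact registers
   splits into "neg_s r0,r1" followed by "add_s r0,r0,10"; when the short
   forms do not apply, we first copy operand 2 into the destination and
   then subtract that from the constant.  */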
8578 
8579 void
8580 split_subsi (rtx *operands)
8581 {
8582   int val = INTVAL (operands[1]);
8583 
8584   /* Try for two short insns first.  Lengths being equal, we prefer
8585      expansions with shorter register lifetimes.  */
8586   if (satisfies_constraint_Rcq (operands[0])
8587       && satisfies_constraint_Rcq (operands[2]))
8588     {
8589       if (val >= -31 && val <= 127)
8590 	{
8591 	  operands[3] = gen_rtx_NEG (SImode, operands[2]);
8592 	  operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
8593 	  return;
8594 	}
8595       else if (val >= 0 && val < 255)
8596 	{
8597 	  operands[3] = operands[1];
8598 	  operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]);
8599 	  return;
8600 	}
8601     }
8602   /* If the destination is not an ARCompact16 register, we might
8603      still have a chance to make a short insn if the source is;
8604      we need to start with a reg-reg move for this.  */
8605   operands[3] = operands[2];
8606   operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]);
8607 }
8608 
8609 /* Handle DOUBLE_REGS uses.
8610    Operand 0: destination register
8611    Operand 1: source register  */
8612 
8613 static bool
8614 arc_process_double_reg_moves (rtx *operands)
8615 {
8616   rtx dest = operands[0];
8617   rtx src  = operands[1];
8618 
8619   enum usesDxState { none, srcDx, destDx, maxDx };
8620   enum usesDxState state = none;
8621 
8622   if (refers_to_regno_p (40, 44, src, 0))
8623     state = srcDx;
8624   if (refers_to_regno_p (40, 44, dest, 0))
8625     {
8626       /* Via arc_register_move_cost, we should never see D,D moves.  */
8627       gcc_assert (state == none);
8628       state = destDx;
8629     }
8630 
8631   if (state == none)
8632     return false;
8633 
8634   if (state == srcDx)
8635     {
8636       /* Without the LR insn, we need to split this into a
8637 	 sequence of insns which will use the DEXCLx and DADDHxy
8638 	 insns to be able to read the Dx register in question.  */
8639       if (TARGET_DPFP_DISABLE_LRSR)
8640 	{
8641 	  /* gen *movdf_insn_nolrsr */
8642 	  rtx set = gen_rtx_SET (dest, src);
8643 	  rtx use1 = gen_rtx_USE (VOIDmode, const1_rtx);
8644 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, use1)));
8645 	}
8646       else
8647 	{
8648 	  /* When we have 'mov D, r' or 'mov D, D' then get the target
8649 	     register pair for use with LR insn.  */
8650 	  rtx destHigh = simplify_gen_subreg(SImode, dest, DFmode, 4);
8651 	  rtx destLow  = simplify_gen_subreg(SImode, dest, DFmode, 0);
8652 
8653 	  /* Produce the two LR insns to get the high and low parts.  */
8654 	  emit_insn (gen_rtx_SET (destHigh,
8655 				  gen_rtx_UNSPEC_VOLATILE (Pmode,
8656 							   gen_rtvec (1, src),
8657 				  VUNSPEC_ARC_LR_HIGH)));
8658 	  emit_insn (gen_rtx_SET (destLow,
8659 				  gen_rtx_UNSPEC_VOLATILE (Pmode,
8660 							   gen_rtvec (1, src),
8661 				  VUNSPEC_ARC_LR)));
8662 	}
8663     }
8664   else if (state == destDx)
8665     {
8666       /* When we have 'mov r, D' or 'mov D, D' and we have access to the
8667 	 LR insn get the target register pair.  */
8668       rtx srcHigh = simplify_gen_subreg(SImode, src, DFmode, 4);
8669       rtx srcLow  = simplify_gen_subreg(SImode, src, DFmode, 0);
8670 
8671       emit_insn (gen_rtx_UNSPEC_VOLATILE (Pmode,
8672 					  gen_rtvec (3, dest, srcHigh, srcLow),
8673 					  VUNSPEC_ARC_DEXCL_NORES));
8674 
8675     }
8676   else
8677     gcc_unreachable ();
8678 
8679   return true;
8680 }
8681 
8682 /* Operands 0..1 are the operands of a 64 bit move instruction.
8683    Split it into two moves with operands 2/3 and 4/5.  */
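
/* Illustrative example (register numbers hypothetical):
   (set (reg:DI 2) (reg:DI 0)) becomes the pair
     (set (reg:SI 2) (reg:SI 0))
     (set (reg:SI 3) (reg:SI 1))
   with the two halves emitted in the opposite order when the first
   destination word overlaps the still-needed source word.  */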
8684 
8685 void
8686 arc_split_move (rtx *operands)
8687 {
8688   machine_mode mode = GET_MODE (operands[0]);
8689   int i;
8690   int swap = 0;
8691   rtx xop[4];
8692 
8693   if (TARGET_DPFP)
8694   {
8695     if (arc_process_double_reg_moves (operands))
8696       return;
8697   }
8698 
8699   if (TARGET_LL64
8700       && ((memory_operand (operands[0], mode)
8701 	   && even_register_operand (operands[1], mode))
8702 	  || (memory_operand (operands[1], mode)
8703 	      && even_register_operand (operands[0], mode))))
8704     {
8705       emit_move_insn (operands[0], operands[1]);
8706       return;
8707     }
8708 
8709   for (i = 0; i < 2; i++)
8710     {
8711       if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
8712 	{
8713 	  rtx addr = XEXP (operands[i], 0);
8714 	  rtx r, o;
8715 	  enum rtx_code code;
8716 
8717 	  gcc_assert (!reg_overlap_mentioned_p (operands[0], addr));
8718 	  switch (GET_CODE (addr))
8719 	    {
8720 	    case PRE_DEC: o = GEN_INT (-8); goto pre_modify;
8721 	    case PRE_INC: o = GEN_INT (8); goto pre_modify;
8722 	    case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1);
8723 	    pre_modify:
8724 	      code = PRE_MODIFY;
8725 	      break;
8726 	    case POST_DEC: o = GEN_INT (-8); goto post_modify;
8727 	    case POST_INC: o = GEN_INT (8); goto post_modify;
8728 	    case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1);
8729 	    post_modify:
8730 	      code = POST_MODIFY;
8731 	      swap = 2;
8732 	      break;
8733 	    default:
8734 	      gcc_unreachable ();
8735 	    }
8736 	  r = XEXP (addr, 0);
8737 	  xop[0+i] = adjust_automodify_address_nv
8738 		      (operands[i], SImode,
8739 		       gen_rtx_fmt_ee (code, Pmode, r,
8740 				       gen_rtx_PLUS (Pmode, r, o)),
8741 		       0);
8742 	  xop[2+i] = adjust_automodify_address_nv
8743 		      (operands[i], SImode, plus_constant (Pmode, r, 4), 4);
8744 	}
8745       else
8746 	{
8747 	  xop[0+i] = operand_subword (operands[i], 0, 0, mode);
8748 	  xop[2+i] = operand_subword (operands[i], 1, 0, mode);
8749 	}
8750     }
8751   if (reg_overlap_mentioned_p (xop[0], xop[3]))
8752     {
8753       swap = 2;
8754       gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
8755     }
8756 
8757   emit_move_insn (xop[0 + swap], xop[1 + swap]);
8758   emit_move_insn (xop[2 - swap], xop[3 - swap]);
8759 
8760 }
8761 
8762 /* Select between the instruction output templates s_tmpl (for short INSNs)
8763    and l_tmpl (for long INSNs).  */
8764 
8765 const char *
8766 arc_short_long (rtx_insn *insn, const char *s_tmpl, const char *l_tmpl)
8767 {
8768   int is_short = arc_verify_short (insn, cfun->machine->unalign, -1);
8769 
8770   extract_constrain_insn_cached (insn);
8771   return is_short ? s_tmpl : l_tmpl;
8772 }
8773 
8774 /* Searches X for any reference to REGNO, returning the rtx of the
8775    reference found if any.  Otherwise, returns NULL_RTX.  */
8776 
8777 rtx
8778 arc_regno_use_in (unsigned int regno, rtx x)
8779 {
8780   const char *fmt;
8781   int i, j;
8782   rtx tem;
8783 
8784   if (REG_P (x) && refers_to_regno_p (regno, x))
8785     return x;
8786 
8787   fmt = GET_RTX_FORMAT (GET_CODE (x));
8788   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8789     {
8790       if (fmt[i] == 'e')
8791 	{
8792 	  if ((tem = regno_use_in (regno, XEXP (x, i))))
8793 	    return tem;
8794 	}
8795       else if (fmt[i] == 'E')
8796 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8797 	  if ((tem = regno_use_in (regno , XVECEXP (x, i, j))))
8798 	    return tem;
8799     }
8800 
8801   return NULL_RTX;
8802 }
8803 
8804 /* Return the integer value of the "type" attribute for INSN, or -1 if
8805    INSN can't have attributes.  */
8806 
8807 int
8808 arc_attr_type (rtx_insn *insn)
8809 {
8810   if (NONJUMP_INSN_P (insn)
8811       ? (GET_CODE (PATTERN (insn)) == USE
8812 	 || GET_CODE (PATTERN (insn)) == CLOBBER)
8813       : JUMP_P (insn)
8814       ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
8815 	 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
8816       : !CALL_P (insn))
8817     return -1;
8818   return get_attr_type (insn);
8819 }
8820 
8821 /* Return true if insn sets the condition codes.  */
8822 
8823 bool
8824 arc_sets_cc_p (rtx_insn *insn)
8825 {
8826   if (NONJUMP_INSN_P (insn))
8827     if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
8828       insn = seq->insn (seq->len () - 1);
8829   return arc_attr_type (insn) == TYPE_COMPARE;
8830 }
8831 
8832 /* Return true if INSN is an instruction with a delay slot we may want
8833    to fill.  */
8834 
8835 bool
8836 arc_need_delay (rtx_insn *insn)
8837 {
8838   rtx_insn *next;
8839 
8840   if (!flag_delayed_branch)
8841     return false;
8842   /* The return at the end of a function needs a delay slot.  */
8843   if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
8844       && (!(next = next_active_insn (insn))
8845 	  || ((!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) != SEQUENCE)
8846 	      && arc_attr_type (next) == TYPE_RETURN))
8847       && (!TARGET_PAD_RETURN
8848 	  || (prev_active_insn (insn)
8849 	      && prev_active_insn (prev_active_insn (insn))
8850 	      && prev_active_insn (prev_active_insn (prev_active_insn (insn))))))
8851     return true;
8852   if (NONJUMP_INSN_P (insn)
8853       ? (GET_CODE (PATTERN (insn)) == USE
8854 	 || GET_CODE (PATTERN (insn)) == CLOBBER
8855 	 || GET_CODE (PATTERN (insn)) == SEQUENCE)
8856       : JUMP_P (insn)
8857       ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
8858 	 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
8859       : !CALL_P (insn))
8860     return false;
8861   return num_delay_slots (insn) != 0;
8862 }
8863 
8864 /* Return true if the scheduling pass(es) has/have already run,
8865    i.e. where possible, we should try to mitigate high latencies
8866    by different instruction selection.  */
8867 
8868 bool
8869 arc_scheduling_not_expected (void)
8870 {
8871   return cfun->machine->arc_reorg_started;
8872 }
8873 
8874 /* Oddly enough, sometimes we get a zero overhead loop that branch
8875    shortening doesn't think is a loop - observed with compile/pr24883.c
8876    -O3 -fomit-frame-pointer -funroll-loops.  Make sure the alignment is
8877    visible for branch shortening  (we actually align the loop
8878    insn before it, but that is equivalent since the loop insn is 4 bytes
8879    long.)  */
8880 
8881 int
8882 arc_label_align (rtx label)
8883 {
8884   int loop_align = LOOP_ALIGN (LABEL);
8885 
8886   if (loop_align > align_labels_log)
8887     {
8888       rtx_insn *prev = prev_nonnote_insn (label);
8889 
8890       if (prev && NONJUMP_INSN_P (prev)
8891 	  && GET_CODE (PATTERN (prev)) == PARALLEL
8892 	  && recog_memoized (prev) == CODE_FOR_doloop_begin_i)
8893 	return loop_align;
8894     }
8895   /* Code has a minimum p2 alignment of 1, which we must restore after an
8896      ADDR_DIFF_VEC.  */
8897   if (align_labels_log < 1)
8898     {
8899       rtx_insn *next = next_nonnote_nondebug_insn (label);
8900       if (INSN_P (next) && recog_memoized (next) >= 0)
8901 	return 1;
8902     }
8903   return align_labels_log;
8904 }
8905 
8906 /* Return true if LABEL is in executable code.  */
8907 
8908 bool
8909 arc_text_label (rtx_insn *label)
8910 {
8911   rtx_insn *next;
8912 
8913   /* ??? We use deleted labels like they were still there, see
8914      gcc.c-torture/compile/20000326-2.c .  */
8915   gcc_assert (GET_CODE (label) == CODE_LABEL
8916 	      || (GET_CODE (label) == NOTE
8917 		  && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL));
8918   next = next_nonnote_insn (label);
8919   if (next)
8920     return (!JUMP_TABLE_DATA_P (next)
8921 	    || GET_CODE (PATTERN (next)) != ADDR_VEC);
8922   else if (!PREV_INSN (label))
8923     /* ??? sometimes text labels get inserted very late, see
8924        gcc.dg/torture/stackalign/comp-goto-1.c */
8925     return true;
8926   return false;
8927 }
8928 
8929 /* Return the size of the pretend args for DECL.  */
8930 
8931 int
8932 arc_decl_pretend_args (tree decl)
8933 {
8934   /* struct function is in DECL_STRUCT_FUNCTION (decl), but no
8935      pretend_args there...  See PR38391.  */
8936   gcc_assert (decl == current_function_decl);
8937   return crtl->args.pretend_args_size;
8938 }
8939 
8940 /* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble
8941   when compiling with -O2 -freorder-blocks-and-partition -fprofile-use
8942   -D_PROFILE_USE; delay branch scheduling then follows a crossing jump
8943   to redirect two breqs.  */
8944 
8945 static bool
8946 arc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8947 {
8948   /* ??? get_attr_type is declared to take an rtx.  */
8949   union { const rtx_insn *c; rtx_insn *r; } u;
8950 
8951   u.c = follower;
8952   if (CROSSING_JUMP_P (followee))
8953     switch (get_attr_type (u.r))
8954       {
8955       case TYPE_BRCC:
8956       case TYPE_BRCC_NO_DELAY_SLOT:
8957 	return false;
8958       default:
8959 	return true;
8960       }
8961   return true;
8962 }
8963 
8964 /* Implement EPILOGUE_USES.
8965    Return true if REGNO should be added to the deemed uses of the epilogue.
8966 
8967    We use the return address
8968    arc_return_address_regs[arc_compute_function_type (cfun)] .
8969    But also, we have to make sure all the register restore instructions
8970    are known to be live in interrupt functions.  */
8971 
8972 bool
8973 arc_epilogue_uses (int regno)
8974 {
8975   if (reload_completed)
8976     {
8977       if (ARC_INTERRUPT_P (cfun->machine->fn_type))
8978 	{
8979 	  if (!fixed_regs[regno])
8980 	    return true;
8981 	  return regno == arc_return_address_regs[cfun->machine->fn_type];
8982 	}
8983       else
8984 	return regno == RETURN_ADDR_REGNUM;
8985     }
8986   else
8987     return regno == arc_return_address_regs[arc_compute_function_type (cfun)];
8988 }
8989 
8990 #ifndef TARGET_NO_LRA
8991 #define TARGET_NO_LRA !TARGET_LRA
8992 #endif
8993 
8994 static bool
8995 arc_lra_p (void)
8996 {
8997   return !TARGET_NO_LRA;
8998 }
8999 
9000 /* ??? Should we define TARGET_REGISTER_PRIORITY?  We might prefer to use
9001    Rcq registers, because some insns are shorter with them.  OTOH we already
9002    have separate alternatives for this purpose, and other insns don't
9003    mind, so maybe we should rather prefer the other registers?
9004    We need more data, and we can only get that if we allow people to
9005    try all options.  */
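
/* Note: the test ((((r & 7) ^ 4) - 4) & 15) == r used below holds exactly
   for r0-r3 and r12-r15, apparently the register set favoured by the
   16-bit "compact" encodings; the NONCOMPACT case is simply its
   negation.  */
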
9006 static int
9007 arc_register_priority (int r)
9008 {
9009   switch (arc_lra_priority_tag)
9010     {
9011     case ARC_LRA_PRIORITY_NONE:
9012       return 0;
9013     case ARC_LRA_PRIORITY_NONCOMPACT:
9014       return ((((r & 7) ^ 4) - 4) & 15) != r;
9015     case ARC_LRA_PRIORITY_COMPACT:
9016       return ((((r & 7) ^ 4) - 4) & 15) == r;
9017     default:
9018       gcc_unreachable ();
9019     }
9020 }
9021 
9022 static reg_class_t
9023 arc_spill_class (reg_class_t /* orig_class */, machine_mode)
9024 {
9025   return GENERAL_REGS;
9026 }
9027 
9028 bool
9029 arc_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9030 			       int itype)
9031 {
9032   rtx x = *p;
9033   enum reload_type type = (enum reload_type) itype;
9034 
9035   if (GET_CODE (x) == PLUS
9036       && CONST_INT_P (XEXP (x, 1))
9037       && (RTX_OK_FOR_BASE_P (XEXP (x, 0), true)
9038 	  || (REG_P (XEXP (x, 0))
9039 	      && reg_equiv_constant (REGNO (XEXP (x, 0))))))
9040     {
9041       int scale = GET_MODE_SIZE (mode);
9042       int shift;
9043       rtx index_rtx = XEXP (x, 1);
9044       HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9045       rtx reg, sum, sum2;
9046 
9047       if (scale > 4)
9048 	scale = 4;
9049       if ((scale-1) & offset)
9050 	scale = 1;
9051       shift = scale >> 1;
9052       offset_base
9053 	= ((offset + (256 << shift))
9054 	   & ((HOST_WIDE_INT)((unsigned HOST_WIDE_INT) -512 << shift)));
9055       /* Sometimes the normal form does not suit DImode.  We
9056 	 could avoid that by using smaller ranges, but that
9057 	 would give less optimized code when SImode is
9058 	 prevalent.  */
9059       if (GET_MODE_SIZE (mode) + offset - offset_base <= (256 << shift))
9060 	{
9061 	  int regno;
9062 
9063 	  reg = XEXP (x, 0);
9064 	  regno = REGNO (reg);
9065 	  sum2 = sum = plus_constant (Pmode, reg, offset_base);
9066 
9067 	  if (reg_equiv_constant (regno))
9068 	    {
9069 	      sum2 = plus_constant (Pmode, reg_equiv_constant (regno),
9070 				    offset_base);
9071 	      if (GET_CODE (sum2) == PLUS)
9072 		sum2 = gen_rtx_CONST (Pmode, sum2);
9073 	    }
9074 	  *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9075 	  push_reload (sum2, NULL_RTX, &XEXP (*p, 0), NULL,
9076 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum,
9077 		       type);
9078 	  return true;
9079 	}
9080     }
9081   /* We must re-recognize what we created before.  */
9082   else if (GET_CODE (x) == PLUS
9083 	   && GET_CODE (XEXP (x, 0)) == PLUS
9084 	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9085 	   && REG_P  (XEXP (XEXP (x, 0), 0))
9086 	   && CONST_INT_P (XEXP (x, 1)))
9087     {
9088       /* Because this address is so complex, we know it must have
9089 	 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9090 	 it is already unshared, and needs no further unsharing.  */
9091       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9092 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9093       return true;
9094     }
9095   return false;
9096 }
9097 
9098 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
9099 
9100 static bool
9101 arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
9102 				    unsigned int align,
9103 				    enum by_pieces_operation op,
9104 				    bool speed_p)
9105 {
9106   /* Let the movmem expander handle small block moves.  */
9107   if (op == MOVE_BY_PIECES)
9108     return false;
9109 
9110   return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
9111 }
9112 
9113 /* Emit a (pre) memory barrier around an atomic sequence according to
9114    MODEL.  */
9115 
9116 static void
9117 arc_pre_atomic_barrier (enum memmodel model)
9118 {
9119   if (need_atomic_barrier_p (model, true))
9120     emit_insn (gen_memory_barrier ());
9121 }
9122 
9123 /* Emit a (post) memory barrier around an atomic sequence according to
9124    MODEL.  */
9125 
9126 static void
9127 arc_post_atomic_barrier (enum memmodel model)
9128 {
9129   if (need_atomic_barrier_p (model, false))
9130     emit_insn (gen_memory_barrier ());
9131 }
9132 
9133 /* Emit jump insn INSN, marking it as very unlikely to be taken.  */
9134 
9135 static void
9136 emit_unlikely_jump (rtx insn)
9137 {
9138   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
9139 
9140   insn = emit_jump_insn (insn);
9141   add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
9142 }
9143 
9144 /* Expand code to perform an 8 or 16-bit compare and swap by doing
9145    a 32-bit compare and swap on the word containing the byte or
9146    half-word.  The difference between a weak and a strong CAS is that
9147    the weak version may simply fail.  The strong version relies on two
9148    loops: one checks whether the SCOND op succeeded, the other checks
9149    whether the accessed 32-bit location containing the 8 or 16 bit
9150    datum has been changed by another thread.  The first loop is
9151    implemented by the atomic_compare_and_swapsi_1 pattern.  The second
9152    loop is implemented by this routine.  */
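
/* Sketch of the method (all shifts and masks illustrative): for a QImode
   CAS at address A we operate on the aligned word at A & -4, build
   MASK = 0xff << (8 * byte offset), splice the shifted old and new bytes
   into the untouched bits of the loaded word, and run the SImode
   compare-and-swap on that word; the strong variant loops for as long as
   bits outside MASK keep changing underneath us.  */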
9153 
9154 static void
9155 arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem,
9156 				rtx oldval, rtx newval, rtx weak,
9157 				rtx mod_s, rtx mod_f)
9158 {
9159   rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
9160   rtx addr = gen_reg_rtx (Pmode);
9161   rtx off = gen_reg_rtx (SImode);
9162   rtx oldv = gen_reg_rtx (SImode);
9163   rtx newv = gen_reg_rtx (SImode);
9164   rtx oldvalue = gen_reg_rtx (SImode);
9165   rtx newvalue = gen_reg_rtx (SImode);
9166   rtx res = gen_reg_rtx (SImode);
9167   rtx resv = gen_reg_rtx (SImode);
9168   rtx memsi, val, mask, end_label, loop_label, cc, x;
9169   machine_mode mode;
9170   bool is_weak = (weak != const0_rtx);
9171 
9172   /* Truncate the address.  */
9173   emit_insn (gen_rtx_SET (addr,
9174 			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
9175 
9176   /* Compute the datum offset.  */
9177   emit_insn (gen_rtx_SET (off,
9178 			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));
9179   if (TARGET_BIG_ENDIAN)
9180     emit_insn (gen_rtx_SET (off,
9181 			    gen_rtx_MINUS (SImode,
9182 					   (GET_MODE (mem) == QImode) ?
9183 					   GEN_INT (3) : GEN_INT (2), off)));
9184 
9185   /* Normal read from truncated address.  */
9186   memsi = gen_rtx_MEM (SImode, addr);
9187   set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
9188   MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
9189 
9190   val = copy_to_reg (memsi);
9191 
9192   /* Convert the offset to bits.  */
9193   emit_insn (gen_rtx_SET (off,
9194 			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
9195 
9196   /* Get the proper mask.  */
9197   if (GET_MODE (mem) == QImode)
9198     mask = force_reg (SImode, GEN_INT (0xff));
9199   else
9200     mask = force_reg (SImode, GEN_INT (0xffff));
9201 
9202   emit_insn (gen_rtx_SET (mask,
9203 			  gen_rtx_ASHIFT (SImode, mask, off)));
9204 
9205   /* Prepare the old and new values.  */
9206   emit_insn (gen_rtx_SET (val,
9207 			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9208 				       val)));
9209 
9210   oldval = gen_lowpart (SImode, oldval);
9211   emit_insn (gen_rtx_SET (oldv,
9212 			  gen_rtx_ASHIFT (SImode, oldval, off)));
9213 
9214   newval = gen_lowpart_common (SImode, newval);
9215   emit_insn (gen_rtx_SET (newv,
9216 			  gen_rtx_ASHIFT (SImode, newval, off)));
9217 
9218   emit_insn (gen_rtx_SET (oldv,
9219 			  gen_rtx_AND (SImode, oldv, mask)));
9220 
9221   emit_insn (gen_rtx_SET (newv,
9222 			  gen_rtx_AND (SImode, newv, mask)));
9223 
9224   if (!is_weak)
9225     {
9226       end_label = gen_label_rtx ();
9227       loop_label = gen_label_rtx ();
9228       emit_label (loop_label);
9229     }
9230 
9231   /* Make the old and new values.  */
9232   emit_insn (gen_rtx_SET (oldvalue,
9233 			  gen_rtx_IOR (SImode, oldv, val)));
9234 
9235   emit_insn (gen_rtx_SET (newvalue,
9236 			  gen_rtx_IOR (SImode, newv, val)));
9237 
9238   /* Try a 32-bit atomic compare and swap.  It clobbers the CC
9239      register.  */
9240   emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue,
9241 					      weak, mod_s, mod_f));
9242 
9243   /* Regardless of the weakness of the operation, a proper boolean
9244      result needs to be provided.  */
9245   x = gen_rtx_REG (CC_Zmode, CC_REG);
9246   x = gen_rtx_EQ (SImode, x, const0_rtx);
9247   emit_insn (gen_rtx_SET (bool_result, x));
9248 
9249   if (!is_weak)
9250     {
9251       /* Check the result: if the atomic op succeeded, go to the
9252 	 end label.  */
9253       x = gen_rtx_REG (CC_Zmode, CC_REG);
9254       x = gen_rtx_EQ (VOIDmode, x, const0_rtx);
9255       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9256 				gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx);
9257       emit_jump_insn (gen_rtx_SET (pc_rtx, x));
9258 
9259       /* Wait for the right moment when the accessed 32-bit location
9260 	 is stable.  */
9261       emit_insn (gen_rtx_SET (resv,
9262 			      gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9263 					   res)));
9264       mode = SELECT_CC_MODE (NE, resv, val);
9265       cc = gen_rtx_REG (mode, CC_REG);
9266       emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val)));
9267 
9268       /* Set the new value of the 32 bit location, properly masked.  */
9269       emit_insn (gen_rtx_SET (val, resv));
9270 
9271       /* Try again if the location is unstable.  Fall through if only
9272 	 the scond op failed.  */
9273       x = gen_rtx_NE (VOIDmode, cc, const0_rtx);
9274       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9275 				gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx);
9276       emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9277 
9278       emit_label (end_label);
9279     }
9280 
9281   /* Finally, return the result properly for the given mode.  */
9282   emit_insn (gen_rtx_SET (res,
9283 			  gen_rtx_AND (SImode, res, mask)));
9284 
9285   emit_insn (gen_rtx_SET (res,
9286 			  gen_rtx_LSHIFTRT (SImode, res, off)));
9287 
9288   emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
9289 }
9290 
9291 /* Helper function used by "atomic_compare_and_swap" expand
9292    pattern.  */
9293 
9294 void
9295 arc_expand_compare_and_swap (rtx operands[])
9296 {
9297   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
9298   machine_mode mode;
9299 
9300   bval = operands[0];
9301   rval = operands[1];
9302   mem = operands[2];
9303   oldval = operands[3];
9304   newval = operands[4];
9305   is_weak = operands[5];
9306   mod_s = operands[6];
9307   mod_f = operands[7];
9308   mode = GET_MODE (mem);
9309 
9310   if (reg_overlap_mentioned_p (rval, oldval))
9311     oldval = copy_to_reg (oldval);
9312 
9313   if (mode == SImode)
9314     {
9315       emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval,
9316 						  is_weak, mod_s, mod_f));
9317       x = gen_rtx_REG (CC_Zmode, CC_REG);
9318       x = gen_rtx_EQ (SImode, x, const0_rtx);
9319       emit_insn (gen_rtx_SET (bval, x));
9320     }
9321   else
9322     {
9323       arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval,
9324 				      is_weak, mod_s, mod_f);
9325     }
9326 }
9327 
9328 /* Helper function used by the "atomic_compare_and_swapsi_1"
9329    pattern.  */
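
/* Schematically, the strong form below is the retry loop
     1: RVAL = load-locked (MEM); if RVAL != OLDVAL goto 2;
	store-conditional (MEM, NEWVAL); if that failed goto 1;
     2:
   while the weak form omits the branch back on a failed
   store-conditional.  */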
9330 
9331 void
9332 arc_split_compare_and_swap (rtx operands[])
9333 {
9334   rtx rval, mem, oldval, newval;
9335   machine_mode mode;
9336   enum memmodel mod_s, mod_f;
9337   bool is_weak;
9338   rtx label1, label2, x, cond;
9339 
9340   rval = operands[0];
9341   mem = operands[1];
9342   oldval = operands[2];
9343   newval = operands[3];
9344   is_weak = (operands[4] != const0_rtx);
9345   mod_s = (enum memmodel) INTVAL (operands[5]);
9346   mod_f = (enum memmodel) INTVAL (operands[6]);
9347   mode = GET_MODE (mem);
9348 
9349   /* ARC atomic ops work only with 32-bit aligned memories.  */
9350   gcc_assert (mode == SImode);
9351 
9352   arc_pre_atomic_barrier (mod_s);
9353 
9354   label1 = NULL_RTX;
9355   if (!is_weak)
9356     {
9357       label1 = gen_label_rtx ();
9358       emit_label (label1);
9359     }
9360   label2 = gen_label_rtx ();
9361 
9362   /* Load exclusive.  */
9363   emit_insn (gen_arc_load_exclusivesi (rval, mem));
9364 
9365   /* Check if it is oldval.  */
9366   mode = SELECT_CC_MODE (NE, rval, oldval);
9367   cond = gen_rtx_REG (mode, CC_REG);
9368   emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode, rval, oldval)));
9369 
9370   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9371   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9372 			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
9373   emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9374 
9375   /* Exclusively store new item.  Store clobbers CC reg.  */
9376   emit_insn (gen_arc_store_exclusivesi (mem, newval));
9377 
9378   if (!is_weak)
9379     {
9380       /* Check the result of the store.  */
9381       cond = gen_rtx_REG (CC_Zmode, CC_REG);
9382       x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9383       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9384 				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
9385       emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9386     }
9387 
9388   if (mod_f != MEMMODEL_RELAXED)
9389     emit_label (label2);
9390 
9391   arc_post_atomic_barrier (mod_s);
9392 
9393   if (mod_f == MEMMODEL_RELAXED)
9394     emit_label (label2);
9395 }
9396 
9397 /* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
9398    to perform.  MEM is the memory on which to operate.  VAL is the second
9399    operand of the binary operator.  BEFORE and AFTER are optional locations to
9400    return the value of MEM either before or after the operation.  MODEL_RTX
9401    is a CONST_INT containing the memory model to use.  */
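
/* Schematically (pseudo steps, using the parameter names above):
     do
       BEFORE = load-locked (MEM);
       AFTER  = BEFORE <CODE> VAL;	-- NOT and MINUS are special-cased
       store-conditional (MEM, AFTER);
     while the store-conditional fails;
   with the barriers required by MODEL_RTX emitted before and after.  */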
9402 
9403 void
9404 arc_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
9405 			 rtx orig_before, rtx orig_after, rtx model_rtx)
9406 {
9407   enum memmodel model = (enum memmodel) INTVAL (model_rtx);
9408   machine_mode mode = GET_MODE (mem);
9409   rtx label, x, cond;
9410   rtx before = orig_before, after = orig_after;
9411 
9412   /* ARC atomic ops work only with 32-bit aligned memories.  */
9413   gcc_assert (mode == SImode);
9414 
9415   arc_pre_atomic_barrier (model);
9416 
9417   label = gen_label_rtx ();
9418   emit_label (label);
9419   label = gen_rtx_LABEL_REF (VOIDmode, label);
9420 
9421   if (before == NULL_RTX)
9422     before = gen_reg_rtx (mode);
9423 
9424   if (after == NULL_RTX)
9425     after = gen_reg_rtx (mode);
9426 
9427   /* Load exclusive.  */
9428   emit_insn (gen_arc_load_exclusivesi (before, mem));
9429 
9430   switch (code)
9431     {
9432     case NOT:
9433       x = gen_rtx_AND (mode, before, val);
9434       emit_insn (gen_rtx_SET (after, x));
9435       x = gen_rtx_NOT (mode, after);
9436       emit_insn (gen_rtx_SET (after, x));
9437       break;
9438 
9439     case MINUS:
9440       if (CONST_INT_P (val))
9441 	{
9442 	  val = GEN_INT (-INTVAL (val));
9443 	  code = PLUS;
9444 	}
9445 
9446       /* FALLTHRU.  */
9447     default:
9448       x = gen_rtx_fmt_ee (code, mode, before, val);
9449       emit_insn (gen_rtx_SET (after, x));
9450       break;
9451    }
9452 
9453   /* Exclusively store new item.  Store clobbers CC reg.  */
9454   emit_insn (gen_arc_store_exclusivesi (mem, after));
9455 
9456   /* Check the result of the store.  */
9457   cond = gen_rtx_REG (CC_Zmode, CC_REG);
9458   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9459   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9460 			    label, pc_rtx);
9461   emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9462 
9463   arc_post_atomic_barrier (model);
9464 }
9465 
9466 /* Implement TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P.  */
9467 
9468 static bool
9469 arc_no_speculation_in_delay_slots_p ()
9470 {
9471   return true;
9472 }
9473 
9474 /* Return a parallel of registers to represent where to find the
9475    register pieces if required, otherwise NULL_RTX.  */
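
/* E.g. (illustrative) a DImode value living in register N is described
   to the DWARF machinery as (parallel [(reg:SI N) (reg:SI N+1)]).  */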
9476 
9477 static rtx
9478 arc_dwarf_register_span (rtx rtl)
9479 {
9480    enum machine_mode mode = GET_MODE (rtl);
9481    unsigned regno;
9482    rtx p;
9483 
9484    if (GET_MODE_SIZE (mode) != 8)
9485      return NULL_RTX;
9486 
9487    p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
9488    regno = REGNO (rtl);
9489    XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
9490    XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
9491 
9492    return p;
9493 }
9494 
9495 
9496 struct gcc_target targetm = TARGET_INITIALIZER;
9497 
9498 #include "gt-arc.h"
9499