1 /* Subroutines used for code generation on the Synopsys DesignWare ARC cpu.
2    Copyright (C) 1994-2017 Free Software Foundation, Inc.
3 
4    Sources derived from work done by Sankhya Technologies (www.sankhya.com) on
5    behalf of Synopsys Inc.
6 
7    Position Independent Code support added, code cleaned up,
8    comments and support for ARC700 instructions added by
9    Saurabh Verma (saurabh.verma@codito.com)
10    Ramana Radhakrishnan (ramana.radhakrishnan@codito.com)
11 
12    Fixing ABI inconsistencies, optimizations for ARC600 / ARC700 pipelines,
13    profiling support added by Joern Rennecke <joern.rennecke@embecosm.com>
14 
15 This file is part of GCC.
16 
17 GCC is free software; you can redistribute it and/or modify
18 it under the terms of the GNU General Public License as published by
19 the Free Software Foundation; either version 3, or (at your option)
20 any later version.
21 
22 GCC is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25 GNU General Public License for more details.
26 
27 You should have received a copy of the GNU General Public License
28 along with GCC; see the file COPYING3.  If not see
29 <http://www.gnu.org/licenses/>.  */
30 
31 #include "config.h"
32 #include "system.h"
33 #include "coretypes.h"
34 #include "memmodel.h"
35 #include "backend.h"
36 #include "target.h"
37 #include "rtl.h"
38 #include "tree.h"
39 #include "cfghooks.h"
40 #include "df.h"
41 #include "tm_p.h"
42 #include "stringpool.h"
43 #include "optabs.h"
44 #include "regs.h"
45 #include "emit-rtl.h"
46 #include "recog.h"
47 #include "diagnostic.h"
48 #include "fold-const.h"
49 #include "varasm.h"
50 #include "stor-layout.h"
51 #include "calls.h"
52 #include "output.h"
53 #include "insn-attr.h"
54 #include "flags.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "langhooks.h"
58 #include "tm-constrs.h"
59 #include "reload.h" /* For operands_match_p */
60 #include "cfgrtl.h"
61 #include "tree-pass.h"
62 #include "context.h"
63 #include "builtins.h"
64 #include "rtl-iter.h"
65 #include "alias.h"
66 
67 /* Which cpu we're compiling for (ARC600, ARC601, ARC700).  */
68 static char arc_cpu_name[10] = "";
69 static const char *arc_cpu_string = arc_cpu_name;
70 
71 /* ??? Loads can handle any constant, stores can only handle small ones.  */
72 /* OTOH, LIMMs cost extra, so their usefulness is limited.  */
73 #define RTX_OK_FOR_OFFSET_P(MODE, X) \
74 (GET_CODE (X) == CONST_INT \
75  && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & -4, \
76 		     (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3 \
77 		      ? 0 \
78 		      : -(-GET_MODE_SIZE (MODE) | -4) >> 1)))
79 
80 #define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X, INDEX, STRICT) \
81 (GET_CODE (X) == PLUS			     \
82   && RTX_OK_FOR_BASE_P (XEXP (X, 0), (STRICT)) \
83   && ((INDEX && RTX_OK_FOR_INDEX_P (XEXP (X, 1), (STRICT)) \
84        && GET_MODE_SIZE ((MODE)) <= 4) \
85       || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1))))
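/* For illustration (not an exhaustive list), the forms accepted above are
   register + register, e.g. (plus:SI (reg r1) (reg r2)) for accesses of at
   most 4 bytes, and register + small constant, e.g.
   (plus:SI (reg r1) (const_int 32)), where the constant must pass
   RTX_OK_FOR_OFFSET_P for MODE; r1/r2 are just placeholders.  */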
86 
87 #define LEGITIMATE_SCALED_ADDRESS_P(MODE, X, STRICT) \
88 (GET_CODE (X) == PLUS \
89  && GET_CODE (XEXP (X, 0)) == MULT \
90  && RTX_OK_FOR_INDEX_P (XEXP (XEXP (X, 0), 0), (STRICT)) \
91  && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \
92  && ((GET_MODE_SIZE (MODE) == 2 && INTVAL (XEXP (XEXP (X, 0), 1)) == 2) \
93      || (GET_MODE_SIZE (MODE) == 4 && INTVAL (XEXP (XEXP (X, 0), 1)) == 4)) \
94  && (RTX_OK_FOR_BASE_P (XEXP (X, 1), (STRICT)) \
95      || (flag_pic ? CONST_INT_P (XEXP (X, 1)) : CONSTANT_P (XEXP (X, 1)))))
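/* E.g. a scaled SImode access such as
   (plus:SI (mult:SI (reg r2) (const_int 4)) (reg r1)) matches the macro
   above; this is the shape the scaled-index (ld.as / st.as style)
   addressing is expected to use (illustrative sketch only).  */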
96 
97 #define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \
98   (GET_CODE (X) == PLUS \
99    && (REG_P (XEXP ((X), 0)) && REGNO (XEXP ((X), 0)) == SDATA_BASE_REGNUM) \
100    && ((GET_CODE (XEXP((X),1)) == SYMBOL_REF \
101 	&& SYMBOL_REF_SMALL_P (XEXP ((X), 1))) \
102        || (GET_CODE (XEXP ((X), 1)) == CONST \
103 	   && GET_CODE (XEXP (XEXP ((X), 1), 0)) == PLUS \
104 	   && GET_CODE (XEXP (XEXP (XEXP ((X), 1), 0), 0)) == SYMBOL_REF \
105 	   && SYMBOL_REF_SMALL_P (XEXP (XEXP (XEXP ((X), 1), 0), 0)) \
106 	   && GET_CODE (XEXP(XEXP (XEXP ((X), 1), 0), 1)) == CONST_INT)))
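/* By way of example, the small-data forms recognized above are gp-relative
   references such as (plus (reg SDATA_BASE_REGNUM) (symbol_ref "x")) with
   SYMBOL_REF_SMALL_P set, or the same with an offset term like
   (const (plus (symbol_ref "x") (const_int 4))); "x" and the offset are
   just placeholders.  */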
107 
108 /* Array of valid operand punctuation characters.  */
109 char arc_punct_chars[256];
110 
111 /* State used by arc_ccfsm_advance to implement conditional execution.  */
112 struct GTY (()) arc_ccfsm
113 {
114   int state;
115   int cc;
116   rtx cond;
117   rtx_insn *target_insn;
118   int target_label;
119 };
120 
121 #define arc_ccfsm_current cfun->machine->ccfsm_current
122 
123 #define ARC_CCFSM_BRANCH_DELETED_P(STATE) \
124   ((STATE)->state == 1 || (STATE)->state == 2)
125 
126 /* Indicate we're conditionalizing insns now.  */
127 #define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \
128   ((STATE)->state += 2)
129 
130 #define ARC_CCFSM_COND_EXEC_P(STATE) \
131   ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5 \
132    || current_insn_predicate)
133 
134 /* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE.  */
135 #define CCFSM_ISCOMPACT(INSN,STATE) \
136   (ARC_CCFSM_COND_EXEC_P (STATE) \
137    ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
138       || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
139    : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
140 
141 /* Likewise, but also consider that INSN might be in a delay slot of JUMP.  */
142 #define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \
143   ((ARC_CCFSM_COND_EXEC_P (STATE) \
144     || (JUMP_P (JUMP) \
145 	&& INSN_ANNULLED_BRANCH_P (JUMP) \
146 	&& (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \
147    ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
148       || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
149    : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
150 
151 /* The maximum number of insns skipped which will be conditionalised if
152    possible.  */
153 /* When optimizing for speed:
154     Let p be the probability that the potentially skipped insns need to
155     be executed, pn the cost of a correctly predicted non-taken branch,
156     mt the cost of a mis/non-predicted taken branch,
157     mn mispredicted non-taken, pt correctly predicted taken ;
158     costs expressed in numbers of instructions like the ones considered
159     skipping.
160     Unfortunately we don't have a measure of predictability - this
161     is linked to probability only in that in the no-eviction-scenario
162     there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger
163     value that can be assumed *if* the distribution is perfectly random.
164     A predictability of 1 is perfectly plausible no matter what p is,
165     because the decision could be dependent on an invocation parameter
166     of the program.
167     For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn
168     For small p, we want MAX_INSNS_SKIPPED == pt
169 
170    When optimizing for size:
171     We want to skip the insns unless we could use 16 bit opcodes for the
172     non-conditionalized insns to balance the branch length or more.
173     Performance can be a tie-breaker.  */
174 /* If the potentially-skipped insns are likely to be executed, we'll
175    generally save one non-taken branch by conditionalizing them; we want
176    this saving to be no less than 1/p.  */
178 #define MAX_INSNS_SKIPPED 3
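/* Purely illustrative arithmetic for the speed formula above (numbers
   assumed, not measured): with pn = 1, mt = 3 and p = 0.5,
   pn/(1-p) + mt - pn = 2 + 3 - 1 = 4.  */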
179 
180 /* A nop is needed between a 4 byte insn that sets the condition codes and
181    a branch that uses them (the same isn't true for an 8 byte insn that sets
182    the condition codes).  Set by arc_ccfsm_advance.  Used by
183    arc_print_operand.  */
184 
185 static int get_arc_condition_code (rtx);
186 
187 static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);
188 
189 /* Table of machine specific attributes (such as "interrupt" and the
190    call range attributes) supported by the ARC target.  */
191 const struct attribute_spec arc_attribute_table[] =
192 {
193  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
194       affects_type_identity } */
195   { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute, true },
196   /* Function calls made to this symbol must be done indirectly, because
197      it may lie outside of the 21/25 bit addressing range of a normal function
198      call.  */
199   { "long_call",    0, 0, false, true,  true,  NULL, false },
200   /* Whereas these functions are always known to reside within the 25 bit
201      addressing range of unconditionalized bl.  */
202   { "medium_call",   0, 0, false, true,  true,  NULL, false },
203   /* And these functions are always known to reside within the 21 bit
204      addressing range of blcc.  */
205   { "short_call",   0, 0, false, true,  true,  NULL, false },
206   { NULL, 0, 0, false, false, false, NULL, false }
207 };
208 static int arc_comp_type_attributes (const_tree, const_tree);
209 static void arc_file_start (void);
210 static void arc_internal_label (FILE *, const char *, unsigned long);
211 static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
212 				 tree);
213 static int arc_address_cost (rtx, machine_mode, addr_space_t, bool);
214 static void arc_encode_section_info (tree decl, rtx rtl, int first);
215 
216 static void arc_init_builtins (void);
217 static rtx arc_expand_builtin (tree, rtx, rtx, machine_mode, int);
218 
219 static int branch_dest (rtx);
220 
221 static void  arc_output_pic_addr_const (FILE *,  rtx, int);
222 bool arc_legitimate_pic_operand_p (rtx);
223 static bool arc_function_ok_for_sibcall (tree, tree);
224 static rtx arc_function_value (const_tree, const_tree, bool);
225 const char * output_shift (rtx *);
226 static void arc_reorg (void);
227 static bool arc_in_small_data_p (const_tree);
228 
229 static void arc_init_reg_tables (void);
230 static bool arc_return_in_memory (const_tree, const_tree);
231 static bool arc_vector_mode_supported_p (machine_mode);
232 
233 static bool arc_can_use_doloop_p (const widest_int &, const widest_int &,
234 				  unsigned int, bool);
235 static const char *arc_invalid_within_doloop (const rtx_insn *);
236 
237 static void output_short_suffix (FILE *file);
238 
239 static bool arc_frame_pointer_required (void);
240 
241 static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
242 						unsigned int,
243 						enum by_pieces_operation op,
244 						bool);
245 
246 /* Globally visible information about currently selected cpu.  */
247 const arc_cpu_t *arc_selected_cpu;
248 
249 /* Implements target hook TARGET_VECTOR_MODE_SUPPORTED_P.  */
250 
251 static bool
252 arc_vector_mode_supported_p (machine_mode mode)
253 {
254   switch (mode)
255     {
256     case V2HImode:
257       return TARGET_PLUS_DMPY;
258     case V4HImode:
259     case V2SImode:
260       return TARGET_PLUS_QMACW;
261     case V4SImode:
262     case V8HImode:
263       return TARGET_SIMD_SET;
264 
265     default:
266       return false;
267     }
268 }
269 
270 /* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE.  */
271 
272 static machine_mode
273 arc_preferred_simd_mode (machine_mode mode)
274 {
275   switch (mode)
276     {
277     case HImode:
278       return TARGET_PLUS_QMACW ? V4HImode : V2HImode;
279     case SImode:
280       return V2SImode;
281 
282     default:
283       return word_mode;
284     }
285 }
286 
287 /* Implements target hook
288    TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES.  */
289 
290 static unsigned int
291 arc_autovectorize_vector_sizes (void)
292 {
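  /* The value returned below is a bitmask of vector sizes in bytes; 8 | 4
     advertises both 64-bit and 32-bit vectors to the auto-vectorizer
     (a sketch of the hook's contract as understood for this GCC
     version).  */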
293   return TARGET_PLUS_QMACW ? (8 | 4) : 0;
294 }
295 
296 /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review.  */
297 static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
298 static rtx arc_delegitimize_address (rtx);
299 static bool arc_can_follow_jump (const rtx_insn *follower,
300 				 const rtx_insn *followee);
301 
302 static rtx frame_insn (rtx);
303 static void arc_function_arg_advance (cumulative_args_t, machine_mode,
304 				      const_tree, bool);
305 static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
306 
307 static void arc_finalize_pic (void);
308 
309 /* Initialize the GCC target structure.  */
310 #undef  TARGET_COMP_TYPE_ATTRIBUTES
311 #define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes
312 #undef TARGET_ASM_FILE_START
313 #define TARGET_ASM_FILE_START arc_file_start
314 #undef TARGET_ATTRIBUTE_TABLE
315 #define TARGET_ATTRIBUTE_TABLE arc_attribute_table
316 #undef TARGET_ASM_INTERNAL_LABEL
317 #define TARGET_ASM_INTERNAL_LABEL arc_internal_label
318 #undef TARGET_RTX_COSTS
319 #define TARGET_RTX_COSTS arc_rtx_costs
320 #undef TARGET_ADDRESS_COST
321 #define TARGET_ADDRESS_COST arc_address_cost
322 
323 #undef TARGET_ENCODE_SECTION_INFO
324 #define TARGET_ENCODE_SECTION_INFO arc_encode_section_info
325 
326 #undef TARGET_CANNOT_FORCE_CONST_MEM
327 #define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem
328 
329 #undef  TARGET_INIT_BUILTINS
330 #define TARGET_INIT_BUILTINS  arc_init_builtins
331 
332 #undef  TARGET_EXPAND_BUILTIN
333 #define TARGET_EXPAND_BUILTIN arc_expand_builtin
334 
335 #undef  TARGET_BUILTIN_DECL
336 #define TARGET_BUILTIN_DECL arc_builtin_decl
337 
338 #undef  TARGET_ASM_OUTPUT_MI_THUNK
339 #define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk
340 
341 #undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
342 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
343 
344 #undef  TARGET_FUNCTION_OK_FOR_SIBCALL
345 #define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall
346 
347 #undef  TARGET_MACHINE_DEPENDENT_REORG
348 #define TARGET_MACHINE_DEPENDENT_REORG arc_reorg
349 
350 #undef TARGET_IN_SMALL_DATA_P
351 #define TARGET_IN_SMALL_DATA_P arc_in_small_data_p
352 
353 #undef TARGET_PROMOTE_FUNCTION_MODE
354 #define TARGET_PROMOTE_FUNCTION_MODE \
355   default_promote_function_mode_always_promote
356 
357 #undef TARGET_PROMOTE_PROTOTYPES
358 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
359 
360 #undef TARGET_RETURN_IN_MEMORY
361 #define TARGET_RETURN_IN_MEMORY arc_return_in_memory
362 #undef TARGET_PASS_BY_REFERENCE
363 #define TARGET_PASS_BY_REFERENCE arc_pass_by_reference
364 
365 #undef TARGET_SETUP_INCOMING_VARARGS
366 #define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs
367 
368 #undef TARGET_ARG_PARTIAL_BYTES
369 #define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes
370 
371 #undef TARGET_MUST_PASS_IN_STACK
372 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
373 
374 #undef TARGET_FUNCTION_VALUE
375 #define TARGET_FUNCTION_VALUE arc_function_value
376 
377 #undef  TARGET_SCHED_ADJUST_PRIORITY
378 #define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority
379 
380 #undef TARGET_VECTOR_MODE_SUPPORTED_P
381 #define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
382 
383 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
384 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode
385 
386 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
387 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES arc_autovectorize_vector_sizes
388 
389 #undef TARGET_CAN_USE_DOLOOP_P
390 #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
391 
392 #undef TARGET_INVALID_WITHIN_DOLOOP
393 #define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop
394 
395 #undef TARGET_PRESERVE_RELOAD_P
396 #define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p
397 
398 #undef TARGET_CAN_FOLLOW_JUMP
399 #define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump
400 
401 #undef TARGET_DELEGITIMIZE_ADDRESS
402 #define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address
403 
404 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
405 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
406   arc_use_by_pieces_infrastructure_p
407 
408 /* Usually, we will be able to scale anchor offsets.
409    When this fails, we want LEGITIMIZE_ADDRESS to kick in.  */
410 #undef TARGET_MIN_ANCHOR_OFFSET
411 #define TARGET_MIN_ANCHOR_OFFSET (-1024)
412 #undef TARGET_MAX_ANCHOR_OFFSET
413 #define TARGET_MAX_ANCHOR_OFFSET (1020)
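/* 1020 and -1024 correspond to 255 * 4 and -256 * 4, i.e. the limits of a
   9-bit signed offset scaled by 4 for word accesses (stated here as an
   assumption about the ld/st encoding, not derived from the macros
   themselves).  */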
414 
415 #undef TARGET_SECONDARY_RELOAD
416 #define TARGET_SECONDARY_RELOAD arc_secondary_reload
417 
418 #define TARGET_OPTION_OVERRIDE arc_override_options
419 
420 #define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage
421 
422 #define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline
423 
424 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arc_trampoline_adjust_address
425 
426 #define TARGET_CAN_ELIMINATE arc_can_eliminate
427 
428 #define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required
429 
430 #define TARGET_FUNCTION_ARG arc_function_arg
431 
432 #define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance
433 
434 #define TARGET_LEGITIMATE_CONSTANT_P arc_legitimate_constant_p
435 
436 #define TARGET_LEGITIMATE_ADDRESS_P arc_legitimate_address_p
437 
438 #define TARGET_MODE_DEPENDENT_ADDRESS_P arc_mode_dependent_address_p
439 
440 #define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address
441 
442 #define TARGET_ADJUST_INSN_LENGTH arc_adjust_insn_length
443 
444 #define TARGET_INSN_LENGTH_PARAMETERS arc_insn_length_parameters
445 
446 #undef TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P
447 #define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P	\
448   arc_no_speculation_in_delay_slots_p
449 
450 #undef TARGET_LRA_P
451 #define TARGET_LRA_P arc_lra_p
452 #define TARGET_REGISTER_PRIORITY arc_register_priority
453 /* Stores with scaled offsets have different displacement ranges.  */
454 #define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
455 #define TARGET_SPILL_CLASS arc_spill_class
456 
457 #include "target-def.h"
458 
459 #undef TARGET_ASM_ALIGNED_HI_OP
460 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
461 #undef TARGET_ASM_ALIGNED_SI_OP
462 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
463 
464 #ifdef HAVE_AS_TLS
465 #undef TARGET_HAVE_TLS
466 #define TARGET_HAVE_TLS HAVE_AS_TLS
467 #endif
468 
469 #undef TARGET_DWARF_REGISTER_SPAN
470 #define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span
471 
472 /* Try to keep the (mov:DF _, reg) as early as possible so
473    that the d<add/sub/mul>h-lr insns appear together and can
474    use the peephole2 pattern.  */
475 
476 static int
477 arc_sched_adjust_priority (rtx_insn *insn, int priority)
478 {
479   rtx set = single_set (insn);
480   if (set
481       && GET_MODE (SET_SRC (set)) == DFmode
482       && GET_CODE (SET_SRC (set)) == REG)
483     {
484       /* Incrementing priority by 20 (empirically derived).  */
485       return priority + 20;
486     }
487 
488   return priority;
489 }
490 
491 /* For ARC base register + offset addressing, the validity of the
492    address is mode-dependent for most of the offset range, as the
493    offset can be scaled by the access size.
494    We don't expose these as mode-dependent addresses in the
495    mode_dependent_address_p target hook, because that would disable
496    lots of optimizations, and most uses of these addresses are for 32
497    or 64 bit accesses anyway, which are fine.
498    However, that leaves some addresses for 8 / 16 bit values not
499    properly reloaded by the generic code, which is why we have to
500    schedule secondary reloads for these.  */
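/* Hypothetical illustration: a pseudo whose reg_equiv_mem address is
   (plus (reg fp) (const_int 300)) is directly addressable as a 32-bit
   access (the offset can be scaled), but not, assuming a 9-bit unscaled
   offset, as a byte access; in that case a scratch register is requested
   via sri->icode below.  */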
501 
502 static reg_class_t
503 arc_secondary_reload (bool in_p,
504 		      rtx x,
505 		      reg_class_t cl,
506 		      machine_mode mode,
507 		      secondary_reload_info *sri)
508 {
509   enum rtx_code code = GET_CODE (x);
510 
511   if (cl == DOUBLE_REGS)
512     return GENERAL_REGS;
513 
514   /* The loop counter register can be stored, but not loaded directly.  */
515   if ((cl == LPCOUNT_REG || cl == WRITABLE_CORE_REGS)
516       && in_p && MEM_P (x))
517     return GENERAL_REGS;
518 
519  /* If we have a subreg (reg), where reg is a pseudo (that will end up
520     in a memory location), then we may need a scratch register to handle
521     the fp/sp+largeoffset address.  */
522   if (code == SUBREG)
523     {
524       rtx addr = NULL_RTX;
525       x = SUBREG_REG (x);
526 
527       if (REG_P (x))
528 	{
529 	  int regno = REGNO (x);
530 	  if (regno >= FIRST_PSEUDO_REGISTER)
531 	    regno = reg_renumber[regno];
532 
533 	  if (regno != -1)
534 	    return NO_REGS;
535 
536 	  /* It is a pseudo that ends up in a stack location.  */
537 	  if (reg_equiv_mem (REGNO (x)))
538 	    {
539 	      /* Get the equivalent address and check the range of the
540 		 offset.  */
541 	      rtx mem = reg_equiv_mem (REGNO (x));
542 	      addr = find_replacement (&XEXP (mem, 0));
543 	    }
544 	}
545       else
546 	{
547 	  gcc_assert (MEM_P (x));
548 	  addr = XEXP (x, 0);
549 	  addr = simplify_rtx (addr);
550 	}
551       if (addr && GET_CODE (addr) == PLUS
552 	  && CONST_INT_P (XEXP (addr, 1))
553 	  && (!RTX_OK_FOR_OFFSET_P (mode, XEXP (addr, 1))))
554 	{
555 	  switch (mode)
556 	    {
557 	    case QImode:
558 	      sri->icode =
559 		in_p ? CODE_FOR_reload_qi_load : CODE_FOR_reload_qi_store;
560 	      break;
561 	    case HImode:
562 	      sri->icode =
563 		in_p ? CODE_FOR_reload_hi_load : CODE_FOR_reload_hi_store;
564 	      break;
565 	    default:
566 	      break;
567 	    }
568 	}
569     }
570   return NO_REGS;
571 }
572 
573 /* Convert reloads using offsets that are too large to use indirect
574    addressing.  */
575 
576 void
577 arc_secondary_reload_conv (rtx reg, rtx mem, rtx scratch, bool store_p)
578 {
579   rtx addr;
580 
581   gcc_assert (GET_CODE (mem) == MEM);
582   addr = XEXP (mem, 0);
583 
584   /* Large offset: use a move.  FIXME: ld ops accept limms as
585      offsets.  Hence, the following move insn is not required.  */
586   emit_move_insn (scratch, addr);
587   mem = replace_equiv_address_nv (mem, scratch);
588 
589   /* Now create the move.  */
590   if (store_p)
591     emit_insn (gen_rtx_SET (mem, reg));
592   else
593     emit_insn (gen_rtx_SET (reg, mem));
594 
595   return;
596 }
597 
598 static unsigned arc_ifcvt (void);
599 
600 namespace {
601 
602 const pass_data pass_data_arc_ifcvt =
603 {
604   RTL_PASS,
605   "arc_ifcvt",				/* name */
606   OPTGROUP_NONE,			/* optinfo_flags */
607   TV_IFCVT2,				/* tv_id */
608   0,					/* properties_required */
609   0,					/* properties_provided */
610   0,					/* properties_destroyed */
611   0,					/* todo_flags_start */
612   TODO_df_finish			/* todo_flags_finish */
613 };
614 
615 class pass_arc_ifcvt : public rtl_opt_pass
616 {
617 public:
618   pass_arc_ifcvt(gcc::context *ctxt)
619   : rtl_opt_pass(pass_data_arc_ifcvt, ctxt)
620   {}
621 
622   /* opt_pass methods: */
623   opt_pass * clone () { return new pass_arc_ifcvt (m_ctxt); }
624   virtual unsigned int execute (function *) { return arc_ifcvt (); }
625 };
626 
627 } // anon namespace
628 
629 rtl_opt_pass *
630 make_pass_arc_ifcvt (gcc::context *ctxt)
631 {
632   return new pass_arc_ifcvt (ctxt);
633 }
634 
635 static unsigned arc_predicate_delay_insns (void);
636 
637 namespace {
638 
639 const pass_data pass_data_arc_predicate_delay_insns =
640 {
641   RTL_PASS,
642   "arc_predicate_delay_insns",		/* name */
643   OPTGROUP_NONE,			/* optinfo_flags */
644   TV_IFCVT2,				/* tv_id */
645   0,					/* properties_required */
646   0,					/* properties_provided */
647   0,					/* properties_destroyed */
648   0,					/* todo_flags_start */
649   TODO_df_finish			/* todo_flags_finish */
650 };
651 
652 class pass_arc_predicate_delay_insns : public rtl_opt_pass
653 {
654 public:
655   pass_arc_predicate_delay_insns(gcc::context *ctxt)
656   : rtl_opt_pass(pass_data_arc_predicate_delay_insns, ctxt)
657   {}
658 
659   /* opt_pass methods: */
660   virtual unsigned int execute (function *)
661     {
662       return arc_predicate_delay_insns ();
663     }
664 };
665 
666 } // anon namespace
667 
668 rtl_opt_pass *
669 make_pass_arc_predicate_delay_insns (gcc::context *ctxt)
670 {
671   return new pass_arc_predicate_delay_insns (ctxt);
672 }
673 
674 /* Called by OVERRIDE_OPTIONS to initialize various things.  */
675 
676 static void
677 arc_init (void)
678 {
679   if (TARGET_V2)
680     {
681       /* If we have the multiplier, then use it.  */
682       if (TARGET_MPYW || TARGET_MULTI)
683 	  arc_multcost = COSTS_N_INSNS (1);
684     }
685   /* Note: arc_multcost is only used in rtx_cost if speed is true.  */
686   if (arc_multcost < 0)
687     switch (arc_tune)
688       {
689       case TUNE_ARC700_4_2_STD:
690 	/* latency 7;
691 	   max throughput (1 multiply + 4 other insns) / 5 cycles.  */
692 	arc_multcost = COSTS_N_INSNS (4);
693 	if (TARGET_NOMPY_SET)
694 	  arc_multcost = COSTS_N_INSNS (30);
695 	break;
696       case TUNE_ARC700_4_2_XMAC:
697 	/* latency 5;
698 	   max throughput (1 multiply + 2 other insns) / 3 cycles.  */
699 	arc_multcost = COSTS_N_INSNS (3);
700 	if (TARGET_NOMPY_SET)
701 	  arc_multcost = COSTS_N_INSNS (30);
702 	break;
703       case TUNE_ARC600:
704 	if (TARGET_MUL64_SET)
705 	  {
706 	    arc_multcost = COSTS_N_INSNS (4);
707 	    break;
708 	  }
709 	/* Fall through.  */
710       default:
711 	arc_multcost = COSTS_N_INSNS (30);
712 	break;
713       }
714 
715   /* MPY instructions valid only for ARC700 or ARCv2.  */
716   if (TARGET_NOMPY_SET && TARGET_ARC600_FAMILY)
717       error ("-mno-mpy supported only for ARC700 or ARCv2");
718 
719   if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR)
720       error ("-mno-dpfp-lrsr supported only with -mdpfp");
721 
722   /* FPX-1. No fast and compact together.  */
723   if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET)
724       || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET))
725     error ("FPX fast and compact options cannot be specified together");
726 
727   /* FPX-2. No fast-spfp for arc600 or arc601.  */
728   if (TARGET_SPFP_FAST_SET && TARGET_ARC600_FAMILY)
729     error ("-mspfp_fast not available on ARC600 or ARC601");
730 
731   /* FPX-4.  No FPX extensions mixed with FPU extensions.  */
732   if ((TARGET_DPFP_FAST_SET || TARGET_DPFP_COMPACT_SET || TARGET_SPFP)
733       && TARGET_HARD_FLOAT)
734     error ("No FPX/FPU mixing allowed");
735 
736   /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic.  */
737   if (flag_pic && TARGET_ARC600_FAMILY)
738     {
739       warning (DK_WARNING,
740 	       "PIC is not supported for %s. Generating non-PIC code only.",
741 	       arc_cpu_string);
742       flag_pic = 0;
743     }
744 
745   arc_init_reg_tables ();
746 
747   /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
748   memset (arc_punct_chars, 0, sizeof (arc_punct_chars));
749   arc_punct_chars['#'] = 1;
750   arc_punct_chars['*'] = 1;
751   arc_punct_chars['?'] = 1;
752   arc_punct_chars['!'] = 1;
753   arc_punct_chars['^'] = 1;
754   arc_punct_chars['&'] = 1;
755   arc_punct_chars['+'] = 1;
756   arc_punct_chars['_'] = 1;
757 
758   if (optimize > 1 && !TARGET_NO_COND_EXEC)
759     {
760       /* There are two target-independent ifcvt passes, and arc_reorg may do
761 	 one or more arc_ifcvt calls.  */
762       opt_pass *pass_arc_ifcvt_4 = make_pass_arc_ifcvt (g);
763       struct register_pass_info arc_ifcvt4_info
764 	= { pass_arc_ifcvt_4, "dbr", 1, PASS_POS_INSERT_AFTER };
765       struct register_pass_info arc_ifcvt5_info
766 	= { pass_arc_ifcvt_4->clone (), "shorten", 1, PASS_POS_INSERT_BEFORE };
767 
768       register_pass (&arc_ifcvt4_info);
769       register_pass (&arc_ifcvt5_info);
770     }
771 
772   if (flag_delayed_branch)
773     {
774       opt_pass *pass_arc_predicate_delay_insns
775 	= make_pass_arc_predicate_delay_insns (g);
776       struct register_pass_info arc_predicate_delay_info
777 	= { pass_arc_predicate_delay_insns, "dbr", 1, PASS_POS_INSERT_AFTER };
778 
779       register_pass (&arc_predicate_delay_info);
780     }
781 }
782 
783 /* Check ARC options, generate derived target attributes.  */
784 
785 static void
786 arc_override_options (void)
787 {
788   if (arc_cpu == PROCESSOR_NONE)
789     arc_cpu = TARGET_CPU_DEFAULT;
790 
791   /* Set the default cpu options.  */
792   arc_selected_cpu = &arc_cpu_types[(int) arc_cpu];
793 
794   /* Set the architectures.  */
795   switch (arc_selected_cpu->arch_info->arch_id)
796     {
797     case BASE_ARCH_em:
798       arc_cpu_string = "EM";
799       break;
800     case BASE_ARCH_hs:
801       arc_cpu_string = "HS";
802       break;
803     case BASE_ARCH_700:
804       if (arc_selected_cpu->processor == PROCESSOR_nps400)
805 	arc_cpu_string = "NPS400";
806       else
807 	arc_cpu_string = "ARC700";
808       break;
809     case BASE_ARCH_6xx:
810       arc_cpu_string = "ARC600";
811       break;
812     default:
813       gcc_unreachable ();
814     }
815 
816   /* Set cpu flags according to the architecture/selected cpu.  The cpu
817      specific flags are set in arc-common.c.  The architecture forces
818      its default hardware configurations in, regardless of what the
819      command line options say.  The CPU's optional hw options can be
820      turned on or off.  */
821 #define ARC_OPT(NAME, CODE, MASK, DOC)			\
822   do {							\
823     if ((arc_selected_cpu->flags & CODE)		\
824 	&& ((target_flags_explicit & MASK) == 0))	\
825       target_flags |= MASK;				\
826     if (arc_selected_cpu->arch_info->dflags & CODE)	\
827       target_flags |= MASK;				\
828   } while (0);
829 #define ARC_OPTX(NAME, CODE, VAR, VAL, DOC)		\
830   do {							\
831     if ((arc_selected_cpu->flags & CODE)		\
832 	&& (VAR == DEFAULT_##VAR))			\
833       VAR = VAL;					\
834     if (arc_selected_cpu->arch_info->dflags & CODE)	\
835       VAR = VAL;					\
836   } while (0);
837 
838 #include "arc-options.def"
839 
840 #undef ARC_OPTX
841 #undef ARC_OPT
842 
843   /* Check options against architecture options.  Throw an error if
844      option is not allowed.  */
845 #define ARC_OPTX(NAME, CODE, VAR, VAL, DOC)			\
846   do {								\
847     if ((VAR == VAL)						\
848 	&& (!(arc_selected_cpu->arch_info->flags & CODE)))	\
849       {								\
850 	error ("%s is not available for %s architecture",	\
851 	       DOC, arc_selected_cpu->arch_info->name);		\
852       }								\
853   } while (0);
854 #define ARC_OPT(NAME, CODE, MASK, DOC)				\
855   do {								\
856     if ((target_flags & MASK)					\
857 	&& (!(arc_selected_cpu->arch_info->flags & CODE)))	\
858       error ("%s is not available for %s architecture",		\
859 	     DOC, arc_selected_cpu->arch_info->name);		\
860   } while (0);
861 
862 #include "arc-options.def"
863 
864 #undef ARC_OPTX
865 #undef ARC_OPT
866 
867   /* Set Tune option.  */
868   if (arc_tune == TUNE_NONE)
869     arc_tune = (enum attr_tune) arc_selected_cpu->tune;
870 
871   if (arc_size_opt_level == 3)
872     optimize_size = 1;
873 
874   /* Compact casesi is not a valid option for ARCv2 family.  */
875   if (TARGET_V2)
876     {
877       if (TARGET_COMPACT_CASESI)
878 	{
879 	  warning (0, "compact-casesi is not applicable to ARCv2");
880 	  TARGET_COMPACT_CASESI = 0;
881 	}
882     }
883   else if (optimize_size == 1
884 	   && !global_options_set.x_TARGET_COMPACT_CASESI)
885     TARGET_COMPACT_CASESI = 1;
886 
887   if (flag_pic)
888     target_flags |= MASK_NO_SDATA_SET;
889 
890   if (flag_no_common == 255)
891     flag_no_common = !TARGET_NO_SDATA_SET;
892 
893   /* TARGET_COMPACT_CASESI needs the "q" register class.  */
894   if (TARGET_MIXED_CODE)
895     TARGET_Q_CLASS = 1;
896   if (!TARGET_Q_CLASS)
897     TARGET_COMPACT_CASESI = 0;
898   if (TARGET_COMPACT_CASESI)
899     TARGET_CASE_VECTOR_PC_RELATIVE = 1;
900 
901   /* These need to be done at start up.  It's convenient to do them here.  */
902   arc_init ();
903 }
904 
905 /* The condition codes of the ARC, and the inverse function.  */
906 /* For short branches, the "c" / "nc" names are not defined in the ARC
907    Programmer's manual, so we have to use "lo" / "hs" instead.  */
908 static const char *arc_condition_codes[] =
909 {
910   "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv",
911   "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0
912 };
913 
914 enum arc_cc_code_index
915 {
916   ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N,
917   ARC_CC_C,  ARC_CC_NC, ARC_CC_V, ARC_CC_NV,
918   ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ,
919   ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC
920 };
921 
922 #define ARC_INVERSE_CONDITION_CODE(X)  ((X) ^ 1)
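/* The codes above are laid out in true/false pairs, so XOR-ing with 1
   inverts a condition: e.g. ARC_CC_EQ (2) ^ 1 == ARC_CC_NE (3) and
   ARC_CC_GT (10) ^ 1 == ARC_CC_LE (11).  */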
923 
924 /* Returns the index of the ARC condition code string in
925    `arc_condition_codes'.  COMPARISON should be an rtx like
926    `(eq (...) (...))'.  */
927 
928 static int
929 get_arc_condition_code (rtx comparison)
930 {
931   switch (GET_MODE (XEXP (comparison, 0)))
932     {
933     case CCmode:
934     case SImode: /* For BRcc.  */
935       switch (GET_CODE (comparison))
936 	{
937 	case EQ : return ARC_CC_EQ;
938 	case NE : return ARC_CC_NE;
939 	case GT : return ARC_CC_GT;
940 	case LE : return ARC_CC_LE;
941 	case GE : return ARC_CC_GE;
942 	case LT : return ARC_CC_LT;
943 	case GTU : return ARC_CC_HI;
944 	case LEU : return ARC_CC_LS;
945 	case LTU : return ARC_CC_LO;
946 	case GEU : return ARC_CC_HS;
947 	default : gcc_unreachable ();
948 	}
949     case CC_ZNmode:
950       switch (GET_CODE (comparison))
951 	{
952 	case EQ : return ARC_CC_EQ;
953 	case NE : return ARC_CC_NE;
954 	case GE: return ARC_CC_P;
955 	case LT: return ARC_CC_N;
956 	case GT : return ARC_CC_PNZ;
957 	default : gcc_unreachable ();
958 	}
959     case CC_Zmode:
960       switch (GET_CODE (comparison))
961 	{
962 	case EQ : return ARC_CC_EQ;
963 	case NE : return ARC_CC_NE;
964 	default : gcc_unreachable ();
965 	}
966     case CC_Cmode:
967       switch (GET_CODE (comparison))
968 	{
969 	case LTU : return ARC_CC_C;
970 	case GEU : return ARC_CC_NC;
971 	default : gcc_unreachable ();
972 	}
973     case CC_FP_GTmode:
974       if (TARGET_ARGONAUT_SET && TARGET_SPFP)
975 	switch (GET_CODE (comparison))
976 	  {
977 	  case GT  : return ARC_CC_N;
978 	  case UNLE: return ARC_CC_P;
979 	  default : gcc_unreachable ();
980 	}
981       else
982 	switch (GET_CODE (comparison))
983 	  {
984 	  case GT   : return ARC_CC_HI;
985 	  case UNLE : return ARC_CC_LS;
986 	  default : gcc_unreachable ();
987 	}
988     case CC_FP_GEmode:
989       /* Same for FPX and non-FPX.  */
990       switch (GET_CODE (comparison))
991 	{
992 	case GE   : return ARC_CC_HS;
993 	case UNLT : return ARC_CC_LO;
994 	default : gcc_unreachable ();
995 	}
996     case CC_FP_UNEQmode:
997       switch (GET_CODE (comparison))
998 	{
999 	case UNEQ : return ARC_CC_EQ;
1000 	case LTGT : return ARC_CC_NE;
1001 	default : gcc_unreachable ();
1002 	}
1003     case CC_FP_ORDmode:
1004       switch (GET_CODE (comparison))
1005 	{
1006 	case UNORDERED : return ARC_CC_C;
1007 	case ORDERED   : return ARC_CC_NC;
1008 	default : gcc_unreachable ();
1009 	}
1010     case CC_FPXmode:
1011       switch (GET_CODE (comparison))
1012 	{
1013 	case EQ        : return ARC_CC_EQ;
1014 	case NE        : return ARC_CC_NE;
1015 	case UNORDERED : return ARC_CC_C;
1016 	case ORDERED   : return ARC_CC_NC;
1017 	case LTGT      : return ARC_CC_HI;
1018 	case UNEQ      : return ARC_CC_LS;
1019 	default : gcc_unreachable ();
1020 	}
1021     case CC_FPUmode:
1022       switch (GET_CODE (comparison))
1023 	{
1024 	case EQ	       : return ARC_CC_EQ;
1025 	case NE	       : return ARC_CC_NE;
1026 	case GT	       : return ARC_CC_GT;
1027 	case GE	       : return ARC_CC_GE;
1028 	case LT	       : return ARC_CC_C;
1029 	case LE	       : return ARC_CC_LS;
1030 	case UNORDERED : return ARC_CC_V;
1031 	case ORDERED   : return ARC_CC_NV;
1032 	case UNGT      : return ARC_CC_HI;
1033 	case UNGE      : return ARC_CC_HS;
1034 	case UNLT      : return ARC_CC_LT;
1035 	case UNLE      : return ARC_CC_LE;
1036 	  /* UNEQ and LTGT do not have representation.  */
1037 	case LTGT      : /* Fall through.  */
1038 	case UNEQ      : /* Fall through.  */
1039 	default : gcc_unreachable ();
1040 	}
1041     case CC_FPU_UNEQmode:
1042       switch (GET_CODE (comparison))
1043 	{
1044 	case LTGT : return ARC_CC_NE;
1045 	case UNEQ : return ARC_CC_EQ;
1046 	default : gcc_unreachable ();
1047 	}
1048     default : gcc_unreachable ();
1049     }
1050   /*NOTREACHED*/
1051   return (42);
1052 }
1053 
1054 /* Return true if COMPARISON has a short form that can accommodate OFFSET.  */
1055 
1056 bool
1057 arc_short_comparison_p (rtx comparison, int offset)
1058 {
1059   gcc_assert (ARC_CC_NC == ARC_CC_HS);
1060   gcc_assert (ARC_CC_C == ARC_CC_LO);
1061   switch (get_arc_condition_code (comparison))
1062     {
1063     case ARC_CC_EQ: case ARC_CC_NE:
1064       return offset >= -512 && offset <= 506;
1065     case ARC_CC_GT: case ARC_CC_LE: case ARC_CC_GE: case ARC_CC_LT:
1066     case ARC_CC_HI: case ARC_CC_LS: case ARC_CC_LO: case ARC_CC_HS:
1067       return offset >= -64 && offset <= 58;
1068     default:
1069       return false;
1070     }
1071 }
1072 
1073 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1074    return the mode to be used for the comparison.  */
1075 
1076 machine_mode
1077 arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
1078 {
1079   machine_mode mode = GET_MODE (x);
1080   rtx x1;
1081 
1082   /* For an operation that sets the condition codes as a side-effect, the
1083      C and V flags are not set as they are for cmp, so we can only use
1084      comparisons where this doesn't matter.  (For LT and GE we can use
1085      "mi" and "pl" instead.)  */
1086   /* ??? We could use "pnz" for greater than zero, however, we could then
1087      get into trouble because the comparison could not be reversed.  */
1088   if (GET_MODE_CLASS (mode) == MODE_INT
1089       && y == const0_rtx
1090       && (op == EQ || op == NE
1091 	  || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x)) <= 4)))
1092     return CC_ZNmode;
1093 
1094   /* add.f for if (a+b) */
1095   if (mode == SImode
1096       && GET_CODE (y) == NEG
1097       && (op == EQ || op == NE))
1098     return CC_ZNmode;
1099 
1100   /* Check if this is a test suitable for bxor.f .  */
1101   if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1102       && ((INTVAL (y) - 1) & INTVAL (y)) == 0
1103       && INTVAL (y))
1104     return CC_Zmode;
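  /* Illustrative case for the test above: (x == 0x80), where the constant
     is a single set bit, can be handled by toggling that bit and checking
     the result for zero.  */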
1105 
1106   /* Check if this is a test suitable for add / bmsk.f .  */
1107   if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1108       && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1)))
1109       && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0
1110       && (~INTVAL (x1) | INTVAL (y)) < 0
1111       && (~INTVAL (x1) | INTVAL (y)) > -0x800)
1112     return CC_Zmode;
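  /* E.g. ((x & 0xff) == 0x40) lands here: the mask 0xff has only low bits
     set, and ~0xff | 0x40 is -192, which falls inside the (-0x800, 0)
     window checked above (one worked example, not the only accepted
     shape).  */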
1113 
1114   if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
1115       && GET_CODE (x) == PLUS
1116       && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
1117     return CC_Cmode;
1118 
1119   if (TARGET_ARGONAUT_SET
1120       && ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP)))
1121     switch (op)
1122       {
1123       case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1124 	return CC_FPXmode;
1125       case LT: case UNGE: case GT: case UNLE:
1126 	return CC_FP_GTmode;
1127       case LE: case UNGT: case GE: case UNLT:
1128 	return CC_FP_GEmode;
1129       default: gcc_unreachable ();
1130       }
1131   else if (TARGET_HARD_FLOAT
1132 	   && ((mode == SFmode && TARGET_FP_SP_BASE)
1133 	       || (mode == DFmode && TARGET_FP_DP_BASE)))
1134     switch (op)
1135       {
1136       case EQ:
1137       case NE:
1138       case UNORDERED:
1139       case ORDERED:
1140       case UNLT:
1141       case UNLE:
1142       case UNGT:
1143       case UNGE:
1144       case LT:
1145       case LE:
1146       case GT:
1147       case GE:
1148 	return CC_FPUmode;
1149 
1150       case LTGT:
1151       case UNEQ:
1152 	return CC_FPU_UNEQmode;
1153 
1154       default:
1155 	gcc_unreachable ();
1156       }
1157   else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE)
1158     {
1159       switch (op)
1160 	{
1161 	case EQ: case NE: return CC_Zmode;
1162 	case LT: case UNGE:
1163 	case GT: case UNLE: return CC_FP_GTmode;
1164 	case LE: case UNGT:
1165 	case GE: case UNLT: return CC_FP_GEmode;
1166 	case UNEQ: case LTGT: return CC_FP_UNEQmode;
1167 	case ORDERED: case UNORDERED: return CC_FP_ORDmode;
1168 	default: gcc_unreachable ();
1169 	}
1170     }
1171   return CCmode;
1172 }
1173 
1174 /* Vectors to keep interesting information about registers where it can easily
1175    be found.  We used to use the actual mode value as the bit number, but there
1176    is (or may be) more than 32 modes now.  Instead we use two tables: one
1177    indexed by hard register number, and one indexed by mode.  */
1178 
1179 /* The purpose of arc_mode_class is to shrink the range of modes so that
1180    they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
1181    mapped into one arc_mode_class mode.  */
1182 
1183 enum arc_mode_class {
1184   C_MODE,
1185   S_MODE, D_MODE, T_MODE, O_MODE,
1186   SF_MODE, DF_MODE, TF_MODE, OF_MODE,
1187   V_MODE
1188 };
1189 
1190 /* Modes for condition codes.  */
1191 #define C_MODES (1 << (int) C_MODE)
1192 
1193 /* Modes for single-word and smaller quantities.  */
1194 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
1195 
1196 /* Modes for double-word and smaller quantities.  */
1197 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
1198 
1199 /* Mode for 8-byte DF values only.  */
1200 #define DF_MODES (1 << DF_MODE)
1201 
1202 /* Modes for quad-word and smaller quantities.  */
1203 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
1204 
1205 /* Modes for 128-bit vectors.  */
1206 #define V_MODES (1 << (int) V_MODE)
1207 
1208 /* Value is 1 if register/mode pair is acceptable on arc.  */
1209 
1210 unsigned int arc_hard_regno_mode_ok[] = {
1211   T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1212   T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1213   T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES,
1214   D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1215 
1216   /* ??? Leave these as S_MODES for now.  */
1217   S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1218   DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES,
1219   S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1220   S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES,
1221 
1222   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1223   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1224   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1225   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1226 
1227   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1228   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1229   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1230   V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1231 
1232   S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1233   S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES
1234 };
1235 
1236 unsigned int arc_mode_class [NUM_MACHINE_MODES];
1237 
1238 enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
1239 
1240 enum reg_class
1241 arc_preferred_reload_class (rtx, enum reg_class cl)
1242 {
1243   if ((cl) == CHEAP_CORE_REGS  || (cl) == WRITABLE_CORE_REGS)
1244     return GENERAL_REGS;
1245   return cl;
1246 }
1247 
1248 /* Initialize the arc_mode_class array.  */
1249 
1250 static void
1251 arc_init_reg_tables (void)
1252 {
1253   int i;
1254 
1255   for (i = 0; i < NUM_MACHINE_MODES; i++)
1256     {
1257       machine_mode m = (machine_mode) i;
1258 
1259       switch (GET_MODE_CLASS (m))
1260 	{
1261 	case MODE_INT:
1262 	case MODE_PARTIAL_INT:
1263 	case MODE_COMPLEX_INT:
1264 	  if (GET_MODE_SIZE (m) <= 4)
1265 	    arc_mode_class[i] = 1 << (int) S_MODE;
1266 	  else if (GET_MODE_SIZE (m) == 8)
1267 	    arc_mode_class[i] = 1 << (int) D_MODE;
1268 	  else if (GET_MODE_SIZE (m) == 16)
1269 	    arc_mode_class[i] = 1 << (int) T_MODE;
1270 	  else if (GET_MODE_SIZE (m) == 32)
1271 	    arc_mode_class[i] = 1 << (int) O_MODE;
1272 	  else
1273 	    arc_mode_class[i] = 0;
1274 	  break;
1275 	case MODE_FLOAT:
1276 	case MODE_COMPLEX_FLOAT:
1277 	  if (GET_MODE_SIZE (m) <= 4)
1278 	    arc_mode_class[i] = 1 << (int) SF_MODE;
1279 	  else if (GET_MODE_SIZE (m) == 8)
1280 	    arc_mode_class[i] = 1 << (int) DF_MODE;
1281 	  else if (GET_MODE_SIZE (m) == 16)
1282 	    arc_mode_class[i] = 1 << (int) TF_MODE;
1283 	  else if (GET_MODE_SIZE (m) == 32)
1284 	    arc_mode_class[i] = 1 << (int) OF_MODE;
1285 	  else
1286 	    arc_mode_class[i] = 0;
1287 	  break;
1288 	case MODE_VECTOR_INT:
1289 	  if (GET_MODE_SIZE (m) == 4)
1290 	    arc_mode_class[i] = (1 << (int) S_MODE);
1291 	  else if (GET_MODE_SIZE (m) == 8)
1292 	    arc_mode_class[i] = (1 << (int) D_MODE);
1293 	  else
1294 	    arc_mode_class[i] = (1 << (int) V_MODE);
1295 	  break;
1296 	case MODE_CC:
1297 	default:
1298 	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
1299 	     we must explicitly check for them here.  */
1300 	  if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode
1301 	      || i == (int) CC_Cmode
1302 	      || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode
1303 	      || i == CC_FPUmode || i == CC_FPU_UNEQmode)
1304 	    arc_mode_class[i] = 1 << (int) C_MODE;
1305 	  else
1306 	    arc_mode_class[i] = 0;
1307 	  break;
1308 	}
1309     }
1310 }
1311 
1312 /* Core registers 56..59 are used for multiply extension options.
1313    The dsp option uses r56 and r57, these are then named acc1 and acc2.
1314    acc1 is the highpart, and acc2 the lowpart, so which register gets which
1315    number depends on endianness.
1316    The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi.
1317    Because mlo / mhi form a 64 bit value, we use different gcc internal
1318    register numbers to make them form a register pair as the gcc internals
1319    know it.  mmid gets number 57, if still available, and mlo / mhi get
1320    number 58 and 59, depending on endianness.  We use DBX_REGISTER_NUMBER
1321    to map this back.  */
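/* For illustration: with the mul64 multiplier option on a little-endian
   target, r58 prints as "mlo" and r59 as "mhi"; on a big-endian target the
   two names are swapped (see arc_conditional_register_usage below).  */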
1322   char rname56[5] = "r56";
1323   char rname57[5] = "r57";
1324   char rname58[5] = "r58";
1325   char rname59[5] = "r59";
1326   char rname29[7] = "ilink1";
1327   char rname30[7] = "ilink2";
1328 
1329 static void
1330 arc_conditional_register_usage (void)
1331 {
1332   int regno;
1333   int i;
1334   int fix_start = 60, fix_end = 55;
1335 
1336   if (TARGET_V2)
1337     {
1338       /* For ARCv2 the core register set is changed.  */
1339       strcpy (rname29, "ilink");
1340       strcpy (rname30, "r30");
1341       fixed_regs[30] = call_used_regs[30] = 1;
1342     }
1343 
1344   if (TARGET_MUL64_SET)
1345     {
1346       fix_start = 57;
1347       fix_end = 59;
1348 
1349       /* We don't provide a name for mmid.  In rtl / assembly resource lists,
1350 	 you are supposed to refer to it as mlo & mhi, e.g.
1351 	 (zero_extract:SI (reg:DI 58) (const_int 32) (const_int 16)).
1352 	 In an actual asm instruction, you of course use mmid.
1353 	 The point of avoiding having a separate register for mmid is that
1354 	 this way, we don't have to carry clobbers of that reg around in every
1355 	 instruction that modifies mlo and/or mhi.  */
1356       strcpy (rname57, "");
1357       strcpy (rname58, TARGET_BIG_ENDIAN ? "mhi" : "mlo");
1358       strcpy (rname59, TARGET_BIG_ENDIAN ? "mlo" : "mhi");
1359     }
1360 
1361   /* The nature of arc_tp_regno is actually something more like a global
1362      register, however globalize_reg requires a declaration.
1363      We use EPILOGUE_USES to compensate so that sets from
1364      __builtin_set_frame_pointer are not deleted.  */
1365   if (arc_tp_regno != -1)
1366     fixed_regs[arc_tp_regno] = call_used_regs[arc_tp_regno] = 1;
1367 
1368   if (TARGET_MULMAC_32BY16_SET)
1369     {
1370       fix_start = 56;
1371       fix_end = fix_end > 57 ? fix_end : 57;
1372       strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2");
1373       strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1");
1374     }
1375   for (regno = fix_start; regno <= fix_end; regno++)
1376     {
1377       if (!fixed_regs[regno])
1378 	warning (0, "multiply option implies r%d is fixed", regno);
1379       fixed_regs [regno] = call_used_regs[regno] = 1;
1380     }
1381   if (TARGET_Q_CLASS)
1382     {
1383       if (optimize_size)
1384 	{
1385 	  reg_alloc_order[0] = 0;
1386 	  reg_alloc_order[1] = 1;
1387 	  reg_alloc_order[2] = 2;
1388 	  reg_alloc_order[3] = 3;
1389 	  reg_alloc_order[4] = 12;
1390 	  reg_alloc_order[5] = 13;
1391 	  reg_alloc_order[6] = 14;
1392 	  reg_alloc_order[7] = 15;
1393 	  reg_alloc_order[8] = 4;
1394 	  reg_alloc_order[9] = 5;
1395 	  reg_alloc_order[10] = 6;
1396 	  reg_alloc_order[11] = 7;
1397 	  reg_alloc_order[12] = 8;
1398 	  reg_alloc_order[13] = 9;
1399 	  reg_alloc_order[14] = 10;
1400 	  reg_alloc_order[15] = 11;
1401 	}
1402       else
1403 	{
1404 	  reg_alloc_order[2] = 12;
1405 	  reg_alloc_order[3] = 13;
1406 	  reg_alloc_order[4] = 14;
1407 	  reg_alloc_order[5] = 15;
1408 	  reg_alloc_order[6] = 1;
1409 	  reg_alloc_order[7] = 0;
1410 	  reg_alloc_order[8] = 4;
1411 	  reg_alloc_order[9] = 5;
1412 	  reg_alloc_order[10] = 6;
1413 	  reg_alloc_order[11] = 7;
1414 	  reg_alloc_order[12] = 8;
1415 	  reg_alloc_order[13] = 9;
1416 	  reg_alloc_order[14] = 10;
1417 	  reg_alloc_order[15] = 11;
1418 	}
1419     }
1420   if (TARGET_SIMD_SET)
1421     {
1422       int i;
1423       for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
1424 	reg_alloc_order [i] = i;
1425       for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
1426 	   i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
1427 	reg_alloc_order [i] = i;
1428     }
1429   /* For ARC600, lp_count may not be read in an instruction
1430      following immediately after another one setting it to a new value.
1431      There was some discussion on how to enforce scheduling constraints for
1432      processors with missing interlocks on the gcc mailing list:
1433      http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html .
1434      However, we can't actually use this approach, because for ARC the
1435      delay slot scheduling pass is active, which runs after
1436      machine_dependent_reorg.  */
1437   if (TARGET_ARC600)
1438     CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
1439   else if (!TARGET_LP_WR_INTERLOCK)
1440     fixed_regs[LP_COUNT] = 1;
1441   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1442     if (!call_used_regs[regno])
1443       CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
1444   for (regno = 32; regno < 60; regno++)
1445     if (!fixed_regs[regno])
1446       SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno);
1447   if (!TARGET_ARC600_FAMILY)
1448     {
1449       for (regno = 32; regno <= 60; regno++)
1450 	CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno);
1451 
1452       /* If they have used -ffixed-lp_count, make sure it takes
1453 	 effect.  */
1454       if (fixed_regs[LP_COUNT])
1455 	{
1456 	  CLEAR_HARD_REG_BIT (reg_class_contents[LPCOUNT_REG], LP_COUNT);
1457 	  CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
1458 	  CLEAR_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], LP_COUNT);
1459 
1460 	  /* Instead of taking out SF_MODE like below, forbid it outright.  */
1461 	  arc_hard_regno_mode_ok[60] = 0;
1462 	}
1463       else
1464 	arc_hard_regno_mode_ok[60] = 1 << (int) S_MODE;
1465     }
1466 
1467   /* ARC HS has a 64-bit data path which makes use of the even-odd paired
1468      registers.  */
1469   if (TARGET_HS)
1470     {
1471       for (regno = 1; regno < 32; regno += 2)
1472 	{
1473 	  arc_hard_regno_mode_ok[regno] = S_MODES;
1474 	}
1475     }
1476 
1477   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1478     {
1479       if (i < 29)
1480 	{
1481 	  if ((TARGET_Q_CLASS || TARGET_RRQ_CLASS)
1482 	      && ((i <= 3) || ((i >= 12) && (i <= 15))))
1483 	    arc_regno_reg_class[i] = ARCOMPACT16_REGS;
1484 	  else
1485 	    arc_regno_reg_class[i] = GENERAL_REGS;
1486 	}
1487       else if (i < 60)
1488 	arc_regno_reg_class[i]
1489 	  = (fixed_regs[i]
1490 	     ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)
1491 		? CHEAP_CORE_REGS : ALL_CORE_REGS)
1492 	     : (((!TARGET_ARC600_FAMILY)
1493 		 && TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i))
1494 		? CHEAP_CORE_REGS : WRITABLE_CORE_REGS));
1495       else
1496 	arc_regno_reg_class[i] = NO_REGS;
1497     }
1498 
1499   /* ARCOMPACT16_REGS is empty if neither TARGET_Q_CLASS nor
1500      TARGET_RRQ_CLASS has been activated.  */
1501   if (!TARGET_Q_CLASS && !TARGET_RRQ_CLASS)
1502     CLEAR_HARD_REG_SET(reg_class_contents [ARCOMPACT16_REGS]);
1503   if (!TARGET_Q_CLASS)
1504     CLEAR_HARD_REG_SET(reg_class_contents [AC16_BASE_REGS]);
1505 
1506   gcc_assert (FIRST_PSEUDO_REGISTER >= 144);
1507 
1508   /* Handle Special Registers.  */
1509   arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register.  */
1510   if (!TARGET_V2)
1511     arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register.  */
1512   arc_regno_reg_class[31] = LINK_REGS; /* blink register.  */
1513   arc_regno_reg_class[60] = LPCOUNT_REG;
1514   arc_regno_reg_class[61] = NO_REGS;      /* CC_REG: must be NO_REGS.  */
1515   arc_regno_reg_class[62] = GENERAL_REGS;
1516 
1517   if (TARGET_DPFP)
1518     {
1519       for (i = 40; i < 44; ++i)
1520 	{
1521 	  arc_regno_reg_class[i] = DOUBLE_REGS;
1522 
1523 	  /* Unless they want us to do 'mov d1, 0x00000000' make sure
1524 	     no attempt is made to use such a register as a destination
1525 	     operand in *movdf_insn.  */
1526 	  if (!TARGET_ARGONAUT_SET)
1527 	    {
1528 	    /* Make sure no 'c', 'w', 'W', or 'Rac' constraint is
1529 	       interpreted to mean they can use D1 or D2 in their insn.  */
1530 	    CLEAR_HARD_REG_BIT(reg_class_contents[CHEAP_CORE_REGS       ], i);
1531 	    CLEAR_HARD_REG_BIT(reg_class_contents[ALL_CORE_REGS         ], i);
1532 	    CLEAR_HARD_REG_BIT(reg_class_contents[WRITABLE_CORE_REGS    ], i);
1533 	    CLEAR_HARD_REG_BIT(reg_class_contents[MPY_WRITABLE_CORE_REGS], i);
1534 	    }
1535 	}
1536     }
1537   else
1538     {
1539       /* Disable all DOUBLE_REGISTER settings,
1540 	 if not generating DPFP code.  */
1541       arc_regno_reg_class[40] = ALL_REGS;
1542       arc_regno_reg_class[41] = ALL_REGS;
1543       arc_regno_reg_class[42] = ALL_REGS;
1544       arc_regno_reg_class[43] = ALL_REGS;
1545 
1546       arc_hard_regno_mode_ok[40] = 0;
1547       arc_hard_regno_mode_ok[42] = 0;
1548 
1549       CLEAR_HARD_REG_SET(reg_class_contents [DOUBLE_REGS]);
1550     }
1551 
1552   if (TARGET_SIMD_SET)
1553     {
1554       gcc_assert (ARC_FIRST_SIMD_VR_REG == 64);
1555       gcc_assert (ARC_LAST_SIMD_VR_REG  == 127);
1556 
1557       for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
1558 	arc_regno_reg_class [i] =  SIMD_VR_REGS;
1559 
1560       gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128);
1561       gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128);
1562       gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136);
1563       gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG  == 143);
1564 
1565       for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
1566 	   i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
1567 	arc_regno_reg_class [i] =  SIMD_DMA_CONFIG_REGS;
1568     }
1569 
1570   /* pc : r63 */
1571   arc_regno_reg_class[PROGRAM_COUNTER_REGNO] = GENERAL_REGS;
1572 
1573   /* ARCv2 accumulator.  */
1574   if (TARGET_V2
1575       && (TARGET_FP_DP_FUSED || TARGET_FP_SP_FUSED))
1576   {
1577     arc_regno_reg_class[ACCL_REGNO] = WRITABLE_CORE_REGS;
1578     arc_regno_reg_class[ACCH_REGNO] = WRITABLE_CORE_REGS;
1579     SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], ACCL_REGNO);
1580     SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], ACCH_REGNO);
1581     SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], ACCL_REGNO);
1582     SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], ACCH_REGNO);
1583     arc_hard_regno_mode_ok[ACC_REG_FIRST] = D_MODES;
1584   }
1585 }
1586 
1587 /* Handle an "interrupt" attribute; arguments as in
1588    struct attribute_spec.handler.  */
1589 
1590 static tree
1591 arc_handle_interrupt_attribute (tree *, tree name, tree args, int,
1592 				bool *no_add_attrs)
1593 {
1594   gcc_assert (args);
1595 
1596   tree value = TREE_VALUE (args);
1597 
1598   if (TREE_CODE (value) != STRING_CST)
1599     {
1600       warning (OPT_Wattributes,
1601 	       "argument of %qE attribute is not a string constant",
1602 	       name);
1603       *no_add_attrs = true;
1604     }
1605   else if (strcmp (TREE_STRING_POINTER (value), "ilink1")
1606 	   && strcmp (TREE_STRING_POINTER (value), "ilink2")
1607 	   && !TARGET_V2)
1608     {
1609       warning (OPT_Wattributes,
1610 	       "argument of %qE attribute is not \"ilink1\" or \"ilink2\"",
1611 	       name);
1612       *no_add_attrs = true;
1613     }
1614   else if (TARGET_V2
1615 	   && strcmp (TREE_STRING_POINTER (value), "ilink"))
1616     {
1617       warning (OPT_Wattributes,
1618 	       "argument of %qE attribute is not \"ilink\"",
1619 	       name);
1620       *no_add_attrs = true;
1621     }
1622 
1623   return NULL_TREE;
1624 }
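
/* For illustration (hypothetical user code, not from a testcase), the handler
   above accepts declarations such as

     void __attribute__ ((interrupt ("ilink1"))) timer_isr (void);   // ARCompact
     void __attribute__ ((interrupt ("ilink")))  uart_isr (void);    // ARCv2

   Any other string argument draws one of the warnings above and the attribute
   is dropped.  */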
1625 
1626 /* Return zero if TYPE1 and TYPE2 are incompatible, one if they are compatible,
1627    and two if they are nearly compatible (which causes a warning to be
1628    generated).  */
1629 
1630 static int
1631 arc_comp_type_attributes (const_tree type1,
1632 			  const_tree type2)
1633 {
1634   int l1, l2, m1, m2, s1, s2;
1635 
1636   /* Check for mismatch of non-default calling convention.  */
1637   if (TREE_CODE (type1) != FUNCTION_TYPE)
1638     return 1;
1639 
1640   /* Check for mismatched call attributes.  */
1641   l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
1642   l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
1643   m1 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type1)) != NULL;
1644   m2 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type2)) != NULL;
1645   s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
1646   s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
1647 
1648   /* Only bother to check if an attribute is defined.  */
1649   if (l1 | l2 | m1 | m2 | s1 | s2)
1650     {
1651       /* If one type has an attribute, the other must have the same attribute.  */
1652       if ((l1 != l2) || (m1 != m2) || (s1 != s2))
1653 	return 0;
1654 
1655       /* Disallow mixed attributes.  */
1656       if (l1 + m1 + s1 > 1)
1657 	return 0;
1658     }
1659 
1660 
1661   return 1;
1662 }
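
/* Illustrative sketch (hypothetical declarations): the two types below compare
   as incompatible (return value 0), because only one of them carries the
   long_call attribute:

     void f (void) __attribute__ ((long_call));
     void f (void);

   Two declarations that both use long_call, or that use none of the three
   call-type attributes, compare as compatible (return value 1).  */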
1663 
1664 /* Set the default attributes for TYPE.  */
1665 
1666 void
1667 arc_set_default_type_attributes (tree type ATTRIBUTE_UNUSED)
1668 {
1669   gcc_unreachable ();
1670 }
1671 
1672 /* Misc. utilities.  */
1673 
1674 /* COMPARISON is a comparison of X and Y using some rtx code.  Emit the
1675    compare insn and return the rtx for the cc reg in the proper mode OMODE.  */
1676 
1677 rtx
1678 gen_compare_reg (rtx comparison, machine_mode omode)
1679 {
1680   enum rtx_code code = GET_CODE (comparison);
1681   rtx x = XEXP (comparison, 0);
1682   rtx y = XEXP (comparison, 1);
1683   rtx tmp, cc_reg;
1684   machine_mode mode, cmode;
1685 
1686 
1687   cmode = GET_MODE (x);
1688   if (cmode == VOIDmode)
1689     cmode = GET_MODE (y);
1690   gcc_assert (cmode == SImode || cmode == SFmode || cmode == DFmode);
1691   if (cmode == SImode)
1692     {
1693       if (!register_operand (x, SImode))
1694 	{
1695 	  if (register_operand (y, SImode))
1696 	    {
1697 	      tmp = x;
1698 	      x = y;
1699 	      y = tmp;
1700 	      code = swap_condition (code);
1701 	    }
1702 	  else
1703 	    x = copy_to_mode_reg (SImode, x);
1704 	}
1705       if (GET_CODE (y) == SYMBOL_REF && flag_pic)
1706 	y = copy_to_mode_reg (SImode, y);
1707     }
1708   else
1709     {
1710       x = force_reg (cmode, x);
1711       y = force_reg (cmode, y);
1712     }
1713   mode = SELECT_CC_MODE (code, x, y);
1714 
1715   cc_reg = gen_rtx_REG (mode, CC_REG);
1716 
1717   /* ??? FIXME (x-y)==0, as done by both cmpsfpx_raw and
1718      cmpdfpx_raw, is not a correct comparison for floats:
1719         http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
1720    */
1721   if (TARGET_ARGONAUT_SET
1722       && ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP)))
1723     {
1724       switch (code)
1725 	{
1726 	case NE: case EQ: case LT: case UNGE: case LE: case UNGT:
1727 	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1728 	  break;
1729 	case GT: case UNLE: case GE: case UNLT:
1730 	  code = swap_condition (code);
1731 	  tmp = x;
1732 	  x = y;
1733 	  y = tmp;
1734 	  break;
1735 	default:
1736 	  gcc_unreachable ();
1737 	}
1738       if (cmode == SFmode)
1739       {
1740 	emit_insn (gen_cmpsfpx_raw (x, y));
1741       }
1742       else /* DFmode */
1743       {
1744 	/* The insns accept Dx regs directly.  */
1745 	emit_insn (gen_cmpdfpx_raw (x, y));
1746       }
1747 
1748       if (mode != CC_FPXmode)
1749 	emit_insn (gen_rtx_SET (cc_reg,
1750 				gen_rtx_COMPARE (mode,
1751 						 gen_rtx_REG (CC_FPXmode, 61),
1752 						 const0_rtx)));
1753     }
1754   else if (TARGET_FPX_QUARK && (cmode == SFmode))
1755     {
1756       switch (code)
1757 	{
1758 	case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
1759 	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1760 	  break;
1761 	case LT: case UNGE: case LE: case UNGT:
1762 	  code = swap_condition (code);
1763 	  tmp = x;
1764 	  x = y;
1765 	  y = tmp;
1766 	  break;
1767 	default:
1768 	  gcc_unreachable ();
1769 	}
1770 
1771       emit_insn (gen_cmp_quark (cc_reg,
1772 				gen_rtx_COMPARE (mode, x, y)));
1773     }
1774   else if (TARGET_HARD_FLOAT
1775 	   && ((cmode == SFmode && TARGET_FP_SP_BASE)
1776 	       || (cmode == DFmode && TARGET_FP_DP_BASE)))
1777     emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
1778   else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE)
1779     {
1780       rtx op0 = gen_rtx_REG (cmode, 0);
1781       rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD);
1782       bool swap = false;
1783 
1784       switch (code)
1785 	{
1786 	case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
1787 	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1788 	  break;
1789 	case LT: case UNGE: case LE: case UNGT:
1790 	  code = swap_condition (code);
1791 	  swap = true;
1792 	  break;
1793 	default:
1794 	  gcc_unreachable ();
1795 	}
1796       if (currently_expanding_to_rtl)
1797 	{
1798 	  if (swap)
1799 	    {
1800 	      tmp = x;
1801 	      x = y;
1802 	      y = tmp;
1803 	    }
1804 	  emit_move_insn (op0, x);
1805 	  emit_move_insn (op1, y);
1806 	}
1807       else
1808 	{
1809 	  gcc_assert (rtx_equal_p (op0, x));
1810 	  gcc_assert (rtx_equal_p (op1, y));
1811 	  if (swap)
1812 	    {
1813 	      op0 = y;
1814 	      op1 = x;
1815 	    }
1816 	}
1817       emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1)));
1818     }
1819   else
1820     emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
1821   return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx);
1822 }
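
/* A rough sketch of the common integer case: for a (gt:SI x y) comparison with
   both operands already in registers and OMODE == SImode, the code above emits
   something like

     (set (reg:CC 61) (compare:CC (reg:SI x) (reg:SI y)))

   (the exact CC_* mode comes from SELECT_CC_MODE) and returns

     (gt:SI (reg:CC 61) (const_int 0))

   for the caller to wrap in a conditional branch or store-flag pattern.  */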
1823 
1824 /* Return true if VALUE, a const_double, will fit in a limm (4 byte number).
1825    We assume the value can be either signed or unsigned.  */
1826 
1827 bool
1828 arc_double_limm_p (rtx value)
1829 {
1830   HOST_WIDE_INT low, high;
1831 
1832   gcc_assert (GET_CODE (value) == CONST_DOUBLE);
1833 
1834   if (TARGET_DPFP)
1835     return true;
1836 
1837   low = CONST_DOUBLE_LOW (value);
1838   high = CONST_DOUBLE_HIGH (value);
1839 
1840   if (low & 0x80000000)
1841     {
1842       return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0)
1843 	      || (((low & - (unsigned HOST_WIDE_INT) 0x80000000)
1844 		   == - (unsigned HOST_WIDE_INT) 0x80000000)
1845 		  && high == -1));
1846     }
1847   else
1848     {
1849       return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0;
1850     }
1851 }
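
/* Illustrative values for the check above (a sketch, not exhaustive): with
   HIGH == 0 and LOW == 0x7fffffff the value fits, usable as either a signed
   or an unsigned limm; with HIGH == -1 and LOW a sign-extended negative
   32-bit value such as -4 it also fits, as a negative limm; with HIGH == 1
   it never fits.  */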
1852 
1853 /* Do any needed setup for a variadic function.  For the ARC, we must
1854    create a register parameter block, and then copy any anonymous arguments
1855    in registers to memory.
1856 
1857    CUM has not been updated for the last named argument which has type TYPE
1858    and mode MODE, and we rely on this fact.  */
1859 
1860 static void
1861 arc_setup_incoming_varargs (cumulative_args_t args_so_far,
1862 			    machine_mode mode, tree type,
1863 			    int *pretend_size, int no_rtl)
1864 {
1865   int first_anon_arg;
1866   CUMULATIVE_ARGS next_cum;
1867 
1868   /* We must treat `__builtin_va_alist' as an anonymous arg.  */
1869 
1870   next_cum = *get_cumulative_args (args_so_far);
1871   arc_function_arg_advance (pack_cumulative_args (&next_cum),
1872 			    mode, type, true);
1873   first_anon_arg = next_cum;
1874 
1875   if (FUNCTION_ARG_REGNO_P (first_anon_arg))
1876     {
1877       /* First anonymous (unnamed) argument is in a reg.  */
1878 
1879       /* Note that first_reg_offset < MAX_ARC_PARM_REGS.  */
1880       int first_reg_offset = first_anon_arg;
1881 
1882       if (!no_rtl)
1883 	{
1884 	  rtx regblock
1885 	    = gen_rtx_MEM (BLKmode, plus_constant (Pmode, arg_pointer_rtx,
1886 			   FIRST_PARM_OFFSET (0)));
1887 	  move_block_from_reg (first_reg_offset, regblock,
1888 			       MAX_ARC_PARM_REGS - first_reg_offset);
1889 	}
1890 
1891       *pretend_size
1892 	= ((MAX_ARC_PARM_REGS - first_reg_offset ) * UNITS_PER_WORD);
1893     }
1894 }
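
/* Worked sketch, assuming the usual ARC convention of MAX_ARC_PARM_REGS == 8
   argument registers (r0-r7) and 4-byte words: for a function declared as

     int sum (int count, ...);

   NEXT_CUM after the named COUNT argument is 1, so r1..r7 are dumped into the
   register parameter block and *PRETEND_SIZE becomes 7 * UNITS_PER_WORD == 28,
   letting va_arg walk the anonymous arguments in memory.  */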
1895 
1896 /* Cost functions.  */
1897 
1898 /* Provide the costs of an addressing mode that contains ADDR.
1899    If ADDR is not a valid address, its cost is irrelevant.  */
1900 
1901 int
1902 arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed)
1903 {
1904   switch (GET_CODE (addr))
1905     {
1906     case REG :
1907       return speed || satisfies_constraint_Rcq (addr) ? 0 : 1;
1908     case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC:
1909     case PRE_MODIFY: case POST_MODIFY:
1910       return !speed;
1911 
1912     case LABEL_REF :
1913     case SYMBOL_REF :
1914     case CONST :
1915       if (TARGET_NPS_CMEM && cmem_address (addr, SImode))
1916 	return 0;
1917       /* Most likely needs a LIMM.  */
1918       return COSTS_N_INSNS (1);
1919 
1920     case PLUS :
1921       {
1922 	register rtx plus0 = XEXP (addr, 0);
1923 	register rtx plus1 = XEXP (addr, 1);
1924 
1925 	if (GET_CODE (plus0) != REG
1926 	    && (GET_CODE (plus0) != MULT
1927 		|| !CONST_INT_P (XEXP (plus0, 1))
1928 		|| (INTVAL (XEXP (plus0, 1)) != 2
1929 		    && INTVAL (XEXP (plus0, 1)) != 4)))
1930 	  break;
1931 
1932 	switch (GET_CODE (plus1))
1933 	  {
1934 	  case CONST_INT :
1935 	    return (!RTX_OK_FOR_OFFSET_P (SImode, plus1)
1936 		    ? COSTS_N_INSNS (1)
1937 		    : speed
1938 		    ? 0
1939 		    : (satisfies_constraint_Rcq (plus0)
1940 		       && satisfies_constraint_O (plus1))
1941 		    ? 0
1942 		    : 1);
1943 	  case REG:
1944 	    return (speed < 1 ? 0
1945 		    : (satisfies_constraint_Rcq (plus0)
1946 		       && satisfies_constraint_Rcq (plus1))
1947 		    ? 0 : 1);
1948 	  case CONST :
1949 	  case SYMBOL_REF :
1950 	  case LABEL_REF :
1951 	    return COSTS_N_INSNS (1);
1952 	  default:
1953 	    break;
1954 	  }
1955 	break;
1956       }
1957     default:
1958       break;
1959     }
1960 
1961   return 4;
1962 }
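
/* A few illustrative inputs (a sketch, for the !SPEED i.e. size-optimizing
   case): a plain register address costs 0 if it satisfies the short-insn Rcq
   constraint and 1 otherwise; [reg, small-offset] behaves likewise; an address
   that needs a long immediate, such as a SYMBOL_REF or an out-of-range offset,
   costs COSTS_N_INSNS (1).  */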
1963 
1964 /* Emit instruction X with the frame related bit set.  */
1965 
1966 static rtx
1967 frame_insn (rtx x)
1968 {
1969   x = emit_insn (x);
1970   RTX_FRAME_RELATED_P (x) = 1;
1971   return x;
1972 }
1973 
1974 /* Emit a frame insn to move SRC to DST.  */
1975 
1976 static rtx
1977 frame_move (rtx dst, rtx src)
1978 {
1979   rtx tmp = gen_rtx_SET (dst, src);
1980   RTX_FRAME_RELATED_P (tmp) = 1;
1981   return frame_insn (tmp);
1982 }
1983 
1984 /* Like frame_move, but add a REG_INC note for REG if ADDR contains an
1985    auto increment address, or is zero.  */
1986 
1987 static rtx
1988 frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr)
1989 {
1990   rtx insn = frame_move (dst, src);
1991 
1992   if (!addr
1993       || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC
1994       || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY)
1995     add_reg_note (insn, REG_INC, reg);
1996   return insn;
1997 }
1998 
1999 /* Emit a frame insn which adjusts a frame address register REG by OFFSET.  */
2000 
2001 static rtx
2002 frame_add (rtx reg, HOST_WIDE_INT offset)
2003 {
2004   gcc_assert ((offset & 0x3) == 0);
2005   if (!offset)
2006     return NULL_RTX;
2007   return frame_move (reg, plus_constant (Pmode, reg, offset));
2008 }
2009 
2010 /* Emit a frame insn which adjusts stack pointer by OFFSET.  */
2011 
2012 static rtx
2013 frame_stack_add (HOST_WIDE_INT offset)
2014 {
2015   return frame_add (stack_pointer_rtx, offset);
2016 }
2017 
2018 /* Traditionally, we push saved registers first in the prologue,
2019    then we allocate the rest of the frame - and reverse in the epilogue.
2020    This still has its merits for ease of debugging, or saving code size
2021    or even execution time if the stack frame is so large that some accesses
2022    can't be encoded anymore with offsets in the instruction code when using
2023    a different scheme.
2024    Also, it would be a good starting point if we got instructions to help
2025    with register save/restore.
2026 
2027    However, often stack frames are small, and the pushing / popping has
2028    some costs:
2029    - the stack modification prevents a lot of scheduling.
2030    - frame allocation / deallocation needs extra instructions.
2031    - unless we know that we compile ARC700 user code, we need to put
2032      a memory barrier after frame allocation / before deallocation to
2033      prevent interrupts clobbering our data in the frame.
2034      In particular, we don't have any such guarantees for library functions,
2035      which, on the other hand, tend to have small frames.
2036 
2037    Thus, for small frames, we'd like to use a different scheme:
2038    - The frame is allocated in full with the first prologue instruction,
2039      and deallocated in full with the last epilogue instruction.
2040      Thus, the instructions in-between can be freely scheduled.
2041    - If the function has no outgoing arguments on the stack, we can allocate
2042      one register save slot at the top of the stack.  This register can then
2043      be saved simultaneously with frame allocation, and restored with
2044      frame deallocation.
2045      This register can be picked depending on scheduling considerations,
2046      although some thought should go into having some set of registers
2047      to be potentially lingering after a call, and others to be available
2048      immediately - i.e. in the absence of interprocedural optimization, we
2049      can use an ABI-like convention for register allocation to reduce
2050      stalls after function return.  */
2051 /* Function prologue/epilogue handlers.  */
2052 
2053 /* ARCompact stack frames look like:
2054 
2055            Before call                     After call
2056   high  +-----------------------+       +-----------------------+
2057   mem   |  reg parm save area   |       | reg parm save area    |
2058         |  only created for     |       | only created for      |
2059         |  variable arg fns     |       | variable arg fns      |
2060     AP  +-----------------------+       +-----------------------+
2061         |  return addr register |       | return addr register  |
2062         |  (if required)        |       | (if required)         |
2063         +-----------------------+       +-----------------------+
2064         |                       |       |                       |
2065         |  reg save area        |       | reg save area         |
2066         |                       |       |                       |
2067         +-----------------------+       +-----------------------+
2068         |  frame pointer        |       | frame pointer         |
2069         |  (if required)        |       | (if required)         |
2070     FP  +-----------------------+       +-----------------------+
2071         |                       |       |                       |
2072         |  local/temp variables |       | local/temp variables  |
2073         |                       |       |                       |
2074         +-----------------------+       +-----------------------+
2075         |                       |       |                       |
2076         |  arguments on stack   |       | arguments on stack    |
2077         |                       |       |                       |
2078     SP  +-----------------------+       +-----------------------+
2079                                         | reg parm save area    |
2080                                         | only created for      |
2081                                         | variable arg fns      |
2082                                     AP  +-----------------------+
2083                                         | return addr register  |
2084                                         | (if required)         |
2085                                         +-----------------------+
2086                                         |                       |
2087                                         | reg save area         |
2088                                         |                       |
2089                                         +-----------------------+
2090                                         | frame pointer         |
2091                                         | (if required)         |
2092                                     FP  +-----------------------+
2093                                         |                       |
2094                                         | local/temp variables  |
2095                                         |                       |
2096                                         +-----------------------+
2097                                         |                       |
2098                                         | arguments on stack    |
2099   low                                   |                       |
2100   mem                               SP  +-----------------------+
2101 
2102 Notes:
2103 1) The "reg parm save area" does not exist for non-variable-argument fns.
2104    The "reg parm save area" can be eliminated completely if we created our
2105    own va-arc.h, but that has tradeoffs as well (so it's not done).  */
2106 
2107 /* Structure to be filled in by arc_compute_frame_size with register
2108    save masks, and offsets for the current function.  */
2109 struct GTY (()) arc_frame_info
2110 {
2111   unsigned int total_size;	/* # bytes that the entire frame takes up.  */
2112   unsigned int extra_size;	/* # bytes of extra stuff.  */
2113   unsigned int pretend_size;	/* # bytes we push and pretend caller did.  */
2114   unsigned int args_size;	/* # bytes that outgoing arguments take up.  */
2115   unsigned int reg_size;	/* # bytes needed to store regs.  */
2116   unsigned int var_size;	/* # bytes that variables take up.  */
2117   unsigned int reg_offset;	/* Offset from new sp to store regs.  */
2118   unsigned int gmask;		/* Mask of saved gp registers.  */
2119   int          initialized;	/* Nonzero if frame size already calculated.  */
2120   short millicode_start_reg;
2121   short millicode_end_reg;
2122   bool save_return_addr;
2123 };
2124 
2125 /* Defining data structures for per-function information.  */
2126 
2127 typedef struct GTY (()) machine_function
2128 {
2129   enum arc_function_type fn_type;
2130   struct arc_frame_info frame_info;
2131   /* To keep track of unalignment caused by short insns.  */
2132   int unalign;
2133   int force_short_suffix; /* Used when disgorging return delay slot insns.  */
2134   const char *size_reason;
2135   struct arc_ccfsm ccfsm_current;
2136   /* Map from uid to ccfsm state during branch shortening.  */
2137   rtx ccfsm_current_insn;
2138   char arc_reorg_started;
2139   char prescan_initialized;
2140 } machine_function;
2141 
2142 /* Return the type of the function FUN (a normal function or one of the
2143    interrupt handler variants).
2144 
2145    The result is cached in FUN->machine->fn_type.  */
2146 
2147 enum arc_function_type
2148 arc_compute_function_type (struct function *fun)
2149 {
2150   tree decl = fun->decl;
2151   tree a;
2152   enum arc_function_type fn_type = fun->machine->fn_type;
2153 
2154   if (fn_type != ARC_FUNCTION_UNKNOWN)
2155     return fn_type;
2156 
2157   /* Assume we have a normal function (not an interrupt handler).  */
2158   fn_type = ARC_FUNCTION_NORMAL;
2159 
2160   /* Now see if this is an interrupt handler.  */
2161   for (a = DECL_ATTRIBUTES (decl);
2162        a;
2163        a = TREE_CHAIN (a))
2164     {
2165       tree name = TREE_PURPOSE (a), args = TREE_VALUE (a);
2166 
2167       if (name == get_identifier ("interrupt")
2168 	  && list_length (args) == 1
2169 	  && TREE_CODE (TREE_VALUE (args)) == STRING_CST)
2170 	{
2171 	  tree value = TREE_VALUE (args);
2172 
2173 	  if (!strcmp (TREE_STRING_POINTER (value), "ilink1")
2174 	      || !strcmp (TREE_STRING_POINTER (value), "ilink"))
2175 	    fn_type = ARC_FUNCTION_ILINK1;
2176 	  else if (!strcmp (TREE_STRING_POINTER (value), "ilink2"))
2177 	    fn_type = ARC_FUNCTION_ILINK2;
2178 	  else
2179 	    gcc_unreachable ();
2180 	  break;
2181 	}
2182     }
2183 
2184   return fun->machine->fn_type = fn_type;
2185 }
2186 
2187 #define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
2188 #define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
2189 
2190 /* Tell prologue and epilogue if register REGNO should be saved / restored.
2191    The return address and frame pointer are treated separately.
2192    Don't consider them here.
2193    Addition for pic: The gp register needs to be saved if the current
2194    function changes it to access gotoff variables.
2195    FIXME: This will not be needed if we used some arbitrary register
2196    instead of r26.
2197 */
2198 #define MUST_SAVE_REGISTER(regno, interrupt_p) \
2199 (((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \
2200   && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p))) \
2201  || (flag_pic && crtl->uses_pic_offset_table \
2202      && regno == PIC_OFFSET_TABLE_REGNUM) )
2203 
2204 #define MUST_SAVE_RETURN_ADDR \
2205   (cfun->machine->frame_info.save_return_addr)
2206 
2207 /* Return non-zero if there are registers to be saved or loaded using
2208    millicode thunks.  We can only use consecutive sequences starting
2209    with r13, and not going beyond r25.
2210    GMASK is a bitmask of registers to save.  This function sets
2211    FRAME->millicode_start_reg .. FRAME->millicode_end_reg to the range
2212    of registers to be saved / restored with a millicode call.  */
2213 
2214 static int
2215 arc_compute_millicode_save_restore_regs (unsigned int gmask,
2216 					 struct arc_frame_info *frame)
2217 {
2218   int regno;
2219 
2220   int start_reg = 13, end_reg = 25;
2221 
2222   for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));)
2223     regno++;
2224   end_reg = regno - 1;
2225   /* There is no point in using millicode thunks if we don't save/restore
2226      at least three registers.  For non-leaf functions we also have the
2227      blink restore.  */
2228   if (regno - start_reg >= 3 - (crtl->is_leaf == 0))
2229     {
2230       frame->millicode_start_reg = 13;
2231       frame->millicode_end_reg = regno - 1;
2232       return 1;
2233     }
2234   return 0;
2235 }
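
/* Example (a sketch): with GMASK covering r13..r16 in a non-leaf function, the
   loop above stops at r17, finding four consecutive saved registers starting
   at r13; that meets the threshold, so FRAME->millicode_start_reg/end_reg
   become 13/16 and the function returns 1.  With only r13 and r14 saved in a
   leaf function, the threshold of three is not met and no millicode range is
   used.  */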
2236 
2237 /* Return the bytes needed to compute the frame pointer from the current
2238    stack pointer.
2239 
2240    SIZE is the size needed for local variables.  */
2241 
2242 unsigned int
2243 arc_compute_frame_size (int size)	/* size = # of var. bytes allocated.  */
2244 {
2245   int regno;
2246   unsigned int total_size, var_size, args_size, pretend_size, extra_size;
2247   unsigned int reg_size, reg_offset;
2248   unsigned int gmask;
2249   enum arc_function_type fn_type;
2250   int interrupt_p;
2251   struct arc_frame_info *frame_info = &cfun->machine->frame_info;
2252 
2253   size = ARC_STACK_ALIGN (size);
2254 
2255   /* 1) Size of locals and temporaries */
2256   var_size	= size;
2257 
2258   /* 2) Size of outgoing arguments */
2259   args_size	= crtl->outgoing_args_size;
2260 
2261   /* 3) Calculate space needed for saved registers.
2262      ??? We ignore the extension registers for now.  */
2263 
2264   /* See if this is an interrupt handler.  Call used registers must be saved
2265      for them too.  */
2266 
2267   reg_size = 0;
2268   gmask = 0;
2269   fn_type = arc_compute_function_type (cfun);
2270   interrupt_p = ARC_INTERRUPT_P (fn_type);
2271 
2272   for (regno = 0; regno <= 31; regno++)
2273     {
2274       if (MUST_SAVE_REGISTER (regno, interrupt_p))
2275 	{
2276 	  reg_size += UNITS_PER_WORD;
2277 	  gmask |= 1 << regno;
2278 	}
2279     }
2280 
2281   /* 4) Space for back trace data structure.
2282 	<return addr reg size> (if required) + <fp size> (if required).  */
2283   frame_info->save_return_addr
2284     = (!crtl->is_leaf || df_regs_ever_live_p (RETURN_ADDR_REGNUM));
2285   /* Saving blink reg in case of leaf function for millicode thunk calls.  */
2286   if (optimize_size && !TARGET_NO_MILLICODE_THUNK_SET)
2287     {
2288       if (arc_compute_millicode_save_restore_regs (gmask, frame_info))
2289 	frame_info->save_return_addr = true;
2290     }
2291 
2292   extra_size = 0;
2293   if (MUST_SAVE_RETURN_ADDR)
2294     extra_size = 4;
2295   if (frame_pointer_needed)
2296     extra_size += 4;
2297 
2298   /* 5) Space for variable arguments passed in registers */
2299   pretend_size	= crtl->args.pretend_args_size;
2300 
2301   /* Ensure everything before the locals is aligned appropriately.  */
2302     {
2303        unsigned int extra_plus_reg_size;
2304        unsigned int extra_plus_reg_size_aligned;
2305 
2306        extra_plus_reg_size = extra_size + reg_size;
2307        extra_plus_reg_size_aligned = ARC_STACK_ALIGN(extra_plus_reg_size);
2308        reg_size = extra_plus_reg_size_aligned - extra_size;
2309     }
2310 
2311   /* Compute total frame size.  */
2312   total_size = var_size + args_size + extra_size + pretend_size + reg_size;
2313 
2314   total_size = ARC_STACK_ALIGN (total_size);
2315 
2316   /* Compute offset of register save area from stack pointer:
2317      Frame: pretend_size <blink> reg_size <fp> var_size args_size <--sp
2318   */
2319   reg_offset = (total_size - (pretend_size + reg_size + extra_size)
2320 		+ (frame_pointer_needed ? 4 : 0));
2321 
2322   /* Save computed information.  */
2323   frame_info->total_size   = total_size;
2324   frame_info->extra_size   = extra_size;
2325   frame_info->pretend_size = pretend_size;
2326   frame_info->var_size     = var_size;
2327   frame_info->args_size    = args_size;
2328   frame_info->reg_size     = reg_size;
2329   frame_info->reg_offset   = reg_offset;
2330   frame_info->gmask        = gmask;
2331   frame_info->initialized  = reload_completed;
2332 
2333   /* Ok, we're done.  */
2334   return total_size;
2335 }
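
/* Worked sketch, assuming 4-byte words and 4-byte stack alignment: a non-leaf
   function with 10 bytes of locals, no outgoing stack arguments, two
   call-saved registers live and a frame pointer gives

     var_size = 12, args_size = 0, reg_size = 8 (gmask has two bits set),
     extra_size = 4 (blink) + 4 (fp) = 8, pretend_size = 0,
     total_size = 12 + 0 + 8 + 0 + 8 = 28,
     reg_offset = 28 - (0 + 8 + 8) + 4 = 16.

   The numbers are only meant to show how the pieces add up.  */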
2336 
2337 /* Common code to save/restore registers.  */
2338 /* BASE_REG is the base register to use for addressing and to adjust.
2339    GMASK is a bitmask of general purpose registers to save/restore.
2340    EPILOGUE_P: 0 = prologue, 1 = epilogue, 2 = epilogue of a sibling-call thunk.
2341    If *FIRST_OFFSET is non-zero, add it first to BASE_REG - preferably
2342    using a pre-modify for the first memory access.  *FIRST_OFFSET is then
2343    zeroed.  */
2344 
2345 static void
2346 arc_save_restore (rtx base_reg,
2347 		  unsigned int gmask, int epilogue_p, int *first_offset)
2348 {
2349   unsigned int offset = 0;
2350   int regno;
2351   struct arc_frame_info *frame = &cfun->machine->frame_info;
2352   rtx sibthunk_insn = NULL_RTX;
2353 
2354   if (gmask)
2355     {
2356       /* Millicode thunks implementation:
2357 	 Generates calls to millicodes for registers starting from r13 to r25
2358 	 Present Limitations:
2359 	 - Only one range supported. The remaining regs will have the ordinary
2360 	   st and ld instructions for store and loads. Hence a gmask asking
2361 	   to store r13-14, r16-r25 will only generate calls to store and
2362 	   load r13 to r14 while store and load insns will be generated for
2363 	   r16 to r25 in the prologue and epilogue respectively.
2364 
2365 	 - Presently library only supports register ranges starting from r13.
2366       */
2367       if (epilogue_p == 2 || frame->millicode_end_reg > 14)
2368 	{
2369 	  int start_call = frame->millicode_start_reg;
2370 	  int end_call = frame->millicode_end_reg;
2371 	  int n_regs = end_call - start_call + 1;
2372 	  int i = 0, r, off = 0;
2373 	  rtx insn;
2374 	  rtx ret_addr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
2375 
2376 	  if (*first_offset)
2377 	    {
2378 	      /* "reg_size" won't be more than 127.  */
2379 	      gcc_assert (epilogue_p || abs (*first_offset) <= 127);
2380 	      frame_add (base_reg, *first_offset);
2381 	      *first_offset = 0;
2382 	    }
2383 	  insn = gen_rtx_PARALLEL
2384 		  (VOIDmode, rtvec_alloc ((epilogue_p == 2) + n_regs + 1));
2385 	  if (epilogue_p == 2)
2386 	    i += 2;
2387 	  else
2388 	    XVECEXP (insn, 0, n_regs) = gen_rtx_CLOBBER (VOIDmode, ret_addr);
2389 	  for (r = start_call; r <= end_call; r++, off += UNITS_PER_WORD, i++)
2390 	    {
2391 	      rtx reg = gen_rtx_REG (SImode, r);
2392 	      rtx mem
2393 		= gen_frame_mem (SImode, plus_constant (Pmode, base_reg, off));
2394 
2395 	      if (epilogue_p)
2396 		XVECEXP (insn, 0, i) = gen_rtx_SET (reg, mem);
2397 	      else
2398 		XVECEXP (insn, 0, i) = gen_rtx_SET (mem, reg);
2399 	      gmask = gmask & ~(1L << r);
2400 	    }
2401 	  if (epilogue_p == 2)
2402 	    sibthunk_insn = insn;
2403 	  else
2404 	    {
2405 	      insn = frame_insn (insn);
2406 	      if (epilogue_p)
2407 		for (r = start_call; r <= end_call; r++)
2408 		  {
2409 		    rtx reg = gen_rtx_REG (SImode, r);
2410 		    add_reg_note (insn, REG_CFA_RESTORE, reg);
2411 		  }
2412 	    }
2413 	  offset += off;
2414 	}
2415 
2416       for (regno = 0; regno <= 31; regno++)
2417 	{
2418 	  machine_mode mode = SImode;
2419 	  bool found = false;
2420 
2421 	  if (TARGET_LL64
2422 	      && (regno % 2 == 0)
2423 	      && ((gmask & (1L << regno)) != 0)
2424 	      && ((gmask & (1L << (regno+1))) != 0))
2425 	    {
2426 	      found = true;
2427 	      mode  = DImode;
2428 	    }
2429 	  else if ((gmask & (1L << regno)) != 0)
2430 	    {
2431 	      found = true;
2432 	      mode  = SImode;
2433 	    }
2434 
2435 	  if (found)
2436 	    {
2437 	      rtx reg = gen_rtx_REG (mode, regno);
2438 	      rtx addr, mem;
2439 	      int cfa_adjust = *first_offset;
2440 
2441 	      if (*first_offset)
2442 		{
2443 		  gcc_assert (!offset);
2444 		  addr = plus_constant (Pmode, base_reg, *first_offset);
2445 		  addr = gen_rtx_PRE_MODIFY (Pmode, base_reg, addr);
2446 		  *first_offset = 0;
2447 		}
2448 	      else
2449 		{
2450 		  gcc_assert (SMALL_INT (offset));
2451 		  addr = plus_constant (Pmode, base_reg, offset);
2452 		}
2453 	      mem = gen_frame_mem (mode, addr);
2454 	      if (epilogue_p)
2455 		{
2456 		  rtx insn =
2457 		    frame_move_inc (reg, mem, base_reg, addr);
2458 		  add_reg_note (insn, REG_CFA_RESTORE, reg);
2459 		  if (cfa_adjust)
2460 		    {
2461 		      enum reg_note note = REG_CFA_ADJUST_CFA;
2462 		      add_reg_note (insn, note,
2463 				    gen_rtx_SET (stack_pointer_rtx,
2464 						 plus_constant (Pmode,
2465 								stack_pointer_rtx,
2466 								cfa_adjust)));
2467 		    }
2468 		}
2469 	      else
2470 		frame_move_inc (mem, reg, base_reg, addr);
2471 	      offset += UNITS_PER_WORD;
2472 	      if (mode == DImode)
2473 		{
2474 		  offset += UNITS_PER_WORD;
2475 		  ++regno;
2476 		}
2477 	    } /* if */
2478 	} /* for */
2479     }/* if */
2480   if (sibthunk_insn)
2481     {
2482       int start_call = frame->millicode_start_reg;
2483       int end_call = frame->millicode_end_reg;
2484       int r;
2485 
2486       rtx r12 = gen_rtx_REG (Pmode, 12);
2487 
2488       frame_insn (gen_rtx_SET (r12, GEN_INT (offset)));
2489       XVECEXP (sibthunk_insn, 0, 0) = ret_rtx;
2490       XVECEXP (sibthunk_insn, 0, 1)
2491 	= gen_rtx_SET (stack_pointer_rtx,
2492 		       gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12));
2493       sibthunk_insn = emit_jump_insn (sibthunk_insn);
2494       RTX_FRAME_RELATED_P (sibthunk_insn) = 1;
2495 
2496       /* Would be nice if we could do this earlier, when the PARALLEL
2497 	 is populated, but these need to be attached after the
2498 	 emit.  */
2499       for (r = start_call; r <= end_call; r++)
2500 	{
2501 	  rtx reg = gen_rtx_REG (SImode, r);
2502 	  add_reg_note (sibthunk_insn, REG_CFA_RESTORE, reg);
2503 	}
2504     }
2505 } /* arc_save_restore */
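
/* For illustration, the PARALLEL built above for a prologue millicode store of
   r13..r15 looks roughly like this (a sketch, not dumped RTL):

     (parallel [(set (mem (base))      (reg:SI 13))
                (set (mem (base + 4))  (reg:SI 14))
                (set (mem (base + 8))  (reg:SI 15))
                (clobber (reg blink))])

   The clobber reflects that the millicode save routine is reached via blink;
   the matching machine-description pattern emits the whole group as a single
   call.  */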
2506 
2507 
2508 int arc_return_address_regs[4]
2509   = {0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM};
2510 
2511 /* Set up the stack and frame pointer (if desired) for the function.  */
2512 
2513 void
2514 arc_expand_prologue (void)
2515 {
2516   int size = get_frame_size ();
2517   unsigned int gmask = cfun->machine->frame_info.gmask;
2518   /*  unsigned int frame_pointer_offset;*/
2519   unsigned int frame_size_to_allocate;
2520   /* (FIXME: The first store will use a PRE_MODIFY; this will usually be r13.
2521      Change the stack layout so that we rather store a high register with the
2522      PRE_MODIFY, thus enabling more short insn generation.)  */
2523   int first_offset = 0;
2524 
2525   size = ARC_STACK_ALIGN (size);
2526 
2527   /* Compute/get total frame size.  */
2528   size = (!cfun->machine->frame_info.initialized
2529 	   ? arc_compute_frame_size (size)
2530 	   : cfun->machine->frame_info.total_size);
2531 
2532   if (flag_stack_usage_info)
2533     current_function_static_stack_size = size;
2534 
2535   /* Keep track of frame size to be allocated.  */
2536   frame_size_to_allocate = size;
2537 
2538   /* These cases shouldn't happen.  Catch them now.  */
2539   gcc_assert (!(size == 0 && gmask));
2540 
2541   /* Allocate space for register arguments if this is a variadic function.  */
2542   if (cfun->machine->frame_info.pretend_size != 0)
2543     {
2544       /* Ensure pretend_size is at most 8 * word_size (32 bytes).  */
2545       gcc_assert (cfun->machine->frame_info.pretend_size <= 32);
2546 
2547       frame_stack_add (-(HOST_WIDE_INT)cfun->machine->frame_info.pretend_size);
2548       frame_size_to_allocate -= cfun->machine->frame_info.pretend_size;
2549     }
2550 
2551   /* The home-grown ABI says link register is saved first.  */
2552   if (MUST_SAVE_RETURN_ADDR)
2553     {
2554       rtx ra = gen_rtx_REG (SImode, RETURN_ADDR_REGNUM);
2555       rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
2556 
2557       frame_move_inc (mem, ra, stack_pointer_rtx, 0);
2558       frame_size_to_allocate -= UNITS_PER_WORD;
2559 
2560     } /* MUST_SAVE_RETURN_ADDR */
2561 
2562   /* Save any needed call-saved regs (and call-used if this is an
2563      interrupt handler) for ARCompact ISA.  */
2564   if (cfun->machine->frame_info.reg_size)
2565     {
2566       first_offset = -cfun->machine->frame_info.reg_size;
2567       /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask.  */
2568       arc_save_restore (stack_pointer_rtx, gmask, 0, &first_offset);
2569       frame_size_to_allocate -= cfun->machine->frame_info.reg_size;
2570     }
2571 
2572 
2573   /* Save frame pointer if needed.  */
2574   if (frame_pointer_needed)
2575     {
2576       rtx addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2577 			       GEN_INT (-UNITS_PER_WORD + first_offset));
2578       rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_MODIFY (Pmode,
2579 							  stack_pointer_rtx,
2580 							  addr));
2581       frame_move_inc (mem, frame_pointer_rtx, stack_pointer_rtx, 0);
2582       frame_size_to_allocate -= UNITS_PER_WORD;
2583       first_offset = 0;
2584       frame_move (frame_pointer_rtx, stack_pointer_rtx);
2585     }
2586 
2587   /* ??? We don't handle the case where the saved regs are more than 252
2588      bytes away from sp.  This can be handled by decrementing sp once, saving
2589      the regs, and then decrementing it again.  The epilogue doesn't have this
2590      problem as the `ld' insn takes reg+limm values (though it would be more
2591      efficient to avoid reg+limm).  */
2592 
2593   frame_size_to_allocate -= first_offset;
2594   /* Allocate the stack frame.  */
2595   if (frame_size_to_allocate > 0)
2596     frame_stack_add ((HOST_WIDE_INT) 0 - frame_size_to_allocate);
2597 
2598   /* Setup the gp register, if needed.  */
2599   if (crtl->uses_pic_offset_table)
2600     arc_finalize_pic ();
2601 }
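
/* For a small non-variadic function that saves blink and r13 and needs 16
   bytes of locals (no frame pointer), the prologue expanded above corresponds
   roughly to the following (an illustrative sketch, not verified compiler
   output):

     st.a  blink,[sp,-4]   ; MUST_SAVE_RETURN_ADDR, pre-decrement store
     st.a  r13,[sp,-4]     ; arc_save_restore, first_offset folded as pre-modify
     sub   sp,sp,16        ; frame_stack_add for the remaining 16 bytes  */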
2602 
2603 /* Do any necessary cleanup after a function to restore stack, frame,
2604    and regs.  */
2605 
2606 void
2607 arc_expand_epilogue (int sibcall_p)
2608 {
2609   int size = get_frame_size ();
2610   enum arc_function_type fn_type = arc_compute_function_type (cfun);
2611 
2612   size = ARC_STACK_ALIGN (size);
2613   size = (!cfun->machine->frame_info.initialized
2614 	   ? arc_compute_frame_size (size)
2615 	   : cfun->machine->frame_info.total_size);
2616 
2617   unsigned int pretend_size = cfun->machine->frame_info.pretend_size;
2618   unsigned int frame_size;
2619   unsigned int size_to_deallocate;
2620   int restored;
2621   int can_trust_sp_p = !cfun->calls_alloca;
2622   int first_offset = 0;
2623   int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0;
2624   rtx insn;
2625 
2626   size_to_deallocate = size;
2627 
2628   frame_size = size - (pretend_size +
2629 		       cfun->machine->frame_info.reg_size +
2630 		       cfun->machine->frame_info.extra_size);
2631 
2632   /* ??? There are lots of optimizations that can be done here.
2633      EG: Use fp to restore regs if it's closer.
2634      Maybe in time we'll do them all.  For now, always restore regs from
2635      sp, but don't restore sp if we don't have to.  */
2636 
2637   if (!can_trust_sp_p)
2638     gcc_assert (frame_pointer_needed);
2639 
2640   /* Restore stack pointer to the beginning of saved register area for
2641      ARCompact ISA.  */
2642   if (frame_size)
2643     {
2644       if (frame_pointer_needed)
2645 	frame_move (stack_pointer_rtx, frame_pointer_rtx);
2646       else
2647 	first_offset = frame_size;
2648       size_to_deallocate -= frame_size;
2649     }
2650   else if (!can_trust_sp_p)
2651     frame_stack_add (-frame_size);
2652 
2653 
2654   /* Restore any saved registers.  */
2655   if (frame_pointer_needed)
2656     {
2657       rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
2658 
2659       insn = frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
2660 			     stack_pointer_rtx, 0);
2661       add_reg_note (insn, REG_CFA_RESTORE, frame_pointer_rtx);
2662       add_reg_note (insn, REG_CFA_DEF_CFA,
2663 		    plus_constant (SImode, stack_pointer_rtx,
2664 				   4));
2665       size_to_deallocate -= UNITS_PER_WORD;
2666     }
2667 
2668   /* Load blink after the millicode thunk calls when optimizing for size.  */
2669   if (millicode_p)
2670     {
2671 	  int sibthunk_p = (!sibcall_p
2672 			    && fn_type == ARC_FUNCTION_NORMAL
2673 			    && !cfun->machine->frame_info.pretend_size);
2674 
2675 	  gcc_assert (!(cfun->machine->frame_info.gmask
2676 			& (FRAME_POINTER_MASK | RETURN_ADDR_MASK)));
2677 	  arc_save_restore (stack_pointer_rtx,
2678 			    cfun->machine->frame_info.gmask,
2679 			    1 + sibthunk_p, &first_offset);
2680 	  if (sibthunk_p)
2681 	    return;
2682     }
2683   /* If we are to restore registers, and first_offset would require
2684      a limm to be encoded in a PRE_MODIFY, yet we can add it with a
2685      fast add to the stack pointer, do this now.  */
2686   if ((!SMALL_INT (first_offset)
2687        && cfun->machine->frame_info.gmask
2688        && ((TARGET_ARC700 && !optimize_size)
2689 	    ? first_offset <= 0x800
2690 	    : satisfies_constraint_C2a (GEN_INT (first_offset))))
2691        /* Also do this if we have both gprs and return
2692 	  address to restore, and they both would need a LIMM.  */
2693        || (MUST_SAVE_RETURN_ADDR
2694 	   && !SMALL_INT ((cfun->machine->frame_info.reg_size + first_offset) >> 2)
2695 	   && cfun->machine->frame_info.gmask))
2696     {
2697       frame_stack_add (first_offset);
2698       first_offset = 0;
2699     }
2700   if (MUST_SAVE_RETURN_ADDR)
2701     {
2702       rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
2703       int ra_offs = cfun->machine->frame_info.reg_size + first_offset;
2704       rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs);
2705       HOST_WIDE_INT cfa_adjust = 0;
2706 
2707       /* If the load of blink would need a LIMM, but we can add
2708 	 the offset quickly to sp, do the latter.  */
2709       if (!SMALL_INT (ra_offs >> 2)
2710 	  && !cfun->machine->frame_info.gmask
2711 	  && ((TARGET_ARC700 && !optimize_size)
2712 	       ? ra_offs <= 0x800
2713 	       : satisfies_constraint_C2a (GEN_INT (ra_offs))))
2714 	{
2715 	   size_to_deallocate -= ra_offs - first_offset;
2716 	   first_offset = 0;
2717 	   frame_stack_add (ra_offs);
2718 	   ra_offs = 0;
2719 	   addr = stack_pointer_rtx;
2720 	}
2721       /* See if we can combine the load of the return address with the
2722 	 final stack adjustment.
2723 	 We need a separate load if there are still registers to
2724 	 restore.  We also want a separate load if the combined insn
2725 	 would need a limm, but a separate load doesn't.  */
2726       if (ra_offs
2727 	  && !cfun->machine->frame_info.gmask
2728 	  && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2)))
2729 	{
2730 	  addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr);
2731 	  cfa_adjust = ra_offs;
2732 	  first_offset = 0;
2733 	  size_to_deallocate -= cfun->machine->frame_info.reg_size;
2734 	}
2735       else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD)
2736 	{
2737 	  addr = gen_rtx_POST_INC (Pmode, addr);
2738 	  cfa_adjust = GET_MODE_SIZE (Pmode);
2739 	  size_to_deallocate = 0;
2740 	}
2741 
2742       insn = frame_move_inc (ra, gen_frame_mem (Pmode, addr),
2743 			     stack_pointer_rtx, addr);
2744       if (cfa_adjust)
2745 	{
2746 	  enum reg_note note = REG_CFA_ADJUST_CFA;
2747 
2748 	  add_reg_note (insn, note,
2749 			gen_rtx_SET (stack_pointer_rtx,
2750 				     plus_constant (SImode, stack_pointer_rtx,
2751 						    cfa_adjust)));
2752 	}
2753       add_reg_note (insn, REG_CFA_RESTORE, ra);
2754     }
2755 
2756   if (!millicode_p)
2757     {
2758        if (cfun->machine->frame_info.reg_size)
2759 	 arc_save_restore (stack_pointer_rtx,
2760 	   /* The zeroing of these two bits is unnecessary, but leave this in for clarity.  */
2761 			   cfun->machine->frame_info.gmask
2762 			   & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, &first_offset);
2763     }
2764 
2765 
2766   /* The rest of this function does the following:
2767      ARCompact    : restore sp (phase-2), return
2768   */
2769 
2770   /* Keep track of how much of the stack pointer we've restored.
2771      It makes the following a lot more readable.  */
2772   size_to_deallocate += first_offset;
2773   restored = size - size_to_deallocate;
2774 
2775   if (size > restored)
2776     frame_stack_add (size - restored);
2777 
2778   /* Emit the return instruction.  */
2779   if (sibcall_p == FALSE)
2780     emit_jump_insn (gen_simple_return ());
2781 }
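
/* The matching epilogue for the prologue sketch above comes out roughly as
   (again only an illustration, not verified compiler output):

     ld    blink,[sp,20]   ; reload the return address from its save slot
     ld.a  r13,[sp,16]     ; reload r13, folding 16 bytes of sp adjustment
     add   sp,sp,8         ; deallocate what is left of the frame
     j_s   [blink]         ; simple return  */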
2782 
2783 /* Return the offset relative to the stack pointer where the return address
2784    is stored, or -1 if it is not stored.  */
2785 
2786 int
2787 arc_return_slot_offset ()
2788 {
2789   struct arc_frame_info *afi = &cfun->machine->frame_info;
2790 
2791   return (afi->save_return_addr
2792 	  ? afi->total_size - afi->pretend_size - afi->extra_size : -1);
2793 }
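
/* With the example frame used in the sketches above (total_size 24,
   pretend_size 0, extra_size 4) this returns 20, matching the slot the
   epilogue reloads blink from.  */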
2794 
2795 /* PIC */
2796 
2797 /* Helper to generate unspec constant.  */
2798 
2799 static rtx
2800 arc_unspec_offset (rtx loc, int unspec)
2801 {
2802   return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, loc),
2803 					       unspec));
2804 }
2805 
2806 /* Emit special PIC prologues and epilogues.  */
2807 /* If the function has any GOTOFF relocations, then the GOTBASE
2808    register has to be setup in the prologue
2809    The instruction needed at the function start for setting up the
2810    GOTBASE register is
2811       add rdest, pc,
2812    ----------------------------------------------------------
2813    The rtl to be emitted for this should be:
2814      set (reg basereg)
2815          (plus (reg pc)
2816                (const (unspec (symref _DYNAMIC) 3)))
2817    ----------------------------------------------------------  */
2818 
2819 static void
2820 arc_finalize_pic (void)
2821 {
2822   rtx pat;
2823   rtx baseptr_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
2824 
2825   if (crtl->uses_pic_offset_table == 0)
2826     return;
2827 
2828   gcc_assert (flag_pic != 0);
2829 
2830   pat = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC");
2831   pat = arc_unspec_offset (pat, ARC_UNSPEC_GOT);
2832   pat = gen_rtx_SET (baseptr_rtx, pat);
2833 
2834   emit_insn (pat);
2835 }
2836 
2837 /* !TARGET_BARREL_SHIFTER support.  */
2838 /* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what
2839    kind of shift.  */
2840 
2841 void
2842 emit_shift (enum rtx_code code, rtx op0, rtx op1, rtx op2)
2843 {
2844   rtx shift = gen_rtx_fmt_ee (code, SImode, op1, op2);
2845   rtx pat
2846     = ((shift4_operator (shift, SImode) ?  gen_shift_si3 : gen_shift_si3_loop)
2847 	(op0, op1, op2, shift));
2848   emit_insn (pat);
2849 }
2850 
2851 /* Output the assembler code for doing a shift.
2852    We go to a bit of trouble to generate efficient code as the ARC601 only has
2853    single bit shifts.  This is taken from the h8300 port.  We only have one
2854    mode of shifting and can't access individual bytes like the h8300 can, so
2855    this is greatly simplified (at the expense of not generating hyper-
2856    efficient code).
2857 
2858    This function is not used if the variable shift insns are present.  */
2859 
2860 /* FIXME:  This probably can be done using a define_split in arc.md.
2861    Alternately, generate rtx rather than output instructions.  */
2862 
2863 const char *
2864 output_shift (rtx *operands)
2865 {
2866   /*  static int loopend_lab;*/
2867   rtx shift = operands[3];
2868   machine_mode mode = GET_MODE (shift);
2869   enum rtx_code code = GET_CODE (shift);
2870   const char *shift_one;
2871 
2872   gcc_assert (mode == SImode);
2873 
2874   switch (code)
2875     {
2876     case ASHIFT:   shift_one = "add %0,%1,%1"; break;
2877     case ASHIFTRT: shift_one = "asr %0,%1"; break;
2878     case LSHIFTRT: shift_one = "lsr %0,%1"; break;
2879     default:       gcc_unreachable ();
2880     }
2881 
2882   if (GET_CODE (operands[2]) != CONST_INT)
2883     {
2884       output_asm_insn ("and.f lp_count,%2, 0x1f", operands);
2885       goto shiftloop;
2886     }
2887   else
2888     {
2889       int n;
2890 
2891       n = INTVAL (operands[2]);
2892 
2893       /* Only consider the lower 5 bits of the shift count.  */
2894       n = n & 0x1f;
2895 
2896       /* First see if we can do them inline.  */
2897       /* ??? We could get better scheduling & shorter code (using short insns)
2898 	 by using splitters.  Alas, that'd be even more verbose.  */
2899       if (code == ASHIFT && n <= 9 && n > 2
2900 	  && dest_reg_operand (operands[4], SImode))
2901 	{
2902 	  output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands);
2903 	  for (n -=3 ; n >= 3; n -= 3)
2904 	    output_asm_insn ("add3 %0,%4,%0", operands);
2905 	  if (n == 2)
2906 	    output_asm_insn ("add2 %0,%4,%0", operands);
2907 	  else if (n)
2908 	    output_asm_insn ("add %0,%0,%0", operands);
2909 	}
2910       else if (n <= 4)
2911 	{
2912 	  while (--n >= 0)
2913 	    {
2914 	      output_asm_insn (shift_one, operands);
2915 	      operands[1] = operands[0];
2916 	    }
2917 	}
2918       /* See if we can use a rotate/and.  */
2919       else if (n == BITS_PER_WORD - 1)
2920 	{
2921 	  switch (code)
2922 	    {
2923 	    case ASHIFT :
2924 	      output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands);
2925 	      break;
2926 	    case ASHIFTRT :
2927 	      /* The ARC doesn't have a rol insn.  Use something else.  */
2928 	      output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands);
2929 	      break;
2930 	    case LSHIFTRT :
2931 	      /* The ARC doesn't have a rol insn.  Use something else.  */
2932 	      output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands);
2933 	      break;
2934 	    default:
2935 	      break;
2936 	    }
2937 	}
2938       else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode))
2939 	{
2940 	  switch (code)
2941 	    {
2942 	    case ASHIFT :
2943 	      output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands);
2944 	      break;
2945 	    case ASHIFTRT :
2946 #if 1 /* Need some scheduling comparisons.  */
2947 	      output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t"
2948 			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
2949 #else
2950 	      output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t"
2951 			       "sbc.f %0,%0,%4\n\trlc %0,%0", operands);
2952 #endif
2953 	      break;
2954 	    case LSHIFTRT :
2955 #if 1
2956 	      output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t"
2957 			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
2958 #else
2959 	      output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t"
2960 			       "and %0,%0,1\n\trlc %0,%0", operands);
2961 #endif
2962 	      break;
2963 	    default:
2964 	      break;
2965 	    }
2966 	}
2967       else if (n == BITS_PER_WORD - 3 && code == ASHIFT)
2968 	output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0",
2969 			 operands);
2970       /* Must loop.  */
2971       else
2972 	{
2973 	  operands[2] = GEN_INT (n);
2974 	  output_asm_insn ("mov.f lp_count, %2", operands);
2975 
2976 	shiftloop:
2977 	    {
2978 	      output_asm_insn ("lpnz\t2f", operands);
2979 	      output_asm_insn (shift_one, operands);
2980 	      output_asm_insn ("nop", operands);
2981 	      fprintf (asm_out_file, "2:\t%s end single insn loop\n",
2982 		       ASM_COMMENT_START);
2983 	    }
2984 	}
2985     }
2986 
2987   return "";
2988 }
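
/* Example of the inline expansion above (a sketch): an ASHIFT by 5 with the
   scratch register operand available is emitted as

     mov  %4,0
     add3 %0,%4,%1    ; %0 = %1 << 3
     add2 %0,%4,%0    ; %0 = %0 << 2

   i.e. two scaled adds against a zeroed scratch instead of a five-iteration
   single-bit loop.  */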
2989 
2990 /* Nested function support.  */
2991 
2992 /* Directly store VALUE into memory object BLOCK at OFFSET.  */
2993 
2994 static void
2995 emit_store_direct (rtx block, int offset, int value)
2996 {
2997   emit_insn (gen_store_direct (adjust_address (block, SImode, offset),
2998 			       force_reg (SImode,
2999 					  gen_int_mode (value, SImode))));
3000 }
3001 
3002 /* Emit RTL insns to initialize the variable parts of a trampoline.
3003    FNADDR is an RTX for the address of the function's pure code.
3004    CXT is an RTX for the static chain value for the function.  */
3005 /* With potentially multiple shared objects loaded, and multiple stacks
3006    present for multiple threads where trampolines might reside, a simple
3007    range check will likely not suffice for the profiler to tell if a callee
3008    is a trampoline.  We get a speedier check by making the trampoline start
3009    at an address that is not 4-byte aligned.
3010    A trampoline looks like this:
3011 
3012    nop_s	     0x78e0
3013 entry:
3014    ld_s r12,[pcl,12] 0xd403
3015    ld   r11,[pcl,12] 0x170c 700b
3016    j_s [r12]         0x7c00
3017    nop_s	     0x78e0
3018 
3019    The fastest trampoline to execute for trampolines within +-8KB of CXT
3020    would be:
3021    add2 r11,pcl,s12
3022    j [limm]           0x20200f80 limm
3023    and that would also be faster to write to the stack by computing the offset
3024    from CXT to TRAMP at compile time.  However, it would really be better to
3025    get rid of the high cost of cache invalidation when generating trampolines,
3026    which requires that the code part of trampolines stays constant, and
3027    additionally either
3028    - making sure that no executable code but trampolines is on the stack,
3029      no icache entries linger for the area of the stack from before the
3030      stack was allocated, and allocating trampolines in trampoline-only
3031      cache lines
3032   or
3033    - allocate trampolines from a special pool of pre-allocated trampolines.  */
3034 
3035 static void
3036 arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
3037 {
3038   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3039 
3040   emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0);
3041   emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c);
3042   emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 0x7c0078e0 : 0x78e07c00);
3043   emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr);
3044   emit_move_insn (adjust_address (tramp, SImode, 16), cxt);
3045   emit_insn (gen_flush_icache (adjust_address (tramp, SImode, 0)));
3046 }
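
/* So after initialization the trampoline holds the three fixed code words at
   offsets 0, 4 and 8, FNADDR at offset 12 (picked up into r12 by the
   pcl-relative ld_s) and the static chain CXT at offset 16 (picked up into
   r11 by the following ld).  */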
3047 
3048 /* Allow the profiler to easily distinguish trampolines from normal
3049   functions.  */
3050 
3051 static rtx
3052 arc_trampoline_adjust_address (rtx addr)
3053 {
3054   return plus_constant (Pmode, addr, 2);
3055 }
3056 
3057 /* This is set briefly to 1 when we output a ".as" address modifier, and then
3058    reset when we output the scaled address.  */
3059 static int output_scaled = 0;
3060 
3061 /* Print operand X (an rtx) in assembler syntax to file FILE.
3062    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
3063    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
3064 /* In final.c:output_asm_insn:
3065     'l' : label
3066     'a' : address
3067     'c' : constant address if CONSTANT_ADDRESS_P
3068     'n' : negative
3069    Here:
3070     'Z': log2(x+1)-1
3071     'z': log2
3072     'M': log2(~x)
3073     'p': bit Position of lsb
3074     's': size of bit field
3075     '#': condbranch delay slot suffix
3076     '*': jump delay slot suffix
3077     '?' : nonjump-insn suffix for conditional execution or short instruction
3078     '!' : jump / call suffix for conditional execution or short instruction
3079     '`': fold constant inside unary operator, re-recognize, and emit.
3080     'd'
3081     'D'
3082     'R': Second word
3083     'S'
3084     'B': Branch comparison operand - suppress sda reference
3085     'H': Most significant word
3086     'L': Least significant word
3087     'A': ASCII decimal representation of floating point value
3088     'U': Load/store update or scaling indicator
3089     'V': cache bypass indicator for volatile
3090     'P'
3091     'F'
3092     '^'
3093     'O': Operator
3094     'o': original symbol - no @ prepending.  */
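
/* A few concrete examples of the integer codes (illustrative only): for
   x == 7, '%Z' prints 2 (log2 (7 + 1) - 1); for x == 8, '%z' prints 3; for
   x == ~0x10 (all bits set except bit 4), '%M' prints 4; for x == 0x00000ff0,
   '%p' prints 4 (position of the least significant set bit) and '%s' prints
   8 (width of the contiguous bit field).  */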
3095 
3096 void
3097 arc_print_operand (FILE *file, rtx x, int code)
3098 {
3099   switch (code)
3100     {
3101     case 'Z':
3102       if (GET_CODE (x) == CONST_INT)
3103 	fprintf (file, "%d",exact_log2(INTVAL (x) + 1) - 1 );
3104       else
3105 	output_operand_lossage ("invalid operand to %%Z code");
3106 
3107       return;
3108 
3109     case 'z':
3110       if (GET_CODE (x) == CONST_INT)
3111 	fprintf (file, "%d",exact_log2(INTVAL (x)) );
3112       else
3113 	output_operand_lossage ("invalid operand to %%z code");
3114 
3115       return;
3116 
3117     case 'M':
3118       if (GET_CODE (x) == CONST_INT)
3119 	fprintf (file, "%d",exact_log2(~INTVAL (x)) );
3120       else
3121 	output_operand_lossage ("invalid operand to %%M code");
3122 
3123       return;
3124 
3125     case 'p':
3126       if (GET_CODE (x) == CONST_INT)
3127 	fprintf (file, "%d", exact_log2 (INTVAL (x) & -INTVAL (x)));
3128       else
3129 	output_operand_lossage ("invalid operand to %%p code");
3130       return;
3131 
3132     case 's':
3133       if (GET_CODE (x) == CONST_INT)
3134 	{
3135 	  HOST_WIDE_INT i = INTVAL (x);
3136 	  HOST_WIDE_INT s = exact_log2 (i & -i);
3137 	  fprintf (file, "%d", exact_log2 (((0xffffffffUL & i) >> s) + 1));
3138 	}
3139       else
3140 	output_operand_lossage ("invalid operand to %%s code");
3141       return;
3142 
3143     case '#' :
3144       /* Conditional branches depending on condition codes.
3145 	 Note that this is only for branches that were known to depend on
3146 	 condition codes before delay slot scheduling;
3147 	 out-of-range brcc / bbit expansions should use '*'.
3148 	 This distinction is important because of the different
3149 	 allowable delay slot insns and the output of the delay suffix
3150 	 for TARGET_AT_DBR_COND_EXEC.  */
3151     case '*' :
3152       /* Unconditional branches / branches not depending on condition codes.
3153 	 This could also be a CALL_INSN.
3154 	 Output the appropriate delay slot suffix.  */
3155       if (final_sequence && final_sequence->len () != 1)
3156 	{
3157 	  rtx_insn *jump = final_sequence->insn (0);
3158 	  rtx_insn *delay = final_sequence->insn (1);
3159 
3160 	  /* For TARGET_PAD_RETURN we might have grabbed the delay insn.  */
3161 	  if (delay->deleted ())
3162 	    return;
3163 	  if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
3164 	    fputs (INSN_FROM_TARGET_P (delay) ? ".d"
3165 		   : TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d"
3166 		   : get_attr_type (jump) == TYPE_RETURN && code == '#' ? ""
3167 		   : ".nd",
3168 		   file);
3169 	  else
3170 	    fputs (".d", file);
3171 	}
3172       return;
3173     case '?' : /* with leading "." */
3174     case '!' : /* without leading "." */
3175       /* This insn can be conditionally executed.  See if the ccfsm machinery
3176 	 says it should be conditionalized.
3177 	 If it shouldn't, we'll check the compact attribute if this insn
3178 	 has a short variant, which may be used depending on code size and
3179 	 alignment considerations.  */
3180       if (current_insn_predicate)
3181 	arc_ccfsm_current.cc
3182 	  = get_arc_condition_code (current_insn_predicate);
3183       if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current))
3184 	{
3185 	  /* Is this insn in a delay slot sequence?  */
3186 	  if (!final_sequence || XVECLEN (final_sequence, 0) < 2
3187 	      || current_insn_predicate
3188 	      || CALL_P (final_sequence->insn (0))
3189 	      || simplejump_p (final_sequence->insn (0)))
3190 	    {
3191 	      /* This insn isn't in a delay slot sequence, or conditionalized
3192 		 independently of its position in a delay slot.  */
3193 	      fprintf (file, "%s%s",
3194 		       code == '?' ? "." : "",
3195 		       arc_condition_codes[arc_ccfsm_current.cc]);
3196 	      /* If this is a jump, there are still short variants.  However,
3197 		 only beq_s / bne_s have the same offset range as b_s,
3198 		 and the only short conditional returns are jeq_s and jne_s.  */
3199 	      if (code == '!'
3200 		  && (arc_ccfsm_current.cc == ARC_CC_EQ
3201 		      || arc_ccfsm_current.cc == ARC_CC_NE
3202 		      || 0 /* FIXME: check if branch in 7 bit range.  */))
3203 		output_short_suffix (file);
3204 	    }
3205 	  else if (code == '!') /* Jump with delay slot.  */
3206 	    fputs (arc_condition_codes[arc_ccfsm_current.cc], file);
	  else /* An instruction in a delay slot of a jump or call.  */
3208 	    {
3209 	      rtx jump = XVECEXP (final_sequence, 0, 0);
3210 	      rtx insn = XVECEXP (final_sequence, 0, 1);
3211 
3212 	      /* If the insn is annulled and is from the target path, we need
		 to invert the condition test.  */
3214 	      if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
3215 		{
3216 		  if (INSN_FROM_TARGET_P (insn))
3217 		    fprintf (file, "%s%s",
3218 			     code == '?' ? "." : "",
3219 			     arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]);
3220 		  else
3221 		    fprintf (file, "%s%s",
3222 			     code == '?' ? "." : "",
3223 			     arc_condition_codes[arc_ccfsm_current.cc]);
3224 		  if (arc_ccfsm_current.state == 5)
3225 		    arc_ccfsm_current.state = 0;
3226 		}
3227 	      else
3228 		/* This insn is executed for either path, so don't
3229 		   conditionalize it at all.  */
3230 		output_short_suffix (file);
3231 
3232 	    }
3233 	}
3234       else
3235 	output_short_suffix (file);
3236       return;
    case '`':
3238       /* FIXME: fold constant inside unary operator, re-recognize, and emit.  */
3239       gcc_unreachable ();
3240     case 'd' :
3241       fputs (arc_condition_codes[get_arc_condition_code (x)], file);
3242       return;
3243     case 'D' :
3244       fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE
3245 				 (get_arc_condition_code (x))],
3246 	     file);
3247       return;
3248     case 'R' :
3249       /* Write second word of DImode or DFmode reference,
3250 	 register or memory.  */
3251       if (GET_CODE (x) == REG)
3252 	fputs (reg_names[REGNO (x)+1], file);
3253       else if (GET_CODE (x) == MEM)
3254 	{
3255 	  fputc ('[', file);
3256 
3257 	  /* Handle possible auto-increment.  For PRE_INC / PRE_DEC /
3258 	    PRE_MODIFY, we will have handled the first word already;
3259 	    For POST_INC / POST_DEC / POST_MODIFY, the access to the
3260 	    first word will be done later.  In either case, the access
3261 	    to the first word will do the modify, and we only have
3262 	    to add an offset of four here.  */
3263 	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
3264 	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
3265 	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY
3266 	      || GET_CODE (XEXP (x, 0)) == POST_INC
3267 	      || GET_CODE (XEXP (x, 0)) == POST_DEC
3268 	      || GET_CODE (XEXP (x, 0)) == POST_MODIFY)
3269 	    output_address (VOIDmode,
3270 			    plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 4));
3271 	  else if (output_scaled)
3272 	    {
3273 	      rtx addr = XEXP (x, 0);
3274 	      int size = GET_MODE_SIZE (GET_MODE (x));
3275 
3276 	      output_address (VOIDmode,
3277 			      plus_constant (Pmode, XEXP (addr, 0),
3278 					     ((INTVAL (XEXP (addr, 1)) + 4)
3279 					      >> (size == 2 ? 1 : 2))));
3280 	      output_scaled = 0;
3281 	    }
3282 	  else
3283 	    output_address (VOIDmode,
3284 			    plus_constant (Pmode, XEXP (x, 0), 4));
3285 	  fputc (']', file);
3286 	}
3287       else
3288 	output_operand_lossage ("invalid operand to %%R code");
3289       return;
3290     case 'S' :
3291 	/* FIXME: remove %S option.  */
3292 	break;
3293     case 'B' /* Branch or other LIMM ref - must not use sda references.  */ :
3294       if (CONSTANT_P (x))
3295 	{
3296 	  output_addr_const (file, x);
3297 	  return;
3298 	}
3299       break;
3300     case 'H' :
3301     case 'L' :
3302       if (GET_CODE (x) == REG)
3303 	{
3304 	  /* L = least significant word, H = most significant word.  */
3305 	  if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L'))
3306 	    fputs (reg_names[REGNO (x)], file);
3307 	  else
3308 	    fputs (reg_names[REGNO (x)+1], file);
3309 	}
3310       else if (GET_CODE (x) == CONST_INT
3311 	       || GET_CODE (x) == CONST_DOUBLE)
3312 	{
3313 	  rtx first, second, word;
3314 
3315 	  split_double (x, &first, &second);
3316 
	  if (WORDS_BIG_ENDIAN == 0)
3318 	    word = (code == 'L' ? first : second);
3319 	  else
3320 	    word = (code == 'L' ? second : first);
3321 
3322 	  fprintf (file, "0x%08" PRIx32, ((uint32_t) INTVAL (word)));
3323 	}
3324       else
3325 	output_operand_lossage ("invalid operand to %%H/%%L code");
3326       return;
3327     case 'A' :
3328       {
3329 	char str[30];
3330 
3331 	gcc_assert (GET_CODE (x) == CONST_DOUBLE
3332 		    && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT);
3333 
3334 	real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1);
3335 	fprintf (file, "%s", str);
3336 	return;
3337       }
3338     case 'U' :
3339       /* Output a load/store with update indicator if appropriate.  */
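      /* A sketch of the resulting assembler syntax (mnemonics approximate):
	 ".a"  for pre-modify addresses  (e.g. "st.a r0,[sp,-4]"),
	 ".ab" for post-modify addresses (e.g. "ld.ab r0,[r1,4]"),
	 ".as" for a scaled index or a scalable offset.  */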
3340       if (GET_CODE (x) == MEM)
3341 	{
3342 	  rtx addr = XEXP (x, 0);
3343 	  switch (GET_CODE (addr))
3344 	    {
3345 	    case PRE_INC: case PRE_DEC: case PRE_MODIFY:
3346 	      fputs (".a", file); break;
3347 	    case POST_INC: case POST_DEC: case POST_MODIFY:
3348 	      fputs (".ab", file); break;
3349 	    case PLUS:
3350 	      /* Are we using a scaled index?  */
3351 	      if (GET_CODE (XEXP (addr, 0)) == MULT)
3352 		fputs (".as", file);
3353 	      /* Can we use a scaled offset?  */
3354 	      else if (CONST_INT_P (XEXP (addr, 1))
3355 		       && GET_MODE_SIZE (GET_MODE (x)) > 1
3356 		       && (!(INTVAL (XEXP (addr, 1))
3357 			     & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3))
3358 		       /* Does it make a difference?  */
		       && !SMALL_INT_RANGE (INTVAL (XEXP (addr, 1)),
3360 					   GET_MODE_SIZE (GET_MODE (x)) - 2, 0))
3361 		{
3362 		  fputs (".as", file);
3363 		  output_scaled = 1;
3364 		}
3365 	      break;
3366 	    case REG:
3367 	      break;
3368 	    default:
3369 	      gcc_assert (CONSTANT_P (addr)); break;
3370 	    }
3371 	}
3372       else
3373 	output_operand_lossage ("invalid operand to %%U code");
3374       return;
3375     case 'V' :
3376       /* Output cache bypass indicator for a load/store insn.  Volatile memory
3377 	 refs are defined to use the cache bypass mechanism.  */
3378       if (GET_CODE (x) == MEM)
3379 	{
	  if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET)
3381 	    fputs (".di", file);
3382 	}
3383       else
3384 	output_operand_lossage ("invalid operand to %%V code");
3385       return;
3386       /* plt code.  */
3387     case 'P':
3388     case 0 :
3389       /* Do nothing special.  */
3390       break;
3391     case 'F':
3392       fputs (reg_names[REGNO (x)]+1, file);
3393       return;
3394     case '^':
3395 	/* This punctuation character is needed because label references are
3396 	printed in the output template using %l. This is a front end
3397 	character, and when we want to emit a '@' before it, we have to use
3398 	this '^'.  */
3399 
	fputc ('@', file);
3401 	return;
3402     case 'O':
3403       /* Output an operator.  */
3404       switch (GET_CODE (x))
3405 	{
3406 	case PLUS:	fputs ("add", file); return;
3407 	case SS_PLUS:	fputs ("adds", file); return;
3408 	case AND:	fputs ("and", file); return;
3409 	case IOR:	fputs ("or", file); return;
3410 	case XOR:	fputs ("xor", file); return;
3411 	case MINUS:	fputs ("sub", file); return;
3412 	case SS_MINUS:	fputs ("subs", file); return;
3413 	case ASHIFT:	fputs ("asl", file); return;
3414 	case ASHIFTRT:	fputs ("asr", file); return;
3415 	case LSHIFTRT:	fputs ("lsr", file); return;
3416 	case ROTATERT:	fputs ("ror", file); return;
3417 	case MULT:	fputs ("mpy", file); return;
3418 	case ABS:	fputs ("abs", file); return; /* Unconditional.  */
3419 	case NEG:	fputs ("neg", file); return;
3420 	case SS_NEG:	fputs ("negs", file); return;
3421 	case NOT:	fputs ("not", file); return; /* Unconditional.  */
3422 	case ZERO_EXTEND:
3423 	  fputs ("ext", file); /* bmsk allows predication.  */
3424 	  goto size_suffix;
3425 	case SIGN_EXTEND: /* Unconditional.  */
3426 	  fputs ("sex", file);
3427 	size_suffix:
3428 	  switch (GET_MODE (XEXP (x, 0)))
3429 	    {
3430 	    case QImode: fputs ("b", file); return;
3431 	    case HImode: fputs ("w", file); return;
3432 	    default: break;
3433 	    }
3434 	  break;
	case SS_TRUNCATE:
	  if (GET_MODE (x) != HImode)
	    break;
	  fputs ("sat16", file);
	  return;
	default: break;
3440 	}
3441       output_operand_lossage ("invalid operand to %%O code"); return;
3442     case 'o':
3443       if (GET_CODE (x) == SYMBOL_REF)
3444 	{
3445 	  assemble_name (file, XSTR (x, 0));
3446 	  return;
3447 	}
3448       break;
3449     case '&':
3450       if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
3451 	fprintf (file, "; unalign: %d", cfun->machine->unalign);
3452       return;
3453     case '+':
3454       if (TARGET_V2)
3455 	fputs ("m", file);
3456       else
3457 	fputs ("h", file);
3458       return;
3459     case '_':
3460       if (TARGET_V2)
3461 	fputs ("h", file);
3462       else
3463 	fputs ("w", file);
3464       return;
3465     default :
3466       /* Unknown flag.  */
3467       output_operand_lossage ("invalid operand output code");
3468     }
3469 
3470   switch (GET_CODE (x))
3471     {
3472     case REG :
3473       fputs (reg_names[REGNO (x)], file);
3474       break;
3475     case MEM :
3476       {
3477 	rtx addr = XEXP (x, 0);
3478 	int size = GET_MODE_SIZE (GET_MODE (x));
3479 
3480 	fputc ('[', file);
3481 
3482 	switch (GET_CODE (addr))
3483 	  {
3484 	  case PRE_INC: case POST_INC:
3485 	    output_address (VOIDmode,
3486 			    plus_constant (Pmode, XEXP (addr, 0), size)); break;
3487 	  case PRE_DEC: case POST_DEC:
3488 	    output_address (VOIDmode,
3489 			    plus_constant (Pmode, XEXP (addr, 0), -size));
3490 	    break;
3491 	  case PRE_MODIFY: case POST_MODIFY:
3492 	    output_address (VOIDmode, XEXP (addr, 1)); break;
3493 	  case PLUS:
3494 	    if (output_scaled)
3495 	      {
3496 		output_address (VOIDmode,
3497 				plus_constant (Pmode, XEXP (addr, 0),
3498 					       (INTVAL (XEXP (addr, 1))
3499 						>> (size == 2 ? 1 : 2))));
3500 		output_scaled = 0;
3501 	      }
3502 	    else
3503 	      output_address (VOIDmode, addr);
3504 	    break;
3505 	  default:
3506 	    if (flag_pic && CONSTANT_ADDRESS_P (addr))
3507 	      arc_output_pic_addr_const (file, addr, code);
3508 	    else
3509 	      output_address (VOIDmode, addr);
3510 	    break;
3511 	  }
3512 	fputc (']', file);
3513 	break;
3514       }
3515     case CONST_DOUBLE :
3516       /* We handle SFmode constants here as output_addr_const doesn't.  */
3517       if (GET_MODE (x) == SFmode)
3518 	{
3519 	  long l;
3520 
3521 	  REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
3522 	  fprintf (file, "0x%08lx", l);
3523 	  break;
3524 	}
3525       /* FALLTHRU */
3526       /* Let output_addr_const deal with it.  */
3527     default :
3528       if (flag_pic
3529 	  || (GET_CODE (x) == CONST
3530 	      && GET_CODE (XEXP (x, 0)) == UNSPEC
3531 	      && (XINT (XEXP (x, 0), 1) == UNSPEC_TLS_OFF
3532 		  || XINT (XEXP (x, 0), 1) == UNSPEC_TLS_GD))
3533 	  || (GET_CODE (x) == CONST
3534 	      && GET_CODE (XEXP (x, 0)) == PLUS
3535 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
3536 	      && (XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_TLS_OFF
3537 		  || XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_TLS_GD)))
3538 	arc_output_pic_addr_const (file, x, code);
3539       else
3540 	{
	  /* FIXME: Dirty way to handle @var@sda+const.  Should be handled
	     with asm_output_symbol_ref.  */
3543 	  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3544 	    {
3545 	      x = XEXP (x, 0);
3546 	      output_addr_const (file, XEXP (x, 0));
3547 	      if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF && SYMBOL_REF_SMALL_P (XEXP (x, 0)))
3548 		fprintf (file, "@sda");
3549 
3550 	      if (GET_CODE (XEXP (x, 1)) != CONST_INT
3551 		  || INTVAL (XEXP (x, 1)) >= 0)
3552 		fprintf (file, "+");
3553 	      output_addr_const (file, XEXP (x, 1));
3554 	    }
3555 	  else
3556 	    output_addr_const (file, x);
3557 	}
3558       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
3559 	fprintf (file, "@sda");
3560       break;
3561     }
3562 }
3563 
3564 /* Print a memory address as an operand to reference that memory location.  */
3565 
3566 void
arc_print_operand_address (FILE *file, rtx addr)
3568 {
3569   register rtx base, index = 0;
3570 
3571   switch (GET_CODE (addr))
3572     {
3573     case REG :
3574       fputs (reg_names[REGNO (addr)], file);
3575       break;
3576     case SYMBOL_REF :
3577       output_addr_const (file, addr);
3578       if (SYMBOL_REF_SMALL_P (addr))
3579 	fprintf (file, "@sda");
3580       break;
3581     case PLUS :
3582       if (GET_CODE (XEXP (addr, 0)) == MULT)
3583 	index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1);
3584       else if (CONST_INT_P (XEXP (addr, 0)))
3585 	index = XEXP (addr, 0), base = XEXP (addr, 1);
3586       else
3587 	base = XEXP (addr, 0), index = XEXP (addr, 1);
3588 
3589       gcc_assert (OBJECT_P (base));
3590       arc_print_operand_address (file, base);
3591       if (CONSTANT_P (base) && CONST_INT_P (index))
3592 	fputc ('+', file);
3593       else
3594 	fputc (',', file);
3595       gcc_assert (OBJECT_P (index));
3596       arc_print_operand_address (file, index);
3597       break;
3598     case CONST:
3599       {
3600 	rtx c = XEXP (addr, 0);
3601 
3602 	if ((GET_CODE (c) == UNSPEC
3603 	     && (XINT (c, 1) == UNSPEC_TLS_OFF
3604 		 || XINT (c, 1) == UNSPEC_TLS_IE))
3605 	    || (GET_CODE (c) == PLUS
3606 		&& GET_CODE (XEXP (c, 0)) == UNSPEC
3607 		&& (XINT (XEXP (c, 0), 1) == UNSPEC_TLS_OFF
3608 		    || XINT (XEXP (c, 0), 1) == ARC_UNSPEC_GOTOFFPC)))
3609 	  {
3610 	    arc_output_pic_addr_const (file, c, 0);
3611 	    break;
3612 	  }
3613 	gcc_assert (GET_CODE (c) == PLUS);
3614 	gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF);
3615 	gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT);
3616 
3617 	output_address (VOIDmode, XEXP (addr, 0));
3618 
3619 	break;
3620       }
3621     case PRE_INC :
3622     case PRE_DEC :
      /* We shouldn't get here as we've lost the mode of the memory object
	 (which says how much to inc/dec by).  */
3625       gcc_unreachable ();
3626       break;
3627     default :
3628       if (flag_pic)
3629 	arc_output_pic_addr_const (file, addr, 0);
3630       else
3631 	output_addr_const (file, addr);
3632       break;
3633     }
3634 }
3635 
3636 /* Conditional execution support.
3637 
3638    This is based on the ARM port but for now is much simpler.
3639 
3640    A finite state machine takes care of noticing whether or not instructions
3641    can be conditionally executed, and thus decrease execution time and code
3642    size by deleting branch instructions.  The fsm is controlled by
3643    arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the
3644    actions of PRINT_OPERAND.  The patterns in the .md file for the branch
3645    insns also have a hand in this.  */
/* The way we leave dealing with non-annulled or annul-false delay slot
   insns to the consumer is awkward.  */
3648 
/* The states of the fsm controlling condition codes are:
3650    0: normal, do nothing special
3651    1: don't output this insn
3652    2: don't output this insn
3653    3: make insns conditional
3654    4: make insns conditional
   5: make insn conditional (only for outputting annulled delay slot insns)
3656 
3657    special value for cfun->machine->uid_ccfsm_state:
3658    6: return with but one insn before it since function start / call
3659 
3660    State transitions (state->state by whom, under what condition):
3661    0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over
3662           some instructions.
3663    0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed
3664           by zero or more non-jump insns and an unconditional branch with
3665 	  the same target label as the condbranch.
3666    1 -> 3 branch patterns, after having not output the conditional branch
3667    2 -> 4 branch patterns, after having not output the conditional branch
   0 -> 5 branch patterns, for annulled delay slot insn.
3669    3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached
3670           (the target label has CODE_LABEL_NUMBER equal to
3671 	  arc_ccfsm_target_label).
3672    4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached
3673    3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns.
3674    5 -> 0 when outputting the delay slot insn
3675 
3676    If the jump clobbers the conditions then we use states 2 and 4.
3677 
3678    A similar thing can be done with conditional return insns.
3679 
3680    We also handle separating branches from sets of the condition code.
3681    This is done here because knowledge of the ccfsm state is required,
3682    we may not be outputting the branch.  */
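
/* As an illustrative sketch (assembler mnemonics approximate), a conditional
   branch skipping a single insn:

	cmp	r0,0
	bne	.L1		; 0 -> 1: the branch itself is not output
	mov	r1,r2		; 1 -> 3: output predicated as "mov.eq r1,r2"
   .L1:				; 3 -> 0: the target label is reached

   The skipped insn only executes when the branch is not taken, so it is
   given the inverse of the branch condition.  */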
3683 
3684 /* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current,
3685    before letting final output INSN.  */
3686 
3687 static void
3688 arc_ccfsm_advance (rtx_insn *insn, struct arc_ccfsm *state)
3689 {
3690   /* BODY will hold the body of INSN.  */
3691   register rtx body;
3692 
  /* This will be 1 if trying to repeat the trick (i.e. do the `else' part of
3694      an if/then/else), and things need to be reversed.  */
3695   int reverse = 0;
3696 
3697   /* If we start with a return insn, we only succeed if we find another one.  */
3698   int seeking_return = 0;
3699 
3700   /* START_INSN will hold the insn from where we start looking.  This is the
3701      first insn after the following code_label if REVERSE is true.  */
3702   rtx_insn *start_insn = insn;
3703 
3704   /* Type of the jump_insn. Brcc insns don't affect ccfsm changes,
     since they don't rely on a cmp preceding them.  */
3706   enum attr_type jump_insn_type;
3707 
3708   /* Allow -mdebug-ccfsm to turn this off so we can see how well it does.
     We can't do this in macro FINAL_PRESCAN_INSN because it's called from
3710      final_scan_insn which has `optimize' as a local.  */
3711   if (optimize < 2 || TARGET_NO_COND_EXEC)
3712     return;
3713 
3714   /* Ignore notes and labels.  */
3715   if (!INSN_P (insn))
3716     return;
3717   body = PATTERN (insn);
3718   /* If in state 4, check if the target branch is reached, in order to
3719      change back to state 0.  */
3720   if (state->state == 4)
3721     {
3722       if (insn == state->target_insn)
3723 	{
3724 	  state->target_insn = NULL;
3725 	  state->state = 0;
3726 	}
3727       return;
3728     }
3729 
3730   /* If in state 3, it is possible to repeat the trick, if this insn is an
3731      unconditional branch to a label, and immediately following this branch
3732      is the previous target label which is only used once, and the label this
3733      branch jumps to is not too far off.  Or in other words "we've done the
3734      `then' part, see if we can do the `else' part."  */
3735   if (state->state == 3)
3736     {
3737       if (simplejump_p (insn))
3738 	{
3739 	  start_insn = next_nonnote_insn (start_insn);
3740 	  if (GET_CODE (start_insn) == BARRIER)
3741 	    {
3742 	      /* ??? Isn't this always a barrier?  */
3743 	      start_insn = next_nonnote_insn (start_insn);
3744 	    }
3745 	  if (GET_CODE (start_insn) == CODE_LABEL
3746 	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
3747 	      && LABEL_NUSES (start_insn) == 1)
3748 	    reverse = TRUE;
3749 	  else
3750 	    return;
3751 	}
3752       else if (GET_CODE (body) == SIMPLE_RETURN)
3753 	{
3754 	  start_insn = next_nonnote_insn (start_insn);
3755 	  if (GET_CODE (start_insn) == BARRIER)
3756 	    start_insn = next_nonnote_insn (start_insn);
3757 	  if (GET_CODE (start_insn) == CODE_LABEL
3758 	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
3759 	      && LABEL_NUSES (start_insn) == 1)
3760 	    {
3761 	      reverse = TRUE;
3762 	      seeking_return = 1;
3763 	    }
3764 	  else
3765 	    return;
3766 	}
3767       else
3768 	return;
3769     }
3770 
3771   if (GET_CODE (insn) != JUMP_INSN
3772       || GET_CODE (PATTERN (insn)) == ADDR_VEC
3773       || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
3774     return;
3775 
3776  /* We can't predicate BRCC or loop ends.
3777     Also, when generating PIC code, and considering a medium range call,
3778     we can't predicate the call.  */
3779   jump_insn_type = get_attr_type (insn);
3780   if (jump_insn_type == TYPE_BRCC
3781       || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT
3782       || jump_insn_type == TYPE_LOOP_END
3783       || (jump_insn_type == TYPE_CALL && !get_attr_predicable (insn)))
3784     return;
3785 
  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
3788   if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
3789     body = XVECEXP (body, 0, 0);
3790 
3791   if (reverse
3792       || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
3793 	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
3794     {
3795       int insns_skipped = 0, fail = FALSE, succeed = FALSE;
3796       /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
3797       int then_not_else = TRUE;
3798       /* Nonzero if next insn must be the target label.  */
3799       int next_must_be_target_label_p;
3800       rtx_insn *this_insn = start_insn;
3801       rtx label = 0;
3802 
3803       /* Register the insn jumped to.  */
3804       if (reverse)
3805 	{
3806 	  if (!seeking_return)
3807 	    label = XEXP (SET_SRC (body), 0);
3808 	}
3809       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
3810 	label = XEXP (XEXP (SET_SRC (body), 1), 0);
3811       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
3812 	{
3813 	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
3814 	  then_not_else = FALSE;
3815 	}
3816       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == SIMPLE_RETURN)
3817 	seeking_return = 1;
3818       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == SIMPLE_RETURN)
3819 	{
3820 	  seeking_return = 1;
3821 	  then_not_else = FALSE;
3822 	}
3823       else
3824 	gcc_unreachable ();
3825 
3826       /* If this is a non-annulled branch with a delay slot, there is
3827 	 no need to conditionalize the delay slot.  */
3828       if (NEXT_INSN (PREV_INSN (insn)) != insn
3829 	  && state->state == 0 && !INSN_ANNULLED_BRANCH_P (insn))
3830 	{
3831 	  this_insn = NEXT_INSN (this_insn);
3832 	  gcc_assert (NEXT_INSN (NEXT_INSN (PREV_INSN (start_insn)))
3833 		      == NEXT_INSN (this_insn));
3834 	}
3835       /* See how many insns this branch skips, and what kind of insns.  If all
3836 	 insns are okay, and the label or unconditional branch to the same
3837 	 label is not too far away, succeed.  */
3838       for (insns_skipped = 0, next_must_be_target_label_p = FALSE;
3839 	   !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED;
3840 	   insns_skipped++)
3841 	{
3842 	  rtx scanbody;
3843 
3844 	  this_insn = next_nonnote_insn (this_insn);
3845 	  if (!this_insn)
3846 	    break;
3847 
3848 	  if (next_must_be_target_label_p)
3849 	    {
3850 	      if (GET_CODE (this_insn) == BARRIER)
3851 		continue;
3852 	      if (GET_CODE (this_insn) == CODE_LABEL
3853 		  && this_insn == label)
3854 		{
3855 		  state->state = 1;
3856 		  succeed = TRUE;
3857 		}
3858 	      else
3859 		fail = TRUE;
3860 	      break;
3861 	    }
3862 
3863 	  switch (GET_CODE (this_insn))
3864 	    {
3865 	    case CODE_LABEL:
3866 	      /* Succeed if it is the target label, otherwise fail since
3867 		 control falls in from somewhere else.  */
3868 	      if (this_insn == label)
3869 		{
3870 		  state->state = 1;
3871 		  succeed = TRUE;
3872 		}
3873 	      else
3874 		fail = TRUE;
3875 	      break;
3876 
3877 	    case BARRIER:
3878 	      /* Succeed if the following insn is the target label.
3879 		 Otherwise fail.
3880 		 If return insns are used then the last insn in a function
3881 		 will be a barrier.  */
3882 	      next_must_be_target_label_p = TRUE;
3883 	      break;
3884 
3885 	    case CALL_INSN:
3886 	      /* Can handle a call insn if there are no insns after it.
3887 		 IE: The next "insn" is the target label.  We don't have to
3888 		 worry about delay slots as such insns are SEQUENCE's inside
3889 		 INSN's.  ??? It is possible to handle such insns though.  */
3890 	      if (get_attr_cond (this_insn) == COND_CANUSE)
3891 		next_must_be_target_label_p = TRUE;
3892 	      else
3893 		fail = TRUE;
3894 	      break;
3895 
3896 	    case JUMP_INSN:
3897 	      scanbody = PATTERN (this_insn);
3898 
3899 	      /* If this is an unconditional branch to the same label, succeed.
3900 		 If it is to another label, do nothing.  If it is conditional,
3901 		 fail.  */
3902 	      /* ??? Probably, the test for the SET and the PC are
3903 		 unnecessary.  */
3904 
3905 	      if (GET_CODE (scanbody) == SET
3906 		  && GET_CODE (SET_DEST (scanbody)) == PC)
3907 		{
3908 		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
3909 		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
3910 		    {
3911 		      state->state = 2;
3912 		      succeed = TRUE;
3913 		    }
3914 		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
3915 		    fail = TRUE;
3916 		  else if (get_attr_cond (this_insn) != COND_CANUSE)
3917 		    fail = TRUE;
3918 		}
3919 	      else if (GET_CODE (scanbody) == SIMPLE_RETURN
3920 		       && seeking_return)
3921 		{
3922 		  state->state = 2;
3923 		  succeed = TRUE;
3924 		}
3925 	      else if (GET_CODE (scanbody) == PARALLEL)
3926 		{
3927 		  if (get_attr_cond (this_insn) != COND_CANUSE)
3928 		    fail = TRUE;
3929 		}
3930 	      break;
3931 
3932 	    case INSN:
3933 	      scanbody = PATTERN (this_insn);
3934 
3935 	      /* We can only do this with insns that can use the condition
3936 		 codes (and don't set them).  */
3937 	      if (GET_CODE (scanbody) == SET
3938 		  || GET_CODE (scanbody) == PARALLEL)
3939 		{
3940 		  if (get_attr_cond (this_insn) != COND_CANUSE)
3941 		    fail = TRUE;
3942 		}
3943 	      /* We can't handle other insns like sequences.  */
3944 	      else
3945 		fail = TRUE;
3946 	      break;
3947 
3948 	    default:
3949 	      break;
3950 	    }
3951 	}
3952 
3953       if (succeed)
3954 	{
3955 	  if ((!seeking_return) && (state->state == 1 || reverse))
3956 	    state->target_label = CODE_LABEL_NUMBER (label);
3957 	  else if (seeking_return || state->state == 2)
3958 	    {
3959 	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
3960 		{
3961 		  this_insn = next_nonnote_insn (this_insn);
3962 
3963 		  gcc_assert (!this_insn ||
3964 			      (GET_CODE (this_insn) != BARRIER
3965 			       && GET_CODE (this_insn) != CODE_LABEL));
3966 		}
3967 	      if (!this_insn)
3968 		{
3969 		  /* Oh dear! we ran off the end, give up.  */
3970 		  extract_insn_cached (insn);
3971 		  state->state = 0;
3972 		  state->target_insn = NULL;
3973 		  return;
3974 		}
3975 	      state->target_insn = this_insn;
3976 	    }
3977 	  else
3978 	    gcc_unreachable ();
3979 
	  /* If REVERSE is true, the condition code in STATE->cc needs to be
	     inverted from what it was.  */
3982 	  if (!reverse)
3983 	    {
3984 	      state->cond = XEXP (SET_SRC (body), 0);
3985 	      state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0));
3986 	    }
3987 
3988 	  if (reverse || then_not_else)
3989 	    state->cc = ARC_INVERSE_CONDITION_CODE (state->cc);
3990 	}
3991 
3992       /* Restore recog_operand.  Getting the attributes of other insns can
3993 	 destroy this array, but final.c assumes that it remains intact
3994 	 across this call; since the insn has been recognized already we
	 call insn_extract directly.  */
3996       extract_insn_cached (insn);
3997     }
3998 }
3999 
4000 /* Record that we are currently outputting label NUM with prefix PREFIX.
   If it's the label we're looking for, reset the ccfsm machinery.
4002 
4003    Called from ASM_OUTPUT_INTERNAL_LABEL.  */
4004 
4005 static void
4006 arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state)
4007 {
4008   if (state->state == 3 && state->target_label == num
4009       && !strcmp (prefix, "L"))
4010     {
4011       state->state = 0;
4012       state->target_insn = NULL;
4013     }
4014 }
4015 
4016 /* We are considering a conditional branch with the condition COND.
4017    Check if we want to conditionalize a delay slot insn, and if so modify
4018    the ccfsm state accordingly.
4019    REVERSE says branch will branch when the condition is false.  */
4020 void
4021 arc_ccfsm_record_condition (rtx cond, bool reverse, rtx_insn *jump,
4022 			    struct arc_ccfsm *state)
4023 {
4024   rtx_insn *seq_insn = NEXT_INSN (PREV_INSN (jump));
4025   if (!state)
4026     state = &arc_ccfsm_current;
4027 
4028   gcc_assert (state->state == 0);
4029   if (seq_insn != jump)
4030     {
4031       rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1);
4032 
4033       if (!as_a<rtx_insn *> (insn)->deleted ()
4034 	  && INSN_ANNULLED_BRANCH_P (jump)
4035 	  && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn)))
4036 	{
4037 	  state->cond = cond;
4038 	  state->cc = get_arc_condition_code (cond);
4039 	  if (!reverse)
4040 	    arc_ccfsm_current.cc
4041 	      = ARC_INVERSE_CONDITION_CODE (state->cc);
4042 	  rtx pat = PATTERN (insn);
4043 	  if (GET_CODE (pat) == COND_EXEC)
4044 	    gcc_assert ((INSN_FROM_TARGET_P (insn)
4045 			 ? ARC_INVERSE_CONDITION_CODE (state->cc) : state->cc)
4046 			== get_arc_condition_code (XEXP (pat, 0)));
4047 	  else
4048 	    state->state = 5;
4049 	}
4050     }
4051 }
4052 
4053 /* Update *STATE as we would when we emit INSN.  */
4054 
4055 static void
4056 arc_ccfsm_post_advance (rtx_insn *insn, struct arc_ccfsm *state)
4057 {
4058   enum attr_type type;
4059 
4060   if (LABEL_P (insn))
4061     arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state);
4062   else if (JUMP_P (insn)
4063 	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
4064 	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
4065 	   && ((type = get_attr_type (insn)) == TYPE_BRANCH
4066 	       || ((type == TYPE_UNCOND_BRANCH
4067 		    || type == TYPE_RETURN)
4068 		   && ARC_CCFSM_BRANCH_DELETED_P (state))))
4069     {
4070       if (ARC_CCFSM_BRANCH_DELETED_P (state))
4071 	ARC_CCFSM_RECORD_BRANCH_DELETED (state);
4072       else
4073 	{
4074 	  rtx src = SET_SRC (PATTERN (insn));
4075 	  arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx,
4076 				      insn, state);
4077 	}
4078     }
4079   else if (arc_ccfsm_current.state == 5)
4080     arc_ccfsm_current.state = 0;
4081 }
4082 
4083 /* Return true if the current insn, which is a conditional branch, is to be
4084    deleted.  */
4085 
4086 bool
4087 arc_ccfsm_branch_deleted_p (void)
4088 {
4089   return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current);
4090 }
4091 
/* Record that a branch isn't output because subsequent insns can be
4093    conditionalized.  */
4094 
4095 void
4096 arc_ccfsm_record_branch_deleted (void)
4097 {
4098   ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current);
4099 }
4100 
4101 /* During insn output, indicate if the current insn is predicated.  */
4102 
4103 bool
4104 arc_ccfsm_cond_exec_p (void)
4105 {
4106   return (cfun->machine->prescan_initialized
4107 	  && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current));
4108 }
4109 
4110 /* Like next_active_insn, but return NULL if we find an ADDR_(DIFF_)VEC,
4111    and look inside SEQUENCEs.  */
4112 
4113 static rtx_insn *
4114 arc_next_active_insn (rtx_insn *insn, struct arc_ccfsm *statep)
4115 {
4116   rtx pat;
4117 
4118   do
4119     {
4120       if (statep)
4121 	arc_ccfsm_post_advance (insn, statep);
4122       insn = NEXT_INSN (insn);
4123       if (!insn || BARRIER_P (insn))
4124 	return NULL;
4125       if (statep)
4126 	arc_ccfsm_advance (insn, statep);
4127     }
4128   while (NOTE_P (insn)
4129 	 || (cfun->machine->arc_reorg_started
4130 	     && LABEL_P (insn) && !label_to_alignment (insn))
4131 	 || (NONJUMP_INSN_P (insn)
4132 	     && (GET_CODE (PATTERN (insn)) == USE
4133 		 || GET_CODE (PATTERN (insn)) == CLOBBER)));
4134   if (!LABEL_P (insn))
4135     {
4136       gcc_assert (INSN_P (insn));
4137       pat = PATTERN (insn);
4138       if (GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC)
4139 	return NULL;
4140       if (GET_CODE (pat) == SEQUENCE)
4141 	return as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
4142     }
4143   return insn;
4144 }
4145 
4146 /* When deciding if an insn should be output short, we want to know something
4147    about the following insns:
4148    - if another insn follows which we know we can output as a short insn
4149      before an alignment-sensitive point, we can output this insn short:
4150      the decision about the eventual alignment can be postponed.
   - if a to-be-aligned label comes next, we should output this insn so
     as to get / preserve 4-byte alignment.
   - if a likely branch without a delay slot insn, or a call with an
     immediately following short insn comes next, we should output this insn
     so as to get / preserve 2 mod 4 unalignment.
4156    - do the same for a not completely unlikely branch with a short insn
4157      following before any other branch / label.
4158    - in order to decide if we are actually looking at a branch, we need to
4159      call arc_ccfsm_advance.
4160    - in order to decide if we are looking at a short insn, we should know
4161      if it is conditionalized.  To a first order of approximation this is
4162      the case if the state from arc_ccfsm_advance from before this insn
4163      indicates the insn is conditionalized.  However, a further refinement
4164      could be to not conditionalize an insn if the destination register(s)
4165      is/are dead in the non-executed case.  */
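/* For example (a sketch): with cfun->machine->unalign == 0 the current insn
   starts on a 4-byte boundary; emitting one 2-byte (short) insn flips
   unalign to 2, and emitting a second short insn restores 4-byte alignment -
   see output_short_suffix below.  */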
4166 /* Return non-zero if INSN should be output as a short insn.  UNALIGN is
   zero if the current insn is aligned to a 4-byte boundary, two otherwise.
4168    If CHECK_ATTR is greater than 0, check the iscompact attribute first.  */
4169 
4170 int
4171 arc_verify_short (rtx_insn *insn, int, int check_attr)
4172 {
4173   enum attr_iscompact iscompact;
4174   struct machine_function *machine;
4175 
4176   if (check_attr > 0)
4177     {
4178       iscompact = get_attr_iscompact (insn);
4179       if (iscompact == ISCOMPACT_FALSE)
4180 	return 0;
4181     }
4182   machine = cfun->machine;
4183 
4184   if (machine->force_short_suffix >= 0)
4185     return machine->force_short_suffix;
4186 
4187   return (get_attr_length (insn) & 2) != 0;
4188 }
4189 
4190 /* When outputting an instruction (alternative) that can potentially be short,
4191    output the short suffix if the insn is in fact short, and update
4192    cfun->machine->unalign accordingly.  */
4193 
4194 static void
4195 output_short_suffix (FILE *file)
4196 {
4197   rtx_insn *insn = current_output_insn;
4198 
4199   if (arc_verify_short (insn, cfun->machine->unalign, 1))
4200     {
4201       fprintf (file, "_s");
4202       cfun->machine->unalign ^= 2;
4203     }
4204   /* Restore recog_operand.  */
4205   extract_insn_cached (insn);
4206 }
4207 
4208 /* Implement FINAL_PRESCAN_INSN.  */
4209 
4210 void
4211 arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
4212 			int noperands ATTRIBUTE_UNUSED)
4213 {
4214   if (TARGET_DUMPISIZE)
4215     fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4216 
4217   /* Output a nop if necessary to prevent a hazard.
4218      Don't do this for delay slots: inserting a nop would
4219      alter semantics, and the only time we would find a hazard is for a
4220      call function result - and in that case, the hazard is spurious to
4221      start with.  */
4222   if (PREV_INSN (insn)
4223       && PREV_INSN (NEXT_INSN (insn)) == insn
4224       && arc_hazard (prev_real_insn (insn), insn))
4225     {
4226       current_output_insn =
4227 	emit_insn_before (gen_nop (), NEXT_INSN (PREV_INSN (insn)));
4228       final_scan_insn (current_output_insn, asm_out_file, optimize, 1, NULL);
4229       current_output_insn = insn;
4230     }
4231   /* Restore extraction data which might have been clobbered by arc_hazard.  */
4232   extract_constrain_insn_cached (insn);
4233 
4234   if (!cfun->machine->prescan_initialized)
4235     {
4236       /* Clear lingering state from branch shortening.  */
4237       memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current);
4238       cfun->machine->prescan_initialized = 1;
4239     }
4240   arc_ccfsm_advance (insn, &arc_ccfsm_current);
4241 
4242   cfun->machine->size_reason = 0;
4243 }
4244 
4245 /* Given FROM and TO register numbers, say whether this elimination is allowed.
4246    Frame pointer elimination is automatically handled.
4247 
4248    All eliminations are permissible. If we need a frame
4249    pointer, we must eliminate ARG_POINTER_REGNUM into
4250    FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
4251 
4252 static bool
4253 arc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
4254 {
4255   return to == FRAME_POINTER_REGNUM || !arc_frame_pointer_required ();
4256 }
4257 
4258 /* Define the offset between two registers, one to be eliminated, and
4259    the other its replacement, at the start of a routine.  */
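
/* Schematically, as implied by the offsets computed below (a sketch, higher
   addresses first):
     [ pretend args ]              <- arg pointer sits just below these
     [ extra_size bytes ]
     [ reg_size bytes ]            <- frame pointer sits just below these
     [ locals + outgoing args ]    <- stack pointer at the bottom  */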
4260 
4261 int
4262 arc_initial_elimination_offset (int from, int to)
4263 {
4264   if (! cfun->machine->frame_info.initialized)
4265      arc_compute_frame_size (get_frame_size ());
4266 
4267   if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
4268     {
4269       return (cfun->machine->frame_info.extra_size
4270 	      + cfun->machine->frame_info.reg_size);
4271     }
4272 
4273   if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4274     {
4275       return (cfun->machine->frame_info.total_size
4276 	      - cfun->machine->frame_info.pretend_size);
4277     }
4278 
4279   if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM))
4280     {
4281       return (cfun->machine->frame_info.total_size
4282 	      - (cfun->machine->frame_info.pretend_size
4283 	      + cfun->machine->frame_info.extra_size
4284 	      + cfun->machine->frame_info.reg_size));
4285     }
4286 
4287   gcc_unreachable ();
4288 }
4289 
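/* Return true if the function needs a frame pointer; currently that is
   only the case when it calls alloca.  */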
4290 static bool
4291 arc_frame_pointer_required (void)
4292 {
  return cfun->calls_alloca;
4294 }
4295 
4296 
4297 /* Return the destination address of a branch.  */
4298 
4299 int
4300 branch_dest (rtx branch)
4301 {
4302   rtx pat = PATTERN (branch);
4303   rtx dest = (GET_CODE (pat) == PARALLEL
4304 	      ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat));
4305   int dest_uid;
4306 
4307   if (GET_CODE (dest) == IF_THEN_ELSE)
4308     dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1);
4309 
4310   dest = XEXP (dest, 0);
4311   dest_uid = INSN_UID (dest);
4312 
4313   return INSN_ADDRESSES (dest_uid);
4314 }
4315 
4316 
4317 /* Implement TARGET_ENCODE_SECTION_INFO hook.  */
4318 
4319 static void
4320 arc_encode_section_info (tree decl, rtx rtl, int first)
4321 {
4322   /* For sdata, SYMBOL_FLAG_LOCAL and SYMBOL_FLAG_FUNCTION.
4323      This clears machine specific flags, so has to come first.  */
4324   default_encode_section_info (decl, rtl, first);
4325 
4326   /* Check if it is a function, and whether it has the
4327      [long/medium/short]_call attribute specified.  */
4328   if (TREE_CODE (decl) == FUNCTION_DECL)
4329     {
4330       rtx symbol = XEXP (rtl, 0);
4331       int flags = SYMBOL_REF_FLAGS (symbol);
4332 
4333       tree attr = (TREE_TYPE (decl) != error_mark_node
4334 		   ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE);
4335       tree long_call_attr = lookup_attribute ("long_call", attr);
4336       tree medium_call_attr = lookup_attribute ("medium_call", attr);
4337       tree short_call_attr = lookup_attribute ("short_call", attr);
4338 
4339       if (long_call_attr != NULL_TREE)
4340 	flags |= SYMBOL_FLAG_LONG_CALL;
4341       else if (medium_call_attr != NULL_TREE)
4342 	flags |= SYMBOL_FLAG_MEDIUM_CALL;
4343       else if (short_call_attr != NULL_TREE)
4344 	flags |= SYMBOL_FLAG_SHORT_CALL;
4345 
4346       SYMBOL_REF_FLAGS (symbol) = flags;
4347     }
4348   else if (TREE_CODE (decl) == VAR_DECL)
4349     {
4350       rtx symbol = XEXP (rtl, 0);
4351 
4352       tree attr = (TREE_TYPE (decl) != error_mark_node
4353 		   ? DECL_ATTRIBUTES (decl) : NULL_TREE);
4354 
4355       tree sec_attr = lookup_attribute ("section", attr);
4356       if (sec_attr)
4357 	{
4358 	  const char *sec_name
4359 	    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (sec_attr)));
4360 	  if (strcmp (sec_name, ".cmem") == 0
4361 	      || strcmp (sec_name, ".cmem_shared") == 0
4362 	      || strcmp (sec_name, ".cmem_private") == 0)
4363 	    SYMBOL_REF_FLAGS (symbol) |= SYMBOL_FLAG_CMEM;
4364 	}
4365     }
4366 }
4367 
4368 /* This is how to output a definition of an internal numbered label where
4369    PREFIX is the class of label and NUM is the number within the class.  */
4370 
static void
arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
4372 {
4373   if (cfun)
4374     arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current);
4375   default_internal_label (stream, prefix, labelno);
4376 }
4377 
4378 /* Set the cpu type and print out other fancy things,
4379    at the top of the file.  */
4380 
static void
arc_file_start (void)
4382 {
4383   default_file_start ();
4384   fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string);
4385 }
4386 
4387 /* Cost functions.  */
4388 
4389 /* Compute a (partial) cost for rtx X.  Return true if the complete
4390    cost has been computed, and false if subexpressions should be
4391    scanned.  In either case, *TOTAL contains the cost result.  */
4392 
4393 static bool
4394 arc_rtx_costs (rtx x, machine_mode mode, int outer_code,
4395 	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
4396 {
4397   int code = GET_CODE (x);
4398 
4399   switch (code)
4400     {
4401       /* Small integers are as cheap as registers.  */
4402     case CONST_INT:
4403       {
4404 	bool nolimm = false; /* Can we do without long immediate?  */
4405 	bool fast = false; /* Is the result available immediately?  */
	bool condexec = false; /* Does this allow conditional execution?  */
4407 	bool compact = false; /* Is a 16 bit opcode available?  */
4408 	/* CONDEXEC also implies that we can have an unconditional
4409 	   3-address operation.  */
4410 
4411 	nolimm = compact = condexec = false;
4412 	if (UNSIGNED_INT6 (INTVAL (x)))
4413 	  nolimm = condexec = compact = true;
4414 	else
4415 	  {
4416 	    if (SMALL_INT (INTVAL (x)))
4417 	      nolimm = fast = true;
4418 	    switch (outer_code)
4419 	      {
4420 	      case AND: /* bclr, bmsk, ext[bw] */
4421 		if (satisfies_constraint_Ccp (x) /* bclr */
4422 		    || satisfies_constraint_C1p (x) /* bmsk */)
4423 		  nolimm = fast = condexec = compact = true;
4424 		break;
4425 	      case IOR: /* bset */
4426 		if (satisfies_constraint_C0p (x)) /* bset */
4427 		  nolimm = fast = condexec = compact = true;
4428 		break;
4429 	      case XOR:
4430 		if (satisfies_constraint_C0p (x)) /* bxor */
4431 		  nolimm = fast = condexec = true;
4432 		break;
4433 	      case SET:
4434 		if (satisfies_constraint_Crr (x)) /* ror b,u6 */
4435 		  nolimm = true;
4436 	      default:
4437 		break;
4438 	      }
4439 	  }
4440 	/* FIXME: Add target options to attach a small cost if
4441 	   condexec / compact is not true.  */
4442 	if (nolimm)
4443 	  {
4444 	    *total = 0;
4445 	    return true;
4446 	  }
4447       }
4448       /* FALLTHRU */
4449 
4450       /*  4 byte values can be fetched as immediate constants -
4451 	  let's give that the cost of an extra insn.  */
4452     case CONST:
4453     case LABEL_REF:
4454     case SYMBOL_REF:
4455       *total = COSTS_N_INSNS (1);
4456       return true;
4457 
4458     case CONST_DOUBLE:
4459       {
4460 	rtx first, second;
4461 
4462 	if (TARGET_DPFP)
4463 	  {
4464 	    *total = COSTS_N_INSNS (1);
4465 	    return true;
4466 	  }
4467 	split_double (x, &first, &second);
4468 	*total = COSTS_N_INSNS (!SMALL_INT (INTVAL (first))
4469 				+ !SMALL_INT (INTVAL (second)));
4470 	return true;
4471       }
4472 
4473     /* Encourage synth_mult to find a synthetic multiply when reasonable.
4474        If we need more than 12 insns to do a multiply, then go out-of-line,
4475        since the call overhead will be < 10% of the cost of the multiply.  */
4476     case ASHIFT:
4477     case ASHIFTRT:
4478     case LSHIFTRT:
4479       if (TARGET_BARREL_SHIFTER)
4480 	{
4481 	  /* If we want to shift a constant, we need a LIMM.  */
4482 	  /* ??? when the optimizers want to know if a constant should be
4483 	     hoisted, they ask for the cost of the constant.  OUTER_CODE is
4484 	     insufficient context for shifts since we don't know which operand
4485 	     we are looking at.  */
4486 	  if (CONSTANT_P (XEXP (x, 0)))
4487 	    {
4488 	      *total += (COSTS_N_INSNS (2)
4489 			 + rtx_cost (XEXP (x, 1), mode, (enum rtx_code) code,
4490 				     0, speed));
4491 	      return true;
4492 	    }
4493 	  *total = COSTS_N_INSNS (1);
4494 	}
4495       else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4496 	*total = COSTS_N_INSNS (16);
4497       else
4498 	{
	  *total = COSTS_N_INSNS (INTVAL (XEXP (x, 1)));
4500 	  /* ??? want_to_gcse_p can throw negative shift counts at us,
4501 	     and then panics when it gets a negative cost as result.
4502 	     Seen for gcc.c-torture/compile/20020710-1.c -Os .  */
4503 	  if (*total < 0)
4504 	    *total = 0;
4505 	}
4506       return false;
4507 
4508     case DIV:
4509     case UDIV:
      if (speed)
	*total = COSTS_N_INSNS (30);
      else
	*total = COSTS_N_INSNS (1);
      return false;
4515 
4516     case MULT:
4517       if ((TARGET_DPFP && GET_MODE (x) == DFmode))
4518 	*total = COSTS_N_INSNS (1);
4519       else if (speed)
	*total = arc_multcost;
4521       /* We do not want synth_mult sequences when optimizing
4522 	 for size.  */
4523       else if (TARGET_MUL64_SET || TARGET_ARC700_MPY)
4524 	*total = COSTS_N_INSNS (1);
4525       else
4526 	*total = COSTS_N_INSNS (2);
4527       return false;
4528     case PLUS:
4529       if (GET_CODE (XEXP (x, 0)) == MULT
4530 	  && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
4531 	{
4532 	  *total += (rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed)
4533 		     + rtx_cost (XEXP (XEXP (x, 0), 0), mode, PLUS, 1, speed));
4534 	  return true;
4535 	}
4536       return false;
4537     case MINUS:
4538       if (GET_CODE (XEXP (x, 1)) == MULT
4539 	  && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode))
4540 	{
4541 	  *total += (rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed)
4542 		     + rtx_cost (XEXP (XEXP (x, 1), 0), mode, PLUS, 1, speed));
4543 	  return true;
4544 	}
4545       return false;
4546     case COMPARE:
4547       {
4548 	rtx op0 = XEXP (x, 0);
4549 	rtx op1 = XEXP (x, 1);
4550 
4551 	if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
4552 	    && XEXP (op0, 1) == const1_rtx)
4553 	  {
4554 	    /* btst / bbit0 / bbit1:
4555 	       Small integers and registers are free; everything else can
4556 	       be put in a register.  */
4557 	    mode = GET_MODE (XEXP (op0, 0));
4558 	    *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed)
4559 		      + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed));
4560 	    return true;
4561 	  }
4562 	if (GET_CODE (op0) == AND && op1 == const0_rtx
4563 	    && satisfies_constraint_C1p (XEXP (op0, 1)))
4564 	  {
4565 	    /* bmsk.f */
4566 	    *total = rtx_cost (XEXP (op0, 0), VOIDmode, SET, 1, speed);
4567 	    return true;
4568 	  }
4569 	/* add.f  */
4570 	if (GET_CODE (op1) == NEG)
4571 	  {
4572 	    /* op0 might be constant, the inside of op1 is rather
4573 	       unlikely to be so.  So swapping the operands might lower
4574 	       the cost.  */
4575 	    mode = GET_MODE (op0);
4576 	    *total = (rtx_cost (op0, mode, PLUS, 1, speed)
4577 		      + rtx_cost (XEXP (op1, 0), mode, PLUS, 0, speed));
4578 	  }
4579 	return false;
4580       }
4581     case EQ: case NE:
4582       if (outer_code == IF_THEN_ELSE
4583 	  && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
4584 	  && XEXP (x, 1) == const0_rtx
4585 	  && XEXP (XEXP (x, 0), 1) == const1_rtx)
4586 	{
4587 	  /* btst / bbit0 / bbit1:
4588 	     Small integers and registers are free; everything else can
4589 	     be put in a register.  */
4590 	  rtx op0 = XEXP (x, 0);
4591 
4592 	  mode = GET_MODE (XEXP (op0, 0));
4593 	  *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed)
4594 		    + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed));
4595 	  return true;
4596 	}
4597       /* Fall through.  */
4598     /* scc_insn expands into two insns.  */
4599     case GTU: case GEU: case LEU:
4600       if (mode == SImode)
4601 	*total += COSTS_N_INSNS (1);
4602       return false;
4603     case LTU: /* might use adc.  */
4604       if (mode == SImode)
4605 	*total += COSTS_N_INSNS (1) - 1;
4606       return false;
4607     default:
4608       return false;
4609     }
4610 }
4611 
4612 /* Helper used by arc_legitimate_pc_offset_p.  */
4613 
4614 static bool
4615 arc_needs_pcl_p (rtx x)
4616 {
4617   register const char *fmt;
4618   register int i, j;
4619 
4620   if ((GET_CODE (x) == UNSPEC)
4621       && (XVECLEN (x, 0) == 1)
4622       && (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF))
4623     switch (XINT (x, 1))
4624       {
4625       case ARC_UNSPEC_GOT:
4626       case ARC_UNSPEC_GOTOFFPC:
4627       case UNSPEC_TLS_GD:
4628       case UNSPEC_TLS_IE:
4629 	return true;
4630       default:
4631 	break;
4632       }
4633 
4634   fmt = GET_RTX_FORMAT (GET_CODE (x));
4635   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4636     {
4637       if (fmt[i] == 'e')
4638 	{
4639 	  if (arc_needs_pcl_p (XEXP (x, i)))
4640 	    return true;
4641 	}
4642       else if (fmt[i] == 'E')
4643 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4644 	  if (arc_needs_pcl_p (XVECEXP (x, i, j)))
4645 	    return true;
4646     }
4647 
4648   return false;
4649 }
4650 
4651 /* Return true if ADDR is an address that needs to be expressed as an
4652    explicit sum of pcl + offset.  */
4653 
4654 bool
4655 arc_legitimate_pc_offset_p (rtx addr)
4656 {
4657   if (GET_CODE (addr) != CONST)
4658     return false;
4659 
4660   return arc_needs_pcl_p (addr);
4661 }
4662 
4663 /* Return true if ADDR is a valid pic address.
4664    A valid pic address on arc should look like
4665    const (unspec (SYMBOL_REF/LABEL) (ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT))  */
4666 
4667 bool
4668 arc_legitimate_pic_addr_p (rtx addr)
4669 {
4670   if (GET_CODE (addr) == LABEL_REF)
4671     return true;
4672   if (GET_CODE (addr) != CONST)
4673     return false;
4674 
4675   addr = XEXP (addr, 0);
4676 
4677 
4678   if (GET_CODE (addr) == PLUS)
4679     {
4680       if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
4681 	return false;
4682       addr = XEXP (addr, 0);
4683     }
4684 
4685   if (GET_CODE (addr) != UNSPEC
4686       || XVECLEN (addr, 0) != 1)
4687     return false;
4688 
  /* Must be one of @GOT, @GOTOFF, @GOTOFFPC, @tlsgd, @tlsie.  */
4690   if (XINT (addr, 1) != ARC_UNSPEC_GOT
4691       && XINT (addr, 1) != ARC_UNSPEC_GOTOFF
4692       && XINT (addr, 1) != ARC_UNSPEC_GOTOFFPC
4693       && XINT (addr, 1) != UNSPEC_TLS_GD
4694       && XINT (addr, 1) != UNSPEC_TLS_IE)
4695     return false;
4696 
4697   if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF
4698       && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF)
4699     return false;
4700 
4701   return true;
4702 }
4703 
4704 
4705 
4706 /* Return true if OP contains a symbol reference.  */
4707 
4708 static bool
4709 symbolic_reference_mentioned_p (rtx op)
4710 {
4711   register const char *fmt;
4712   register int i;
4713 
4714   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4715     return true;
4716 
4717   fmt = GET_RTX_FORMAT (GET_CODE (op));
4718   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4719     {
4720       if (fmt[i] == 'E')
4721 	{
4722 	  register int j;
4723 
4724 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4725 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4726 	      return true;
4727 	}
4728 
4729       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4730 	return true;
4731     }
4732 
4733   return false;
4734 }
4735 
4736 /* Return true if OP contains a SYMBOL_REF that is not wrapped in an unspec.
4737    If SKIP_LOCAL is true, skip symbols that bind locally.
4738    This is used further down in this file, and, without SKIP_LOCAL,
4739    in the addsi3 / subsi3 expanders when generating PIC code.  */
4740 
4741 bool
4742 arc_raw_symbolic_reference_mentioned_p (rtx op, bool skip_local)
4743 {
4744   register const char *fmt;
4745   register int i;
4746 
  if (GET_CODE (op) == UNSPEC)
4748     return false;
4749 
4750   if (GET_CODE (op) == SYMBOL_REF)
4751     {
4752       if (SYMBOL_REF_TLS_MODEL (op))
4753 	return true;
4754       if (!flag_pic)
4755 	return false;
4756       tree decl = SYMBOL_REF_DECL (op);
4757       return !skip_local || !decl || !default_binds_local_p (decl);
4758     }
4759 
4760   fmt = GET_RTX_FORMAT (GET_CODE (op));
4761   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4762     {
4763       if (fmt[i] == 'E')
4764 	{
4765 	  register int j;
4766 
4767 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4768 	    if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j),
4769 							skip_local))
4770 	      return true;
4771 	}
4772 
4773       else if (fmt[i] == 'e'
4774 	       && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i),
4775 							  skip_local))
4776 	return true;
4777     }
4778 
4779   return false;
4780 }
4781 
4782 /* Get the thread pointer.  */
4783 
4784 static rtx
4785 arc_get_tp (void)
4786 {
4787    /* If arc_tp_regno has been set, we can use that hard register
4788       directly as a base register.  */
4789   if (arc_tp_regno != -1)
4790     return gen_rtx_REG (Pmode, arc_tp_regno);
4791 
4792   /* Otherwise, call __read_tp.  Copy the result to a pseudo to avoid
4793      conflicts with function arguments / results.  */
4794   rtx reg = gen_reg_rtx (Pmode);
4795   emit_insn (gen_tls_load_tp_soft ());
4796   emit_move_insn (reg, gen_rtx_REG (Pmode, R0_REG));
4797   return reg;
4798 }
4799 
4800 /* Helper to be used by TLS Global dynamic model.  */
4801 
4802 static rtx
4803 arc_emit_call_tls_get_addr (rtx sym, int reloc, rtx eqv)
4804 {
4805   rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4806   rtx call_fusage = NULL_RTX;
4807 
4808   start_sequence ();
4809 
4810   rtx x = arc_unspec_offset (sym, reloc);
4811   emit_move_insn (r0, x);
4812   use_reg (&call_fusage, r0);
4813 
4814   gcc_assert (reloc == UNSPEC_TLS_GD);
4815   rtx call_insn = emit_call_insn (gen_tls_gd_get_addr (sym));
  /* Should we set RTL_CONST_CALL_P?  We read memory, but not in a
     way that the application should care about.  */
4818   RTL_PURE_CALL_P (call_insn) = 1;
4819   add_function_usage_to (call_insn, call_fusage);
4820 
4821   rtx_insn *insns = get_insns ();
4822   end_sequence ();
4823 
4824   rtx dest = gen_reg_rtx (Pmode);
4825   emit_libcall_block (insns, dest, r0, eqv);
4826   return dest;
4827 }
4828 
4829 #define DTPOFF_ZERO_SYM ".tdata"
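/* DTPOFF_ZERO_SYM is used below as the base symbol for @dtpoff
   computations in the local-dynamic model; presumably it marks the
   start of the TLS data section.  */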
4830 
4831 /* Return a legitimized address for ADDR,
4832    which is a SYMBOL_REF with tls_model MODEL.  */
4833 
4834 static rtx
4835 arc_legitimize_tls_address (rtx addr, enum tls_model model)
4836 {
4837   if (!flag_pic && model == TLS_MODEL_LOCAL_DYNAMIC)
4838     model = TLS_MODEL_LOCAL_EXEC;
4839 
4840   switch (model)
4841     {
4842     case TLS_MODEL_LOCAL_DYNAMIC:
4843       rtx base;
4844       tree decl;
4845       const char *base_name;
4846       rtvec v;
4847 
4848       decl = SYMBOL_REF_DECL (addr);
4849       base_name = DTPOFF_ZERO_SYM;
4850       if (decl && bss_initializer_p (decl))
4851 	base_name = ".tbss";
4852 
4853       base = gen_rtx_SYMBOL_REF (Pmode, base_name);
4854       if (strcmp (base_name, DTPOFF_ZERO_SYM) == 0)
4855 	{
4856 	  if (!flag_pic)
4857 	    goto local_exec;
4858 	  v = gen_rtvec (1, addr);
4859 	}
4860       else
4861 	v = gen_rtvec (2, addr, base);
4862       addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_TLS_OFF);
4863       addr = gen_rtx_CONST (Pmode, addr);
4864       base = arc_legitimize_tls_address (base, TLS_MODEL_GLOBAL_DYNAMIC);
4865       return gen_rtx_PLUS (Pmode, force_reg (Pmode, base), addr);
4866 
4867     case TLS_MODEL_GLOBAL_DYNAMIC:
4868       return arc_emit_call_tls_get_addr (addr, UNSPEC_TLS_GD, addr);
4869 
4870     case TLS_MODEL_INITIAL_EXEC:
4871       addr = arc_unspec_offset (addr, UNSPEC_TLS_IE);
4872       addr = copy_to_mode_reg (Pmode, gen_const_mem (Pmode, addr));
4873       return gen_rtx_PLUS (Pmode, arc_get_tp (), addr);
4874 
4875     case TLS_MODEL_LOCAL_EXEC:
4876     local_exec:
4877       addr = arc_unspec_offset (addr, UNSPEC_TLS_OFF);
4878       return gen_rtx_PLUS (Pmode, arc_get_tp (), addr);
4879     default:
4880       gcc_unreachable ();
4881     }
4882 }
4883 
4884 /* Legitimize a pic address reference in ORIG.
4885    The return value is the legitimated address.
4886    If OLDX is non-zero, it is the target to assign the address to first.  */
4887 
4888 static rtx
4889 arc_legitimize_pic_address (rtx orig, rtx oldx)
4890 {
4891   rtx addr = orig;
4892   rtx pat = orig;
4893   rtx base;
4894 
4895   if (oldx == orig)
4896     oldx = NULL;
4897 
4898   if (GET_CODE (addr) == LABEL_REF)
4899     ; /* Do nothing.  */
4900   else if (GET_CODE (addr) == SYMBOL_REF)
4901     {
4902       enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
4903       if (model != 0)
4904 	return arc_legitimize_tls_address (addr, model);
4905       else if (!flag_pic)
4906 	return orig;
4907       else if (CONSTANT_POOL_ADDRESS_P (addr) || SYMBOL_REF_LOCAL_P (addr))
4908 	return arc_unspec_offset (addr, ARC_UNSPEC_GOTOFFPC);
4909 
4910       /* This symbol must be referenced via a load from the Global
4911 	 Offset Table (@GOTPC).  */
4912       pat = arc_unspec_offset (addr, ARC_UNSPEC_GOT);
4913       pat = gen_const_mem (Pmode, pat);
4914 
4915       if (oldx == NULL)
4916 	oldx = gen_reg_rtx (Pmode);
4917 
4918       emit_move_insn (oldx, pat);
4919       pat = oldx;
4920     }
4921   else
4922     {
4923       if (GET_CODE (addr) == CONST)
4924 	{
4925 	  addr = XEXP (addr, 0);
4926 	  if (GET_CODE (addr) == UNSPEC)
4927 	    {
4928 	      /* Check that the unspec is one of the ones we generate?  */
4929 	      return orig;
4930 	    }
	  /* fwprop places constant PIC unspec expressions in the
	     REG_EQUIV notes.  The loop optimizer may then use these
	     notes for optimizations, resulting in complex patterns
	     that the current implementation does not support.  The
	     following two if-cases simplify such complex patterns
	     into simpler ones.  */
4937 	  else if (GET_CODE (addr) == MINUS)
4938 	    {
4939 	      rtx op0 = XEXP (addr, 0);
4940 	      rtx op1 = XEXP (addr, 1);
4941 	      gcc_assert (oldx);
4942 	      gcc_assert (GET_CODE (op1) == UNSPEC);
4943 
4944 	      emit_move_insn (oldx,
4945 			      gen_rtx_CONST (SImode,
4946 					     arc_legitimize_pic_address (op1,
4947 									 NULL_RTX)));
4948 	      emit_insn (gen_rtx_SET (oldx, gen_rtx_MINUS (SImode, op0, oldx)));
4949 	      return oldx;
4950 
4951 	    }
4952 	  else if (GET_CODE (addr) != PLUS)
4953 	    {
4954 	      rtx tmp = XEXP (addr, 0);
4955 	      enum rtx_code code = GET_CODE (addr);
4956 
4957 	      /* It only works for UNARY operations.  */
4958 	      gcc_assert (UNARY_P (addr));
4959 	      gcc_assert (GET_CODE (tmp) == UNSPEC);
4960 	      gcc_assert (oldx);
4961 
4962 	      emit_move_insn
4963 		(oldx,
4964 		 gen_rtx_CONST (SImode,
4965 				arc_legitimize_pic_address (tmp,
4966 							    NULL_RTX)));
4967 
4968 	      emit_insn (gen_rtx_SET (oldx,
4969 				      gen_rtx_fmt_ee (code, SImode,
4970 						      oldx, const0_rtx)));
4971 
4972 	      return oldx;
4973 	    }
4974 	  else
4975 	    {
4976 	      gcc_assert (GET_CODE (addr) == PLUS);
4977 	      if (GET_CODE (XEXP (addr, 0)) == UNSPEC)
4978 		return orig;
4979 	    }
4980 	}
4981 
4982       if (GET_CODE (addr) == PLUS)
4983 	{
4984 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4985 
4986 	  base = arc_legitimize_pic_address (op0, oldx);
4987 	  pat  = arc_legitimize_pic_address (op1,
4988 					     base == oldx ? NULL_RTX : oldx);
4989 
4990 	  if (base == op0 && pat == op1)
4991 	    return orig;
4992 
4993 	  if (GET_CODE (pat) == CONST_INT)
4994 	    pat = plus_constant (Pmode, base, INTVAL (pat));
4995 	  else
4996 	    {
4997 	      if (GET_CODE (pat) == PLUS && CONSTANT_P (XEXP (pat, 1)))
4998 		{
4999 		  base = gen_rtx_PLUS (Pmode, base, XEXP (pat, 0));
5000 		  pat = XEXP (pat, 1);
5001 		}
5002 	      pat = gen_rtx_PLUS (Pmode, base, pat);
5003 	    }
5004 	}
5005     }
5006 
  return pat;
5008 }
5009 
5010 /* Output address constant X to FILE, taking PIC into account.  */
5011 
5012 void
5013 arc_output_pic_addr_const (FILE * file, rtx x, int code)
5014 {
5015   char buf[256];
5016 
5017  restart:
5018   switch (GET_CODE (x))
5019     {
5020     case PC:
5021       if (flag_pic)
5022 	putc ('.', file);
5023       else
5024 	gcc_unreachable ();
5025       break;
5026 
5027     case SYMBOL_REF:
5028       output_addr_const (file, x);
5029 
5030       /* Local functions do not get references through the PLT.  */
5031       if (code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5032 	fputs ("@plt", file);
5033       break;
5034 
5035     case LABEL_REF:
5036       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0)));
5037       assemble_name (file, buf);
5038       break;
5039 
5040     case CODE_LABEL:
5041       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5042       assemble_name (file, buf);
5043       break;
5044 
5045     case CONST_INT:
5046       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5047       break;
5048 
5049     case CONST:
5050       arc_output_pic_addr_const (file, XEXP (x, 0), code);
5051       break;
5052 
5053     case CONST_DOUBLE:
5054       if (GET_MODE (x) == VOIDmode)
5055 	{
5056 	  /* We can use %d if the number is one word and positive.  */
5057 	  if (CONST_DOUBLE_HIGH (x))
5058 	    fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
5059 		     CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x));
5060 	  else if  (CONST_DOUBLE_LOW (x) < 0)
5061 	    fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x));
5062 	  else
5063 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5064 	}
5065       else
5066 	/* We can't handle floating point constants;
5067 	   PRINT_OPERAND must handle them.  */
5068 	output_operand_lossage ("floating constant misused");
5069       break;
5070 
5071     case PLUS:
5072       /* FIXME: Not needed here.  */
      /* Some assemblers need integer constants to appear last (e.g. masm).  */
5074       if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5075 	{
5076 	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
5077 	  fprintf (file, "+");
5078 	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
5079 	}
5080       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5081 	{
5082 	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
5083 	  if (INTVAL (XEXP (x, 1)) >= 0)
5084 	    fprintf (file, "+");
5085 	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
5086 	}
5087       else
	gcc_unreachable ();
5089       break;
5090 
5091     case MINUS:
5092       /* Avoid outputting things like x-x or x+5-x,
5093 	 since some assemblers can't handle that.  */
5094       x = simplify_subtraction (x);
5095       if (GET_CODE (x) != MINUS)
5096 	goto restart;
5097 
5098       arc_output_pic_addr_const (file, XEXP (x, 0), code);
5099       fprintf (file, "-");
5100       if (GET_CODE (XEXP (x, 1)) == CONST_INT
5101 	  && INTVAL (XEXP (x, 1)) < 0)
5102 	{
5103 	  fprintf (file, "(");
5104 	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
5105 	  fprintf (file, ")");
5106 	}
5107       else
5108 	arc_output_pic_addr_const (file, XEXP (x, 1), code);
5109       break;
5110 
5111     case ZERO_EXTEND:
5112     case SIGN_EXTEND:
5113       arc_output_pic_addr_const (file, XEXP (x, 0), code);
5114       break;
5115 
5116 
5117     case UNSPEC:
5118       const char *suffix;
5119       bool pcrel; pcrel = false;
5120       rtx base; base = NULL;
5121       gcc_assert (XVECLEN (x, 0) >= 1);
5122       switch (XINT (x, 1))
5123 	{
5124 	case ARC_UNSPEC_GOT:
5125 	  suffix = "@gotpc", pcrel = true;
5126 	  break;
5127 	case ARC_UNSPEC_GOTOFF:
5128 	  suffix = "@gotoff";
5129 	  break;
5130 	case ARC_UNSPEC_GOTOFFPC:
5131 	  suffix = "@pcl",   pcrel = true;
5132 	  break;
5133 	case ARC_UNSPEC_PLT:
5134 	  suffix = "@plt";
5135 	  break;
5136 	case UNSPEC_TLS_GD:
5137 	  suffix = "@tlsgd", pcrel = true;
5138 	  break;
5139 	case UNSPEC_TLS_IE:
5140 	  suffix = "@tlsie", pcrel = true;
5141 	  break;
5142 	case UNSPEC_TLS_OFF:
5143 	  if (XVECLEN (x, 0) == 2)
5144 	    base = XVECEXP (x, 0, 1);
5145 	  if (SYMBOL_REF_TLS_MODEL (XVECEXP (x, 0, 0)) == TLS_MODEL_LOCAL_EXEC
5146 	      || (!flag_pic && !base))
5147 	    suffix = "@tpoff";
5148 	  else
5149 	    suffix = "@dtpoff";
5150 	  break;
5151 	default:
5152 	  suffix = "@invalid";
	  output_operand_lossage ("invalid UNSPEC as operand: %d", XINT (x, 1));
5154 	  break;
5155 	}
5156       if (pcrel)
5157 	fputs ("pcl,", file);
5158       arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5159       fputs (suffix, file);
5160       if (base)
5161 	arc_output_pic_addr_const (file, base, code);
5162       break;
5163 
5164     default:
5165       output_operand_lossage ("invalid expression as operand");
5166     }
5167 }
5168 
5169 #define SYMBOLIC_CONST(X)	\
5170 (GET_CODE (X) == SYMBOL_REF						\
5171  || GET_CODE (X) == LABEL_REF						\
5172  || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
5173 
5174 /* Emit insns to move operands[1] into operands[0].  */
5175 
5176 static void
5177 prepare_pic_move (rtx *operands, machine_mode)
5178 {
5179   if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1])
5180       && flag_pic)
5181     operands[1] = force_reg (Pmode, operands[1]);
5182   else
5183     {
5184       rtx temp = (reload_in_progress ? operands[0]
5185 		  : flag_pic? gen_reg_rtx (Pmode) : NULL_RTX);
5186       operands[1] = arc_legitimize_pic_address (operands[1], temp);
5187     }
5188 }
5189 
5190 
/* This function returns the number of words at the beginning of an
   argument that must be put in registers.  The returned value must be
   zero for arguments that are passed entirely in registers or that
   are pushed entirely on the stack.
5195 
5196    On some machines, certain arguments must be passed partially in
5197    registers and partially in memory.  On these machines, typically
5198    the first N words of arguments are passed in registers, and the
5199    rest on the stack.  If a multi-word argument (a `double' or a
5200    structure) crosses that boundary, its first few words must be
5201    passed in registers and the rest must be pushed.  This function
5202    tells the compiler when this occurs, and how many of the words
5203    should go in registers.
5204 
5205    `FUNCTION_ARG' for these arguments should return the first register
5206    to be used by the caller for this argument; likewise
5207    `FUNCTION_INCOMING_ARG', for the called function.
5208 
5209    The function is used to implement macro FUNCTION_ARG_PARTIAL_NREGS.  */
5210 
/* If REGNO is the lowest argument register still available, return how
   many argument registers remain.  */
5213 #define GPR_REST_ARG_REGS(REGNO) \
5214   ((REGNO) <= MAX_ARC_PARM_REGS ? MAX_ARC_PARM_REGS - (REGNO) : 0 )
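/* For instance, GPR_REST_ARG_REGS (0) yields MAX_ARC_PARM_REGS, and
   GPR_REST_ARG_REGS (MAX_ARC_PARM_REGS) yields 0.  */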
5215 
/* ARC parameter registers are contiguous, so advancing to the next one
   is a simple increment.  */
5217 #define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 )
5218 
5219 /* Implement TARGET_ARG_PARTIAL_BYTES.  */
5220 
5221 static int
5222 arc_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
5223 		       tree type, bool named ATTRIBUTE_UNUSED)
5224 {
5225   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5226   int bytes = (mode == BLKmode
5227 	       ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
5228   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5229   int arg_num = *cum;
5230   int ret;
5231 
5232   arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
5233   ret = GPR_REST_ARG_REGS (arg_num);
5234 
  /* ICEd at function.c:2361, and ret is copied to data->partial.  */
  ret = (ret >= words ? 0 : ret * UNITS_PER_WORD);
5237 
5238   return ret;
5239 }
5240 
/* This function controls whether a function argument is passed in a
   register, and which register.
5243 
5244    The arguments are CUM, of type CUMULATIVE_ARGS, which summarizes
5245    (in a way defined by INIT_CUMULATIVE_ARGS and FUNCTION_ARG_ADVANCE)
5246    all of the previous arguments so far passed in registers; MODE, the
5247    machine mode of the argument; TYPE, the data type of the argument
5248    as a tree node or 0 if that is not known (which happens for C
5249    support library functions); and NAMED, which is 1 for an ordinary
5250    argument and 0 for nameless arguments that correspond to `...' in
5251    the called function's prototype.
5252 
5253    The returned value should either be a `reg' RTX for the hard
5254    register in which to pass the argument, or zero to pass the
5255    argument on the stack.
5256 
5257    For machines like the Vax and 68000, where normally all arguments
5258    are pushed, zero suffices as a definition.
5259 
5260    The usual way to make the ANSI library `stdarg.h' work on a machine
5261    where some arguments are usually passed in registers, is to cause
5262    nameless arguments to be passed on the stack instead.  This is done
5263    by making the function return 0 whenever NAMED is 0.
5264 
5265    You may use the macro `MUST_PASS_IN_STACK (MODE, TYPE)' in the
5266    definition of this function to determine if this argument is of a
5267    type that must be passed in the stack.  If `REG_PARM_STACK_SPACE'
5268    is not defined and the function returns non-zero for such an
5269    argument, the compiler will abort.  If `REG_PARM_STACK_SPACE' is
5270    defined, the argument will be computed in the stack and then loaded
5271    into a register.
5272 
5273    The function is used to implement macro FUNCTION_ARG.  */
5274 /* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers
5275    and the rest are pushed.  */
5276 
5277 static rtx
5278 arc_function_arg (cumulative_args_t cum_v,
5279 		  machine_mode mode,
5280 		  const_tree type ATTRIBUTE_UNUSED,
5281 		  bool named ATTRIBUTE_UNUSED)
5282 {
5283   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5284   int arg_num = *cum;
5285   rtx ret;
5286   const char *debstr ATTRIBUTE_UNUSED;
5287 
5288   arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
5289   /* Return a marker for use in the call instruction.  */
5290   if (mode == VOIDmode)
5291     {
5292       ret = const0_rtx;
5293       debstr = "<0>";
5294     }
5295   else if (GPR_REST_ARG_REGS (arg_num) > 0)
5296     {
5297       ret = gen_rtx_REG (mode, arg_num);
5298       debstr = reg_names [arg_num];
5299     }
5300   else
5301     {
5302       ret = NULL_RTX;
5303       debstr = "memory";
5304     }
5305   return ret;
5306 }
5307 
5308 /* The function to update the summarizer variable *CUM to advance past
5309    an argument in the argument list.  The values MODE, TYPE and NAMED
5310    describe that argument.  Once this is done, the variable *CUM is
5311    suitable for analyzing the *following* argument with
5312    `FUNCTION_ARG', etc.
5313 
5314    This function need not do anything if the argument in question was
5315    passed on the stack.  The compiler knows how to track the amount of
5316    stack space used for arguments without any special help.
5317 
5318    The function is used to implement macro FUNCTION_ARG_ADVANCE.  */
5319 /* For the ARC: the cum set here is passed on to function_arg where we
5320    look at its value and say which reg to use. Strategy: advance the
5321    regnumber here till we run out of arg regs, then set *cum to last
5322    reg. In function_arg, since *cum > last arg reg we would return 0
5323    and thus the arg will end up on the stack. For straddling args of
5324    course function_arg_partial_nregs will come into play.  */
5325 
5326 static void
5327 arc_function_arg_advance (cumulative_args_t cum_v,
5328 			  machine_mode mode,
5329 			  const_tree type,
5330 			  bool named ATTRIBUTE_UNUSED)
5331 {
5332   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5333   int bytes = (mode == BLKmode
5334 	       ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
5335   int words = (bytes + UNITS_PER_WORD  - 1) / UNITS_PER_WORD;
5336   int i;
5337 
5338   if (words)
5339     *cum = ROUND_ADVANCE_CUM (*cum, mode, type);
5340   for (i = 0; i < words; i++)
5341     *cum = ARC_NEXT_ARG_REG (*cum);
5342 
5343 }
5344 
5345 /* Define how to find the value returned by a function.
5346    VALTYPE is the data type of the value (as a tree).
5347    If the precise function being called is known, FN_DECL_OR_TYPE is its
5348    FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type.  */
5349 
5350 static rtx
5351 arc_function_value (const_tree valtype,
5352 		    const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
5353 		    bool outgoing ATTRIBUTE_UNUSED)
5354 {
5355   machine_mode mode = TYPE_MODE (valtype);
5356   int unsignedp ATTRIBUTE_UNUSED;
5357 
5358   unsignedp = TYPE_UNSIGNED (valtype);
5359   if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE)
5360     PROMOTE_MODE (mode, unsignedp, valtype);
5361   return gen_rtx_REG (mode, 0);
5362 }
5363 
5364 /* Returns the return address that is used by builtin_return_address.  */
5365 
5366 rtx
5367 arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame)
5368 {
5369   if (count != 0)
5370     return const0_rtx;
5371 
  return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
5373 }
5374 
5375 /* Nonzero if the constant value X is a legitimate general operand
5376    when generating PIC code.  It is given that flag_pic is on and
5377    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
5378 
5379 bool
5380 arc_legitimate_pic_operand_p (rtx x)
5381 {
5382   return !arc_raw_symbolic_reference_mentioned_p (x, true);
5383 }
5384 
5385 /* Determine if a given RTX is a valid constant.  We already know this
5386    satisfies CONSTANT_P.  */
5387 
5388 bool
5389 arc_legitimate_constant_p (machine_mode mode, rtx x)
5390 {
5391   if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
5392     return false;
5393 
5394   if (!flag_pic && mode != Pmode)
5395     return true;
5396 
5397   switch (GET_CODE (x))
5398     {
5399     case CONST:
5400       x = XEXP (x, 0);
5401 
5402       if (GET_CODE (x) == PLUS)
5403 	{
5404 	  if (flag_pic
5405 	      ? GET_CODE (XEXP (x, 1)) != CONST_INT
5406 	      : !arc_legitimate_constant_p (mode, XEXP (x, 1)))
5407 	    return false;
5408 	  x = XEXP (x, 0);
5409 	}
5410 
5411       /* Only some unspecs are valid as "constants".  */
5412       if (GET_CODE (x) == UNSPEC)
5413 	switch (XINT (x, 1))
5414 	  {
5415 	  case ARC_UNSPEC_PLT:
5416 	  case ARC_UNSPEC_GOTOFF:
5417 	  case ARC_UNSPEC_GOTOFFPC:
5418 	  case ARC_UNSPEC_GOT:
5419 	  case UNSPEC_TLS_GD:
5420 	  case UNSPEC_TLS_IE:
5421 	  case UNSPEC_TLS_OFF:
5422 	    return true;
5423 
5424 	  default:
5425 	    gcc_unreachable ();
5426 	  }
5427 
5428       /* We must have drilled down to a symbol.  */
5429       if (arc_raw_symbolic_reference_mentioned_p (x, false))
5430 	return false;
5431 
5432       /* Return true.  */
5433       break;
5434 
5435     case SYMBOL_REF:
5436       if (SYMBOL_REF_TLS_MODEL (x))
5437 	return false;
5438       /* Fall through.  */
5439     case LABEL_REF:
5440       if (flag_pic)
5441 	return false;
5442       /* Fall through.  */
5443 
5444     default:
5445       break;
5446     }
5447 
5448   /* Otherwise we handle everything else in the move patterns.  */
5449   return true;
5450 }
5451 
5452 static bool
5453 arc_legitimate_address_p (machine_mode mode, rtx x, bool strict)
5454 {
5455   if (RTX_OK_FOR_BASE_P (x, strict))
5456      return true;
5457   if (LEGITIMATE_OFFSET_ADDRESS_P (mode, x, TARGET_INDEXED_LOADS, strict))
5458      return true;
5459   if (LEGITIMATE_SCALED_ADDRESS_P (mode, x, strict))
5460     return true;
5461   if (LEGITIMATE_SMALL_DATA_ADDRESS_P (x))
5462      return true;
5463   if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x)))
5464      return true;
5465 
  /* When compiling for size, avoid const (@sym + offset)
     addresses.  */
5468   if (!flag_pic && optimize_size && !reload_completed
5469       && (GET_CODE (x) == CONST)
5470       && (GET_CODE (XEXP (x, 0)) == PLUS)
5471       && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5472       && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) == 0
5473       && !SYMBOL_REF_FUNCTION_P (XEXP (XEXP (x, 0), 0)))
5474     {
5475       rtx addend = XEXP (XEXP (x, 0), 1);
5476       gcc_assert (CONST_INT_P (addend));
5477       HOST_WIDE_INT offset = INTVAL (addend);
5478 
      /* Allow addresses with a large offset to pass; they will end up
	 in a limm anyway.  */
5481       return !(offset > -1024 && offset < 1020);
5482     }
5483 
5484   if ((GET_MODE_SIZE (mode) != 16) && CONSTANT_P (x))
5485     {
5486       if (flag_pic ? arc_legitimate_pic_addr_p (x)
5487 	  : arc_legitimate_constant_p (Pmode, x))
5488 	return true;
5489     }
5490   if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC
5491        || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC)
5492       && RTX_OK_FOR_BASE_P (XEXP (x, 0), strict))
5493     return true;
  /* We're restricted here by the `st' insn.  */
5495   if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY)
5496       && GET_CODE (XEXP ((x), 1)) == PLUS
5497       && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP (x, 1), 0))
5498       && LEGITIMATE_OFFSET_ADDRESS_P (QImode, XEXP (x, 1),
5499 				      TARGET_AUTO_MODIFY_REG, strict))
5500     return true;
5501   return false;
5502 }
5503 
5504 /* Return true iff ADDR (a legitimate address expression)
5505    has an effect that depends on the machine mode it is used for.  */
5506 
5507 static bool
5508 arc_mode_dependent_address_p (const_rtx addr, addr_space_t)
5509 {
5510   /* SYMBOL_REF is not mode dependent: it is either a small data reference,
5511      which is valid for loads and stores, or a limm offset, which is valid for
5512      loads.  Scaled indices are scaled by the access mode.  */
5513   if (GET_CODE (addr) == PLUS
5514       && GET_CODE (XEXP ((addr), 0)) == MULT)
5515     return true;
5516   return false;
5517 }
5518 
5519 /* Determine if it's legal to put X into the constant pool.  */
5520 
5521 static bool
5522 arc_cannot_force_const_mem (machine_mode mode, rtx x)
5523 {
5524   return !arc_legitimate_constant_p (mode, x);
5525 }
5526 
5527 /* IDs for all the ARC builtins.  */
5528 
5529 enum arc_builtin_id
5530   {
5531 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)	\
5532     ARC_BUILTIN_ ## NAME,
5533 #include "builtins.def"
5534 #undef DEF_BUILTIN
5535 
5536     ARC_BUILTIN_COUNT
5537   };
5538 
5539 struct GTY(()) arc_builtin_description
5540 {
5541   enum insn_code icode;
5542   int n_args;
5543   tree fndecl;
5544 };
5545 
5546 static GTY(()) struct arc_builtin_description
5547 arc_bdesc[ARC_BUILTIN_COUNT] =
5548 {
5549 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)		\
5550   { (enum insn_code) CODE_FOR_ ## ICODE, N_ARGS, NULL_TREE },
5551 #include "builtins.def"
5552 #undef DEF_BUILTIN
5553 };
5554 
5555 /* Transform UP into lowercase and write the result to LO.
5556    You must provide enough space for LO.  Return LO.  */
5557 
5558 static char*
5559 arc_tolower (char *lo, const char *up)
5560 {
5561   char *lo0 = lo;
5562 
5563   for (; *up; up++, lo++)
5564     *lo = TOLOWER (*up);
5565 
5566   *lo = '\0';
5567 
5568   return lo0;
5569 }
5570 
5571 /* Implement `TARGET_BUILTIN_DECL'.  */
5572 
5573 static tree
5574 arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
5575 {
5576   if (id < ARC_BUILTIN_COUNT)
5577     return arc_bdesc[id].fndecl;
5578 
5579   return error_mark_node;
5580 }
5581 
5582 static void
5583 arc_init_builtins (void)
5584 {
5585   tree V4HI_type_node;
5586   tree V2SI_type_node;
5587   tree V2HI_type_node;
5588 
5589   /* Vector types based on HS SIMD elements.  */
5590   V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
5591   V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
5592   V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
5593 
5594   tree pcvoid_type_node
5595     = build_pointer_type (build_qualified_type (void_type_node,
5596 						TYPE_QUAL_CONST));
5597   tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node,
5598 						    V8HImode);
5599 
5600   tree void_ftype_void
5601     = build_function_type_list (void_type_node, NULL_TREE);
5602   tree int_ftype_int
5603     = build_function_type_list (integer_type_node, integer_type_node,
5604 				NULL_TREE);
5605   tree int_ftype_pcvoid_int
5606     = build_function_type_list (integer_type_node, pcvoid_type_node,
5607 				integer_type_node, NULL_TREE);
5608   tree void_ftype_usint_usint
5609     = build_function_type_list (void_type_node, long_unsigned_type_node,
5610 				long_unsigned_type_node, NULL_TREE);
5611   tree int_ftype_int_int
5612     = build_function_type_list (integer_type_node, integer_type_node,
5613 				integer_type_node, NULL_TREE);
5614   tree usint_ftype_usint
5615     = build_function_type_list  (long_unsigned_type_node,
5616 				 long_unsigned_type_node, NULL_TREE);
5617   tree void_ftype_usint
5618     = build_function_type_list (void_type_node, long_unsigned_type_node,
5619 				NULL_TREE);
5620   tree int_ftype_void
5621     = build_function_type_list (integer_type_node, void_type_node,
5622 				NULL_TREE);
5623   tree void_ftype_int
5624     = build_function_type_list (void_type_node, integer_type_node,
5625 				NULL_TREE);
5626   tree int_ftype_short
5627     = build_function_type_list (integer_type_node, short_integer_type_node,
5628 				NULL_TREE);
5629 
5630   /* Old ARC SIMD types.  */
5631   tree v8hi_ftype_v8hi_v8hi
5632     = build_function_type_list (V8HI_type_node, V8HI_type_node,
5633 				V8HI_type_node, NULL_TREE);
5634   tree v8hi_ftype_v8hi_int
5635     = build_function_type_list (V8HI_type_node, V8HI_type_node,
5636 				integer_type_node, NULL_TREE);
5637   tree v8hi_ftype_v8hi_int_int
5638     = build_function_type_list (V8HI_type_node, V8HI_type_node,
5639 				integer_type_node, integer_type_node,
5640 				NULL_TREE);
5641   tree void_ftype_v8hi_int_int
5642     = build_function_type_list (void_type_node, V8HI_type_node,
5643 				integer_type_node, integer_type_node,
5644 				NULL_TREE);
5645   tree void_ftype_v8hi_int_int_int
5646     = build_function_type_list (void_type_node, V8HI_type_node,
5647 				integer_type_node, integer_type_node,
5648 				integer_type_node, NULL_TREE);
5649   tree v8hi_ftype_int_int
5650     = build_function_type_list (V8HI_type_node, integer_type_node,
5651 				integer_type_node, NULL_TREE);
5652   tree void_ftype_int_int
5653     = build_function_type_list (void_type_node, integer_type_node,
5654 				integer_type_node, NULL_TREE);
5655   tree v8hi_ftype_v8hi
5656     = build_function_type_list (V8HI_type_node, V8HI_type_node,
5657 				NULL_TREE);
5658   /* ARCv2 SIMD types.  */
5659   tree long_ftype_v4hi_v4hi
5660     = build_function_type_list (long_long_integer_type_node,
5661 				V4HI_type_node,	V4HI_type_node, NULL_TREE);
5662   tree int_ftype_v2hi_v2hi
5663     = build_function_type_list (integer_type_node,
5664 				V2HI_type_node, V2HI_type_node, NULL_TREE);
5665   tree v2si_ftype_v2hi_v2hi
5666     = build_function_type_list (V2SI_type_node,
5667 				V2HI_type_node, V2HI_type_node, NULL_TREE);
5668   tree v2hi_ftype_v2hi_v2hi
5669     = build_function_type_list (V2HI_type_node,
5670 				V2HI_type_node, V2HI_type_node, NULL_TREE);
5671   tree v2si_ftype_v2si_v2si
5672     = build_function_type_list (V2SI_type_node,
5673 				V2SI_type_node, V2SI_type_node, NULL_TREE);
5674   tree v4hi_ftype_v4hi_v4hi
5675     = build_function_type_list (V4HI_type_node,
5676 				V4HI_type_node, V4HI_type_node, NULL_TREE);
5677   tree long_ftype_v2si_v2hi
5678     = build_function_type_list (long_long_integer_type_node,
5679 				V2SI_type_node, V2HI_type_node, NULL_TREE);
5680 
5681   /* Add the builtins.  */
5682 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)			\
5683   {									\
5684     int id = ARC_BUILTIN_ ## NAME;					\
5685     const char *Name = "__builtin_arc_" #NAME;				\
5686     char *name = (char*) alloca (1 + strlen (Name));			\
5687 									\
5688     gcc_assert (id < ARC_BUILTIN_COUNT);				\
5689     if (MASK)								\
5690       arc_bdesc[id].fndecl						\
5691 	= add_builtin_function (arc_tolower(name, Name), TYPE, id,	\
5692 				BUILT_IN_MD, NULL, NULL_TREE);		\
5693   }
5694 #include "builtins.def"
5695 #undef DEF_BUILTIN
5696 }
5697 
5698 /* Helper to expand __builtin_arc_aligned (void* val, int
5699   alignval).  */
5700 
5701 static rtx
5702 arc_expand_builtin_aligned (tree exp)
5703 {
5704   tree arg0 = CALL_EXPR_ARG (exp, 0);
5705   tree arg1 = CALL_EXPR_ARG (exp, 1);
5706   fold (arg1);
5707   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5708   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5709 
5710   if (!CONST_INT_P (op1))
5711     {
5712       /* If we can't fold the alignment to a constant integer
5713 	 whilst optimizing, this is probably a user error.  */
5714       if (optimize)
5715 	warning (0, "__builtin_arc_aligned with non-constant alignment");
5716     }
5717   else
5718     {
5719       HOST_WIDE_INT alignTest = INTVAL (op1);
5720       /* Check alignTest is positive, and a power of two.  */
5721       if (alignTest <= 0 || alignTest != (alignTest & -alignTest))
5722 	{
5723 	  error ("invalid alignment value for __builtin_arc_aligned");
5724 	  return NULL_RTX;
5725 	}
5726 
5727       if (CONST_INT_P (op0))
5728 	{
5729 	  HOST_WIDE_INT pnt = INTVAL (op0);
5730 
5731 	  if ((pnt & (alignTest - 1)) == 0)
5732 	    return const1_rtx;
5733 	}
5734       else
5735 	{
5736 	  unsigned  align = get_pointer_alignment (arg0);
5737 	  unsigned  numBits = alignTest * BITS_PER_UNIT;
5738 
5739 	  if (align && align >= numBits)
5740 	    return const1_rtx;
5741 	  /* Another attempt to ascertain alignment.  Check the type
5742 	     we are pointing to.  */
5743 	  if (POINTER_TYPE_P (TREE_TYPE (arg0))
5744 	      && TYPE_ALIGN (TREE_TYPE (TREE_TYPE (arg0))) >= numBits)
5745 	    return const1_rtx;
5746 	}
5747     }
5748 
5749   /* Default to false.  */
5750   return const0_rtx;
5751 }
5752 
5753 /* Helper arc_expand_builtin, generates a pattern for the given icode
5754    and arguments.  */
5755 
5756 static rtx_insn *
5757 apply_GEN_FCN (enum insn_code icode, rtx *arg)
5758 {
5759   switch (insn_data[icode].n_generator_args)
5760     {
5761     case 0:
5762       return GEN_FCN (icode) ();
5763     case 1:
5764       return GEN_FCN (icode) (arg[0]);
5765     case 2:
5766       return GEN_FCN (icode) (arg[0], arg[1]);
5767     case 3:
5768       return GEN_FCN (icode) (arg[0], arg[1], arg[2]);
5769     case 4:
5770       return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3]);
5771     case 5:
5772       return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3], arg[4]);
5773     default:
5774       gcc_unreachable ();
5775     }
5776 }
5777 
5778 /* Expand an expression EXP that calls a built-in function,
5779    with result going to TARGET if that's convenient
5780    (and in mode MODE if that's convenient).
5781    SUBTARGET may be used as the target for computing one of EXP's operands.
5782    IGNORE is nonzero if the value is to be ignored.  */
5783 
5784 static rtx
5785 arc_expand_builtin (tree exp,
5786 		    rtx target,
5787 		    rtx subtarget ATTRIBUTE_UNUSED,
5788 		    machine_mode mode ATTRIBUTE_UNUSED,
5789 		    int ignore ATTRIBUTE_UNUSED)
5790 {
5791   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
5792   unsigned int id = DECL_FUNCTION_CODE (fndecl);
5793   const struct arc_builtin_description *d = &arc_bdesc[id];
5794   int i, j, n_args = call_expr_nargs (exp);
5795   rtx pat = NULL_RTX;
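  /* Room for the result (when the builtin returns a value) plus up to
     four argument operands.  */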
5796   rtx xop[5];
5797   enum insn_code icode = d->icode;
5798   machine_mode tmode = insn_data[icode].operand[0].mode;
5799   int nonvoid;
5800   tree arg0;
5801   tree arg1;
5802   tree arg2;
5803   tree arg3;
5804   rtx op0;
5805   rtx op1;
5806   rtx op2;
5807   rtx op3;
5808   rtx op4;
5809   machine_mode mode0;
5810   machine_mode mode1;
5811   machine_mode mode2;
5812   machine_mode mode3;
5813   machine_mode mode4;
5814 
5815   if (id >= ARC_BUILTIN_COUNT)
5816     internal_error ("bad builtin fcode");
5817 
5818   /* 1st part: Expand special builtins.  */
5819   switch (id)
5820     {
5821     case ARC_BUILTIN_NOP:
5822       emit_insn (gen_nopv ());
5823       return NULL_RTX;
5824 
5825     case ARC_BUILTIN_RTIE:
5826     case ARC_BUILTIN_SYNC:
5827     case ARC_BUILTIN_BRK:
5828     case ARC_BUILTIN_SWI:
5829     case ARC_BUILTIN_UNIMP_S:
5830       gcc_assert (icode != 0);
5831       emit_insn (GEN_FCN (icode) (const1_rtx));
5832       return NULL_RTX;
5833 
5834     case ARC_BUILTIN_ALIGNED:
5835       return arc_expand_builtin_aligned (exp);
5836 
5837     case ARC_BUILTIN_CLRI:
5838       target = gen_reg_rtx (SImode);
5839       emit_insn (gen_clri (target, const1_rtx));
5840       return target;
5841 
5842     case ARC_BUILTIN_TRAP_S:
5843     case ARC_BUILTIN_SLEEP:
5844       arg0 = CALL_EXPR_ARG (exp, 0);
5845       fold (arg0);
5846       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5847 
5848       if  (!CONST_INT_P (op0) || !satisfies_constraint_L (op0))
5849 	{
5850 	  error ("builtin operand should be an unsigned 6-bit value");
5851 	  return NULL_RTX;
5852 	}
5853       gcc_assert (icode != 0);
5854       emit_insn (GEN_FCN (icode) (op0));
5855       return NULL_RTX;
5856 
5857     case ARC_BUILTIN_VDORUN:
5858     case ARC_BUILTIN_VDIRUN:
5859       arg0 = CALL_EXPR_ARG (exp, 0);
5860       arg1 = CALL_EXPR_ARG (exp, 1);
5861       op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
5862       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5863 
5864       target = gen_rtx_REG (SImode, (id == ARC_BUILTIN_VDIRUN) ? 131 : 139);
5865 
5866       mode0 =  insn_data[icode].operand[1].mode;
5867       mode1 =  insn_data[icode].operand[2].mode;
5868 
5869       if (!insn_data[icode].operand[1].predicate (op0, mode0))
5870 	op0 = copy_to_mode_reg (mode0, op0);
5871 
5872       if (!insn_data[icode].operand[2].predicate (op1, mode1))
5873 	op1 = copy_to_mode_reg (mode1, op1);
5874 
5875       pat = GEN_FCN (icode) (target, op0, op1);
5876       if (!pat)
5877 	return NULL_RTX;
5878 
5879       emit_insn (pat);
5880       return NULL_RTX;
5881 
5882     case ARC_BUILTIN_VDIWR:
5883     case ARC_BUILTIN_VDOWR:
5884       arg0 = CALL_EXPR_ARG (exp, 0);
5885       arg1 = CALL_EXPR_ARG (exp, 1);
5886       op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
5887       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5888 
5889       if (!CONST_INT_P (op0)
5890 	  || !(UNSIGNED_INT3 (INTVAL (op0))))
5891 	error ("operand 1 should be an unsigned 3-bit immediate");
5892 
5893       mode1 =  insn_data[icode].operand[1].mode;
5894 
5895       if (icode == CODE_FOR_vdiwr_insn)
5896 	target = gen_rtx_REG (SImode,
5897 			      ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0));
5898       else if (icode == CODE_FOR_vdowr_insn)
5899 	target = gen_rtx_REG (SImode,
5900 			      ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0));
5901       else
5902 	gcc_unreachable ();
5903 
5904       if (!insn_data[icode].operand[2].predicate (op1, mode1))
5905 	op1 = copy_to_mode_reg (mode1, op1);
5906 
5907       pat = GEN_FCN (icode) (target, op1);
5908       if (!pat)
5909 	return NULL_RTX;
5910 
5911       emit_insn (pat);
5912       return NULL_RTX;
5913 
5914     case ARC_BUILTIN_VASRW:
5915     case ARC_BUILTIN_VSR8:
5916     case ARC_BUILTIN_VSR8AW:
5917       arg0 = CALL_EXPR_ARG (exp, 0);
5918       arg1 = CALL_EXPR_ARG (exp, 1);
5919       op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
5920       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5921       op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5922 
5923       target = gen_reg_rtx (V8HImode);
5924       mode0 =  insn_data[icode].operand[1].mode;
5925       mode1 =  insn_data[icode].operand[2].mode;
5926 
5927       if (!insn_data[icode].operand[1].predicate (op0, mode0))
5928 	op0 = copy_to_mode_reg (mode0, op0);
5929 
5930       if ((!insn_data[icode].operand[2].predicate (op1, mode1))
5931 	  || !(UNSIGNED_INT3 (INTVAL (op1))))
5932 	error ("operand 2 should be an unsigned 3-bit value (I0-I7)");
5933 
5934       pat = GEN_FCN (icode) (target, op0, op1, op2);
5935       if (!pat)
5936 	return NULL_RTX;
5937 
5938       emit_insn (pat);
5939       return target;
5940 
5941     case ARC_BUILTIN_VLD32WH:
5942     case ARC_BUILTIN_VLD32WL:
5943     case ARC_BUILTIN_VLD64:
5944     case ARC_BUILTIN_VLD32:
5945       rtx src_vreg;
5946       icode = d->icode;
5947       arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg.  */
5948       arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
5949       arg2 = CALL_EXPR_ARG (exp, 2); /* u8.  */
5950 
5951       src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
5952       op0 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5953       op1 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
5954       op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5955 
5956       /* target <- src vreg.  */
5957       emit_insn (gen_move_insn (target, src_vreg));
5958 
5959       /* target <- vec_concat: target, mem (Ib, u8).  */
5960       mode0 =  insn_data[icode].operand[3].mode;
5961       mode1 =  insn_data[icode].operand[1].mode;
5962 
5963       if ((!insn_data[icode].operand[3].predicate (op0, mode0))
5964 	  || !(UNSIGNED_INT3 (INTVAL (op0))))
5965 	error ("operand 1 should be an unsigned 3-bit value (I0-I7)");
5966 
5967       if ((!insn_data[icode].operand[1].predicate (op1, mode1))
5968 	  || !(UNSIGNED_INT8 (INTVAL (op1))))
5969 	error ("operand 2 should be an unsigned 8-bit value");
5970 
5971       pat = GEN_FCN (icode) (target, op1, op2, op0);
5972       if (!pat)
5973 	return NULL_RTX;
5974 
5975       emit_insn (pat);
5976       return target;
5977 
5978     case ARC_BUILTIN_VLD64W:
5979     case ARC_BUILTIN_VLD128:
5980       arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg.  */
5981       arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
5982 
5983       op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
5984       op1 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
5985       op2 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
5986 
5987       /* target <- src vreg.  */
5988       target = gen_reg_rtx (V8HImode);
5989 
5990       /* target <- vec_concat: target, mem (Ib, u8).  */
5991       mode0 =  insn_data[icode].operand[1].mode;
5992       mode1 =  insn_data[icode].operand[2].mode;
5993       mode2 =  insn_data[icode].operand[3].mode;
5994 
5995       if ((!insn_data[icode].operand[2].predicate (op1, mode1))
5996 	  || !(UNSIGNED_INT3 (INTVAL (op1))))
5997 	error ("operand 1 should be an unsigned 3-bit value (I0-I7)");
5998 
5999       if ((!insn_data[icode].operand[3].predicate (op2, mode2))
6000 	  || !(UNSIGNED_INT8 (INTVAL (op2))))
6001 	error ("operand 2 should be an unsigned 8-bit value");
6002 
6003       pat = GEN_FCN (icode) (target, op0, op1, op2);
6004 
6005       if (!pat)
6006 	return NULL_RTX;
6007 
6008       emit_insn (pat);
6009       return target;
6010 
6011     case ARC_BUILTIN_VST128:
6012     case ARC_BUILTIN_VST64:
6013       arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg.  */
6014       arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7.  */
6015       arg2 = CALL_EXPR_ARG (exp, 2); /* u8.  */
6016 
6017       op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
6018       op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
6019       op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
6020       op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
6021 
6022       mode0 = insn_data[icode].operand[0].mode;
6023       mode1 = insn_data[icode].operand[1].mode;
6024       mode2 = insn_data[icode].operand[2].mode;
6025       mode3 = insn_data[icode].operand[3].mode;
6026 
6027       if ((!insn_data[icode].operand[1].predicate (op1, mode1))
6028 	  || !(UNSIGNED_INT3 (INTVAL (op1))))
6029 	error ("operand 2 should be an unsigned 3-bit value (I0-I7)");
6030 
6031       if ((!insn_data[icode].operand[2].predicate (op2, mode2))
6032 	  || !(UNSIGNED_INT8 (INTVAL (op2))))
6033 	error ("operand 3 should be an unsigned 8-bit value");
6034 
6035       if (!insn_data[icode].operand[3].predicate (op3, mode3))
6036 	op3 = copy_to_mode_reg (mode3, op3);
6037 
6038       pat = GEN_FCN (icode) (op0, op1, op2, op3);
6039       if (!pat)
6040 	return NULL_RTX;
6041 
6042       emit_insn (pat);
6043       return NULL_RTX;
6044 
6045     case ARC_BUILTIN_VST16_N:
6046     case ARC_BUILTIN_VST32_N:
6047       arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg.  */
6048       arg1 = CALL_EXPR_ARG (exp, 1); /* u3.  */
6049       arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7.  */
6050       arg3 = CALL_EXPR_ARG (exp, 3); /* u8.  */
6051 
6052       op0 = expand_expr (arg3, NULL_RTX, SImode, EXPAND_NORMAL);
6053       op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
6054       op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);
6055       op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
6056       op4 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
6057 
6058       mode0 = insn_data[icode].operand[0].mode;
6059       mode2 = insn_data[icode].operand[2].mode;
6060       mode3 = insn_data[icode].operand[3].mode;
6061       mode4 = insn_data[icode].operand[4].mode;
6062 
6063       /* Do some correctness checks for the operands.  */
6064       if ((!insn_data[icode].operand[0].predicate (op0, mode0))
6065 	  || !(UNSIGNED_INT8 (INTVAL (op0))))
6066 	error ("operand 4 should be an unsigned 8-bit value (0-255)");
6067 
6068       if ((!insn_data[icode].operand[2].predicate (op2, mode2))
6069 	  || !(UNSIGNED_INT3 (INTVAL (op2))))
6070 	error ("operand 3 should be an unsigned 3-bit value (I0-I7)");
6071 
6072       if (!insn_data[icode].operand[3].predicate (op3, mode3))
6073 	op3 = copy_to_mode_reg (mode3, op3);
6074 
6075       if ((!insn_data[icode].operand[4].predicate (op4, mode4))
6076 	   || !(UNSIGNED_INT3 (INTVAL (op4))))
6077 	error ("operand 2 should be an unsigned 3-bit value (subreg 0-7)");
6078       else if (icode == CODE_FOR_vst32_n_insn
6079 	       && ((INTVAL (op4) % 2) != 0))
6080 	error ("operand 2 should be an even 3-bit value (subreg 0,2,4,6)");
6081 
6082       pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
6083       if (!pat)
6084 	return NULL_RTX;
6085 
6086       emit_insn (pat);
6087       return NULL_RTX;
6088 
6089     default:
6090       break;
6091     }
6092 
6093   /* 2nd part: Expand regular builtins.  */
6094   if (icode == 0)
6095     internal_error ("bad builtin fcode");
6096 
6097   nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6098   j = 0;
6099 
6100   if (nonvoid)
6101     {
6102       if (target == NULL_RTX
6103 	  || GET_MODE (target) != tmode
6104 	  || !insn_data[icode].operand[0].predicate (target, tmode))
6105 	{
6106 	  target = gen_reg_rtx (tmode);
6107 	}
6108       xop[j++] = target;
6109     }
6110 
6111   gcc_assert (n_args <= 4);
6112   for (i = 0; i < n_args; i++, j++)
6113     {
6114       tree arg = CALL_EXPR_ARG (exp, i);
6115       machine_mode mode = insn_data[icode].operand[j].mode;
6116       rtx op = expand_expr (arg, NULL_RTX, mode, EXPAND_NORMAL);
6117       machine_mode opmode = GET_MODE (op);
6118       char c = insn_data[icode].operand[j].constraint[0];
6119 
6120       /* SIMD extension requires exact immediate operand match.  */
6121       if ((id > ARC_BUILTIN_SIMD_BEGIN)
6122 	  && (id < ARC_BUILTIN_SIMD_END)
6123 	  && (c != 'v')
6124 	  && (c != 'r'))
6125 	{
6126 	  if (!CONST_INT_P (op))
6127 	    error ("builtin requires an immediate for operand %d", j);
6128 	  switch (c)
6129 	    {
6130 	    case 'L':
6131 	      if (!satisfies_constraint_L (op))
		error ("operand %d should be a 6-bit unsigned immediate", j);
6133 	      break;
6134 	    case 'P':
6135 	      if (!satisfies_constraint_P (op))
		error ("operand %d should be an 8-bit unsigned immediate", j);
6137 	      break;
6138 	    case 'K':
6139 	      if (!satisfies_constraint_K (op))
		error ("operand %d should be a 3-bit unsigned immediate", j);
6141 	      break;
6142 	    default:
6143 	      error ("unknown builtin immediate operand type for operand %d",
6144 		     j);
6145 	    }
6146 	}
6147 
6148       if (CONST_INT_P (op))
6149 	opmode = mode;
6150 
6151       if ((opmode == SImode) && (mode == HImode))
6152 	{
6153 	  opmode = HImode;
6154 	  op = gen_lowpart (HImode, op);
6155 	}
6156 
6157       /* In case the insn wants input operands in modes different from
6158 	 the result, abort.  */
6159       gcc_assert (opmode == mode || opmode == VOIDmode);
6160 
6161       if (!insn_data[icode].operand[i + nonvoid].predicate (op, mode))
6162 	op = copy_to_mode_reg (mode, op);
6163 
6164       xop[j] = op;
6165     }
6166 
6167   pat = apply_GEN_FCN (icode, xop);
6168   if (pat == NULL_RTX)
6169     return NULL_RTX;
6170 
6171   emit_insn (pat);
6172 
6173   if (nonvoid)
6174     return target;
6175   else
6176     return const0_rtx;
6177 }
6178 
/* Return true if operands[opno] is a valid compile-time constant to be
   used as a register number in the code for builtins.  Otherwise flag
   an error and return false.  */
6182 
6183 bool
6184 check_if_valid_regno_const (rtx *operands, int opno)
6185 {
6186 
6187   switch (GET_CODE (operands[opno]))
6188     {
6189     case SYMBOL_REF :
6190     case CONST :
6191     case CONST_INT :
6192       return true;
6193     default:
	error ("register number must be a compile-time constant; try giving a higher optimization level");
6195 	break;
6196     }
6197   return false;
6198 }
6199 
/* Check whether, after all the constant folding, the operand to
   __builtin_arc_sleep is an unsigned 6-bit integer.  If not, flag an
   error.  */
6202 
6203 bool
6204 check_if_valid_sleep_operand (rtx *operands, int opno)
6205 {
6206   switch (GET_CODE (operands[opno]))
6207     {
6208     case CONST :
6209     case CONST_INT :
	if (UNSIGNED_INT6 (INTVAL (operands[opno])))
	  return true;
6212     /* FALLTHRU */
6213     default:
6214 	fatal_error (input_location,
6215 		     "operand for sleep instruction must be an unsigned 6 bit compile-time constant");
6216 	break;
6217     }
6218   return false;
6219 }
6220 
6221 /* Return true if it is ok to make a tail-call to DECL.  */
6222 
6223 static bool
6224 arc_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
6225 			     tree exp ATTRIBUTE_UNUSED)
6226 {
6227   /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
6228   if (ARC_INTERRUPT_P (arc_compute_function_type (cfun)))
6229     return false;
6230 
6231   /* Everything else is ok.  */
6232   return true;
6233 }
6234 
6235 /* Output code to add DELTA to the first argument, and then jump
6236    to FUNCTION.  Used for C++ multiple inheritance.  */
6237 
6238 static void
6239 arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
6240 		     HOST_WIDE_INT delta,
6241 		     HOST_WIDE_INT vcall_offset,
6242 		     tree function)
6243 {
6244   int mi_delta = delta;
6245   const char *const mi_op = mi_delta < 0 ? "sub" : "add";
6246   int shift = 0;
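  /* THIS is passed in r1 when the function returns its result in
     memory (r0 then presumably carries the hidden return-slot
     pointer), and in r0 otherwise.  */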
6247   int this_regno
6248     = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0;
6249   rtx fnaddr;
6250 
6251   if (mi_delta < 0)
6252     mi_delta = - mi_delta;
6253 
  /* Add DELTA.  When possible use a plain add; otherwise emit it in
     chunks of at most eight bits at a time.  */
6256 
6257   while (mi_delta != 0)
6258     {
6259       if ((mi_delta & (3 << shift)) == 0)
6260 	shift += 2;
6261       else
6262 	{
6263 	  asm_fprintf (file, "\t%s\t%s, %s, %d\n",
6264 		       mi_op, reg_names[this_regno], reg_names[this_regno],
6265 		       mi_delta & (0xff << shift));
6266 	  mi_delta &= ~(0xff << shift);
6267 	  shift += 8;
6268 	}
6269     }
6270 
6271   /* If needed, add *(*THIS + VCALL_OFFSET) to THIS.  */
6272   if (vcall_offset != 0)
6273     {
6274       /* ld  r12,[this]           --> temp = *this
6275 	 add r12,r12,vcall_offset --> temp = *(*this + vcall_offset)
6276 	 ld r12,[r12]
6277 	 add this,this,r12        --> this+ = *(*this + vcall_offset) */
6278       asm_fprintf (file, "\tld\t%s, [%s]\n",
6279 		   ARC_TEMP_SCRATCH_REG, reg_names[this_regno]);
6280       asm_fprintf (file, "\tadd\t%s, %s, " HOST_WIDE_INT_PRINT_DEC "\n",
6281 		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset);
6282       asm_fprintf (file, "\tld\t%s, [%s]\n",
6283 		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG);
6284       asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno],
6285 		   reg_names[this_regno], ARC_TEMP_SCRATCH_REG);
6286     }
6287 
6288   fnaddr = XEXP (DECL_RTL (function), 0);
6289 
6290   if (arc_is_longcall_p (fnaddr))
6291     fputs ("\tj\t", file);
6292   else
6293     fputs ("\tb\t", file);
6294   assemble_name (file, XSTR (fnaddr, 0));
6295   fputc ('\n', file);
6296 }
6297 
6298 /* Return true if a 32 bit "long_call" should be generated for
6299    this calling SYM_REF.  We generate a long_call if the function:
6300 
        a.  has an __attribute__((long_call))
6302      or b.  the -mlong-calls command line switch has been specified
6303 
6304    However we do not generate a long call if the function has an
6305    __attribute__ ((short_call)) or __attribute__ ((medium_call))
6306 
6307    This function will be called by C fragments contained in the machine
6308    description file.  */
6309 
6310 bool
6311 arc_is_longcall_p (rtx sym_ref)
6312 {
6313   if (GET_CODE (sym_ref) != SYMBOL_REF)
6314     return false;
6315 
6316   return (SYMBOL_REF_LONG_CALL_P (sym_ref)
6317 	  || (TARGET_LONG_CALLS_SET
6318 	      && !SYMBOL_REF_SHORT_CALL_P (sym_ref)
6319 	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
6320 
6321 }
6322 
6323 /* Likewise for short calls.  */
6324 
6325 bool
6326 arc_is_shortcall_p (rtx sym_ref)
6327 {
6328   if (GET_CODE (sym_ref) != SYMBOL_REF)
6329     return false;
6330 
6331   return (SYMBOL_REF_SHORT_CALL_P (sym_ref)
6332 	  || (!TARGET_LONG_CALLS_SET && !TARGET_MEDIUM_CALLS
6333 	      && !SYMBOL_REF_LONG_CALL_P (sym_ref)
6334 	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
6335 
6336 }
6337 
6338 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
6339 
6340 static bool
6341 arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6342 {
6343   if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type))
6344     return true;
6345   else
6346     {
6347       HOST_WIDE_INT size = int_size_in_bytes (type);
6348       return (size == -1 || size > (TARGET_V2 ? 16 : 8));
6349     }
6350 }
6351 
6352 
6353 /* This was in rtlanal.c, and can go in there when we decide we want
6354    to submit the change for inclusion in the GCC tree.  */
6355 /* Like note_stores, but allow the callback to have side effects on the rtl
6356    (like the note_stores of yore):
6357    Call FUN on each register or MEM that is stored into or clobbered by X.
6358    (X would be the pattern of an insn).  DATA is an arbitrary pointer,
6359    ignored by note_stores, but passed to FUN.
6360    FUN may alter parts of the RTL.
6361 
6362    FUN receives three arguments:
6363    1. the REG, MEM, CC0 or PC being stored in or clobbered,
6364    2. the SET or CLOBBER rtx that does the store,
6365    3. the pointer DATA provided to note_stores.
6366 
6367   If the item being stored in or clobbered is a SUBREG of a hard register,
6368   the SUBREG will be passed.  */
6369 
6370 /* For now.  */ static
6371 void
6372 walk_stores (rtx x, void (*fun) (rtx, rtx, void *), void *data)
6373 {
6374   int i;
6375 
6376   if (GET_CODE (x) == COND_EXEC)
6377     x = COND_EXEC_CODE (x);
6378 
6379   if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
6380     {
6381       rtx dest = SET_DEST (x);
6382 
6383       while ((GET_CODE (dest) == SUBREG
6384 	      && (!REG_P (SUBREG_REG (dest))
6385 		  || REGNO (SUBREG_REG (dest)) >= FIRST_PSEUDO_REGISTER))
6386 	     || GET_CODE (dest) == ZERO_EXTRACT
6387 	     || GET_CODE (dest) == STRICT_LOW_PART)
6388 	dest = XEXP (dest, 0);
6389 
6390       /* If we have a PARALLEL, SET_DEST is a list of EXPR_LIST expressions,
6391 	 each of whose first operand is a register.  */
6392       if (GET_CODE (dest) == PARALLEL)
6393 	{
6394 	  for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
6395 	    if (XEXP (XVECEXP (dest, 0, i), 0) != 0)
6396 	      (*fun) (XEXP (XVECEXP (dest, 0, i), 0), x, data);
6397 	}
6398       else
6399 	(*fun) (dest, x, data);
6400     }
6401 
6402   else if (GET_CODE (x) == PARALLEL)
6403     for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
6404       walk_stores (XVECEXP (x, 0, i), fun, data);
6405 }
6406 
6407 static bool
6408 arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED,
6409 		       machine_mode mode ATTRIBUTE_UNUSED,
6410 		       const_tree type,
6411 		       bool named ATTRIBUTE_UNUSED)
6412 {
6413   return (type != 0
6414 	  && (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
6415 	      || TREE_ADDRESSABLE (type)));
6416 }
6417 
6418 /* Implement TARGET_CAN_USE_DOLOOP_P.  */
6419 
6420 static bool
6421 arc_can_use_doloop_p (const widest_int &iterations, const widest_int &,
6422 		      unsigned int loop_depth, bool entered_at_top)
6423 {
6424   if (loop_depth > 1)
6425     return false;
6426   /* Setting up the loop with two sr instructions costs 6 cycles.  */
6427   if (TARGET_ARC700
6428       && !entered_at_top
6429       && wi::gtu_p (iterations, 0)
6430       && wi::leu_p (iterations, flag_pic ? 6 : 3))
6431     return false;
6432   return true;
6433 }
6434 
/* Return NULL if INSN is valid within a low-overhead loop.
   Otherwise return why doloop cannot be applied.  */
6437 
6438 static const char *
6439 arc_invalid_within_doloop (const rtx_insn *insn)
6440 {
6441   if (CALL_P (insn))
6442     return "Function call in the loop.";
6443   return NULL;
6444 }
6445 
6446 /* Return true if a load instruction (CONSUMER) uses the same address as a
6447    store instruction (PRODUCER).  This function is used to avoid st/ld
   address hazards in ARC700 cores.  */

bool
6450 arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
6451 {
6452   rtx in_set, out_set;
6453   rtx out_addr, in_addr;
6454 
6455   if (!producer)
6456     return false;
6457 
6458   if (!consumer)
6459     return false;
6460 
  /* Extract the addresses from the producer and the consumer.  */
6462   out_set = single_set (producer);
6463   if (out_set)
6464     {
6465       out_addr = SET_DEST (out_set);
6466       if (!out_addr)
6467 	return false;
6468       if (GET_CODE (out_addr) == ZERO_EXTEND
6469 	  || GET_CODE (out_addr) == SIGN_EXTEND)
6470 	out_addr = XEXP (out_addr, 0);
6471 
6472       if (!MEM_P (out_addr))
6473 	return false;
6474 
6475       in_set = single_set (consumer);
6476       if (in_set)
6477 	{
6478 	  in_addr = SET_SRC (in_set);
6479 	  if (!in_addr)
6480 	    return false;
6481 	  if (GET_CODE (in_addr) == ZERO_EXTEND
6482 	      || GET_CODE (in_addr) == SIGN_EXTEND)
6483 	    in_addr = XEXP (in_addr, 0);
6484 
6485 	  if (!MEM_P (in_addr))
6486 	    return false;
6487 	  /* Get rid of the MEM and check if the addresses are
6488 	     equivalent.  */
6489 	  in_addr = XEXP (in_addr, 0);
6490 	  out_addr = XEXP (out_addr, 0);
6491 
6492 	  return exp_equiv_p (in_addr, out_addr, 0, true);
6493 	}
6494     }
6495   return false;
6496 }
6497 
6498 /* The same functionality as arc_hazard.  It is called in machine
6499    reorg before any other optimization.  Hence, the NOP size is taken
6500    into account when doing branch shortening.  */
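/* A sketch of the ARC700 transformation performed below (instruction
   stream assumed for illustration):

       st  r3,[r1]              st  r3,[r1]
       ld  r2,[r1]      -->     nop
                                nop
                                ld  r2,[r1]  */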
6501 
6502 static void
6503 workaround_arc_anomaly (void)
6504 {
6505   rtx_insn *insn, *succ0;
6506 
6507   /* For any architecture: call arc_hazard here.  */
6508   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6509     {
6510       succ0 = next_real_insn (insn);
6511       if (arc_hazard (insn, succ0))
6512 	{
6513 	  emit_insn_before (gen_nopv (), succ0);
6514 	}
6515     }
6516 
6517   if (TARGET_ARC700)
6518     {
6519       rtx_insn *succ1;
6520 
6521       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6522 	{
6523 	  succ0 = next_real_insn (insn);
6524 	  if (arc_store_addr_hazard_p (insn, succ0))
6525 	    {
6526 	      emit_insn_after (gen_nopv (), insn);
6527 	      emit_insn_after (gen_nopv (), insn);
6528 	      continue;
6529 	    }
6530 
6531 	  /* Avoid adding nops if the instruction between the ST and LD is
6532 	     a call or jump.  */
6533 	  succ1 = next_real_insn (succ0);
6534 	  if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0)
6535 	      && arc_store_addr_hazard_p (insn, succ1))
6536 	    emit_insn_after (gen_nopv (), insn);
6537 	}
6538     }
6539 }
6540 
6541 static int arc_reorg_in_progress = 0;
6542 
6543 /* ARC's machine-specific reorg function.  */
6544 
6545 static void
6546 arc_reorg (void)
6547 {
6548   rtx_insn *insn;
6549   rtx pattern;
6550   rtx pc_target;
6551   long offset;
6552   int changed;
6553 
6554   workaround_arc_anomaly ();
6555 
6556   cfun->machine->arc_reorg_started = 1;
6557   arc_reorg_in_progress = 1;
6558 
6559   /* Link up loop ends with their loop start.  */
6560   {
6561     for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6562       if (GET_CODE (insn) == JUMP_INSN
6563 	  && recog_memoized (insn) == CODE_FOR_doloop_end_i)
6564 	{
6565 	  rtx_insn *top_label
6566 	    = as_a <rtx_insn *> (XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0));
6567 	  rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label));
6568 	  rtx_insn *lp, *prev = prev_nonnote_insn (top_label);
6569 	  rtx_insn *lp_simple = NULL;
6570 	  rtx_insn *next = NULL;
6571 	  rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0);
6572 	  int seen_label = 0;
6573 
6574 	  for (lp = prev;
6575 	       (lp && NONJUMP_INSN_P (lp)
6576 		&& recog_memoized (lp) != CODE_FOR_doloop_begin_i);
6577 	       lp = prev_nonnote_insn (lp))
6578 	    ;
6579 	  if (!lp || !NONJUMP_INSN_P (lp)
6580 	      || dead_or_set_regno_p (lp, LP_COUNT))
6581 	    {
6582 	      HOST_WIDE_INT loop_end_id
6583 		= INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0));
6584 
6585 	      for (prev = next = insn, lp = NULL ; prev || next;)
6586 		{
6587 		  if (prev)
6588 		    {
6589 		      if (NONJUMP_INSN_P (prev)
6590 			  && recog_memoized (prev) == CODE_FOR_doloop_begin_i
6591 			  && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0))
6592 			      == loop_end_id))
6593 			{
6594 			  lp = prev;
6595 			  break;
6596 			}
6597 		      else if (LABEL_P (prev))
6598 			seen_label = 1;
6599 		      prev = prev_nonnote_insn (prev);
6600 		    }
6601 		  if (next)
6602 		    {
6603 		      if (NONJUMP_INSN_P (next)
6604 			  && recog_memoized (next) == CODE_FOR_doloop_begin_i
6605 			  && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0))
6606 			      == loop_end_id))
6607 			{
6608 			  lp = next;
6609 			  break;
6610 			}
6611 		      next = next_nonnote_insn (next);
6612 		    }
6613 		}
6614 	      prev = NULL;
6615 	    }
6616 	  else
6617 	    lp_simple = lp;
6618 	  if (lp && !dead_or_set_regno_p (lp, LP_COUNT))
6619 	    {
6620 	      rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0);
6621 	      if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0)))
6622 		/* The loop end insn has been duplicated.  That can happen
6623 		   when there is a conditional block at the very end of
6624 		   the loop.  */
6625 		goto failure;
6626 	      /* If register allocation failed to allocate to the right
6627 		 register, there is no point in teaching reload to
6628 		 fix this up with reloads, as that would cost more
6629 		 than using an ordinary core register with the
6630 		 doloop_fallback pattern.  */
6631 	      if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt))
6632 	      /* Likewise, if the loop setup is evidently inside the loop,
6633 		 we lose.  */
6634 		  || (!lp_simple && lp != next && !seen_label))
6635 		{
6636 		  remove_insn (lp);
6637 		  goto failure;
6638 		}
6639 	      /* It is common that the optimizers copy the loop count from
6640 		 another register, and doloop_begin_i is stuck with the
6641 		 source of the move.  Making doloop_begin_i only accept "l"
6642 		 is nonsensical, as this then makes reload evict the pseudo
6643 		 used for the loop end.  The underlying cause is that the
6644 		 optimizers don't understand that the register allocation for
6645 		 doloop_begin_i should be treated as part of the loop.
6646 		 Try to work around this problem by verifying the previous
6647 		 move exists.  */
6648 	      if (true_regnum (begin_cnt) != LP_COUNT)
6649 		{
6650 		  rtx_insn *mov;
6651 		  rtx set, note;
6652 
6653 		  for (mov = prev_nonnote_insn (lp); mov;
6654 		       mov = prev_nonnote_insn (mov))
6655 		    {
6656 		      if (!NONJUMP_INSN_P (mov))
6657 			mov = 0;
6658 		      else if ((set = single_set (mov))
6659 			  && rtx_equal_p (SET_SRC (set), begin_cnt)
6660 			  && rtx_equal_p (SET_DEST (set), op0))
6661 			break;
6662 		    }
6663 		  if (mov)
6664 		    {
6665 		      XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0) = op0;
6666 		      note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt));
6667 		      if (note)
6668 			remove_note (lp, note);
6669 		    }
6670 		  else
6671 		    {
6672 		      remove_insn (lp);
6673 		      goto failure;
6674 		    }
6675 		}
6676 	      XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num;
6677 	      XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num;
6678 	      if (next == lp)
6679 		XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx;
6680 	      else if (!lp_simple)
6681 		XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx;
6682 	      else if (prev != lp)
6683 		{
6684 		  remove_insn (lp);
6685 		  add_insn_after (lp, prev, NULL);
6686 		}
6687 	      if (!lp_simple)
6688 		{
6689 		  XEXP (XVECEXP (PATTERN (lp), 0, 7), 0)
6690 		    = gen_rtx_LABEL_REF (Pmode, top_label);
6691 		  add_reg_note (lp, REG_LABEL_OPERAND, top_label);
6692 		  LABEL_NUSES (top_label)++;
6693 		}
6694 	      /* We can avoid tedious loop start / end setting for empty loops
6695 		 by merely setting the loop count to its final value.  */
6696 	      if (next_active_insn (top_label) == insn)
6697 		{
6698 		  rtx lc_set
6699 		    = gen_rtx_SET (XEXP (XVECEXP (PATTERN (lp), 0, 3), 0),
6700 				   const0_rtx);
6701 
6702 		  rtx_insn *lc_set_insn = emit_insn_before (lc_set, insn);
6703 		  delete_insn (lp);
6704 		  delete_insn (insn);
6705 		  insn = lc_set_insn;
6706 		}
6707 	      /* If the loop is non-empty with zero length, we can't make it
6708 		 a zero-overhead loop.  That can happen for empty asms.  */
6709 	      else
6710 		{
6711 		  rtx_insn *scan;
6712 
6713 		  for (scan = top_label;
6714 		       (scan && scan != insn
6715 			&& (!NONJUMP_INSN_P (scan) || !get_attr_length (scan)));
6716 		       scan = NEXT_INSN (scan));
6717 		  if (scan == insn)
6718 		    {
6719 		      remove_insn (lp);
6720 		      goto failure;
6721 		    }
6722 		}
6723 	    }
6724 	  else
6725 	    {
6726 	      /* Sometimes the loop optimizer makes a complete hash of the
6727 		 loop.  If it were only that the loop is not entered at the
6728 		 top, we could fix this up by setting LP_START with SR.
6729 		 However, if we can't find the loop begin where it should be,
6730 		 chances are that it does not even dominate the loop, but is
6731 		 inside the loop instead.  Using SR there would kill
6732 		 performance.
6733 		 We use the doloop_fallback pattern here, which executes
6734 		 in two cycles on the ARC700 when predicted correctly.  */
6735 	    failure:
6736 	      if (!REG_P (op0))
6737 		{
6738 		  rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0);
6739 
6740 		  emit_insn_before (gen_move_insn (op3, op0), insn);
6741 		  PATTERN (insn)
6742 		    = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0);
6743 		}
6744 	      else
6745 		XVEC (PATTERN (insn), 0)
6746 		  = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0),
6747 			       XVECEXP (PATTERN (insn), 0, 1));
6748 	      INSN_CODE (insn) = -1;
6749 	    }
6750 	}
6751     }
6752 
6753 /* FIXME: should anticipate ccfsm action, generate special patterns for
6754    to-be-deleted branches that have no delay slot and have at least the
6755    length of the size increase forced on other insns that are conditionalized.
6756    This can also have an insn_list inside that enumerates insns which are
6757    not actually conditionalized because the destinations are dead in the
6758    not-execute case.
6759    Could also tag branches that we want to be unaligned if they get no delay
6760    slot, or even ones that we don't want to do delay slot scheduling for
6761    because we can unalign them.
6762 
6763    However, there are cases when conditional execution is only possible after
6764    delay slot scheduling:
6765 
6766    - If a delay slot is filled with a nocond/set insn from above, the previous
6767      basic block can become eligible for conditional execution.
6768    - If a delay slot is filled with a nocond insn from the fall-through path,
6769      the branch with that delay slot can become eligible for conditional
6770      execution (however, with the same sort of data flow analysis that dbr
6771      does, we could have figured out before that we don't need to
6772      conditionalize this insn.)
6773    - If a delay slot insn is filled with an insn from the target, the
6774      target label gets its uses decremented (even deleted if falling to zero),
6775      thus possibly creating more condexec opportunities there.
6776    Therefore, we should still be prepared to apply condexec optimization on
6777    non-prepared branches if the size increase of conditionalized insns is no
6778    more than the size saved from eliminating the branch.  An invocation option
6779    could also be used to reserve a bit of extra size for condbranches so that
6780    this'll work more often (could also test in arc_reorg if the block is
6781    'close enough' to be eligible for condexec to make this likely, and
6782    estimate required size increase).  */
6783   /* Generate BRcc insns, by combining cmp and Bcc insns wherever possible.  */
6784   if (TARGET_NO_BRCC_SET)
6785     return;
6786 
6787   do
6788     {
6789       init_insn_lengths();
6790       changed = 0;
6791 
6792       if (optimize > 1 && !TARGET_NO_COND_EXEC)
6793 	{
6794 	  arc_ifcvt ();
6795 	  unsigned int flags = pass_data_arc_ifcvt.todo_flags_finish;
6796 	  df_finish_pass ((flags & TODO_df_verify) != 0);
6797 	}
6798 
6799       /* Call shorten_branches to calculate the insn lengths.  */
6800       shorten_branches (get_insns());
6801       cfun->machine->ccfsm_current_insn = NULL_RTX;
6802 
6803       if (!INSN_ADDRESSES_SET_P())
6804 	  fatal_error (input_location, "insn addresses not set after shorten_branches");
6805 
6806       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6807 	{
6808 	  rtx label;
6809 	  enum attr_type insn_type;
6810 
6811 	  /* If a non-jump insn (or a casesi jump table), continue.  */
6812 	  if (GET_CODE (insn) != JUMP_INSN
6813 	      || GET_CODE (PATTERN (insn)) == ADDR_VEC
6814 	      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
6815 	    continue;
6816 
6817 	  /* If we already have a brcc, note if it is suitable for brcc_s.
6818 	     Be a bit generous with the brcc_s range so that we can take
6819 	     advantage of any code shortening from delay slot scheduling.  */
6820 	  if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch)
6821 	    {
6822 	      rtx pat = PATTERN (insn);
6823 	      rtx op = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0);
6824 	      rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0);
6825 
6826 	      offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
6827 	      if ((offset >= -140 && offset < 140)
6828 		  && rtx_equal_p (XEXP (op, 1), const0_rtx)
6829 		  && compact_register_operand (XEXP (op, 0), VOIDmode)
6830 		  && equality_comparison_operator (op, VOIDmode))
6831 		PUT_MODE (*ccp, CC_Zmode);
6832 	      else if (GET_MODE (*ccp) == CC_Zmode)
6833 		PUT_MODE (*ccp, CC_ZNmode);
6834 	      continue;
6835 	    }
6836 	  if ((insn_type = get_attr_type (insn)) == TYPE_BRCC
6837 	      || insn_type == TYPE_BRCC_NO_DELAY_SLOT)
6838 	    continue;
6839 
6840 	  /* OK. so we have a jump insn.  */
6841 	  /* We need to check that it is a bcc.  */
6842 	  /* Bcc => set (pc) (if_then_else ) */
6843 	  pattern = PATTERN (insn);
6844 	  if (GET_CODE (pattern) != SET
6845 	      || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
6846 	      || ANY_RETURN_P (XEXP (SET_SRC (pattern), 1)))
6847 	    continue;
6848 
6849 	  /* Now check if the jump is beyond the s9 range.  */
6850 	  if (CROSSING_JUMP_P (insn))
6851 	    continue;
6852 	  offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
6853 
6854 	  if (offset > 253 || offset < -254)
6855 	    continue;
6856 
6857 	  pc_target = SET_SRC (pattern);
6858 
6859 	  /* Avoid FPU instructions.  */
6860 	  if ((GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPUmode)
6861 	      || (GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPU_UNEQmode))
6862 	    continue;
6863 
6864 	  /* Now go back and search for the set cc insn.  */
6865 
6866 	  label = XEXP (pc_target, 1);
6867 
6868 	    {
6869 	      rtx pat;
6870 	      rtx_insn *scan, *link_insn = NULL;
6871 
6872 	      for (scan = PREV_INSN (insn);
6873 		   scan && GET_CODE (scan) != CODE_LABEL;
6874 		   scan = PREV_INSN (scan))
6875 		{
6876 		  if (! INSN_P (scan))
6877 		    continue;
6878 		  pat = PATTERN (scan);
6879 		  if (GET_CODE (pat) == SET
6880 		      && cc_register (SET_DEST (pat), VOIDmode))
6881 		    {
6882 		      link_insn = scan;
6883 		      break;
6884 		    }
6885 		}
6886 	      if (!link_insn)
6887 		continue;
6888 	      else
6889 		/* Check if this is a data dependency.  */
6890 		{
6891 		  rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
6892 		  rtx cmp0, cmp1;
6893 
6894 		  /* OK, this is the set cc.  Copy args here.  */
6895 		  op = XEXP (pc_target, 0);
6896 
6897 		  op0 = cmp0 = XEXP (SET_SRC (pat), 0);
6898 		  op1 = cmp1 = XEXP (SET_SRC (pat), 1);
6899 		  if (GET_CODE (op0) == ZERO_EXTRACT
6900 		      && XEXP (op0, 1) == const1_rtx
6901 		      && (GET_CODE (op) == EQ
6902 			  || GET_CODE (op) == NE))
6903 		    {
6904 		      /* btst / b{eq,ne} -> bbit{0,1} */
6905 		      op0 = XEXP (cmp0, 0);
6906 		      op1 = XEXP (cmp0, 2);
6907 		    }
6908 		  else if (!register_operand (op0, VOIDmode)
6909 			  || !general_operand (op1, VOIDmode))
6910 		    continue;
6911 		  /* Be careful not to break what cmpsfpx_raw is
6912 		     trying to create for checking equality of
6913 		     single-precision floats.  */
6914 		  else if (TARGET_SPFP
6915 			   && GET_MODE (op0) == SFmode
6916 			   && GET_MODE (op1) == SFmode)
6917 		    continue;
6918 
6919 		  /* Neither of the two cmp operands should be set between the
6920 		     cmp and the branch.  */
6921 		  if (reg_set_between_p (op0, link_insn, insn))
6922 		    continue;
6923 
6924 		  if (reg_set_between_p (op1, link_insn, insn))
6925 		    continue;
6926 
6927 		  /* Since the MODE check does not work, check that this is
6928 		     CC reg's last set location before insn, and also no
6929 		     instruction between the cmp and branch uses the
6930 		     condition codes.  */
6931 		  if ((reg_set_between_p (SET_DEST (pat), link_insn, insn))
6932 		      || (reg_used_between_p (SET_DEST (pat), link_insn, insn)))
6933 		    continue;
6934 
6935 		  /* CC reg should be dead after insn.  */
6936 		  if (!find_regno_note (insn, REG_DEAD, CC_REG))
6937 		    continue;
6938 
6939 		  op = gen_rtx_fmt_ee (GET_CODE (op),
6940 				       GET_MODE (op), cmp0, cmp1);
6941 		  /* If we create a LIMM where there was none before,
6942 		     we only benefit if we can avoid a scheduling bubble
6943 		     for the ARC600.  Otherwise, we'd only forgo chances
6944 		     at short insn generation, and risk out-of-range
6945 		     branches.  */
6946 		  if (!brcc_nolimm_operator (op, VOIDmode)
6947 		      && !long_immediate_operand (op1, VOIDmode)
6948 		      && (TARGET_ARC700
6949 			  || next_active_insn (link_insn) != insn))
6950 		    continue;
6951 
6952 		  /* Emit bbit / brcc (or brcc_s if possible).
6953 		     CC_Zmode indicates that brcc_s is possible.  */
6954 
6955 		  if (op0 != cmp0)
6956 		    cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG);
6957 		  else if ((offset >= -140 && offset < 140)
6958 			   && rtx_equal_p (op1, const0_rtx)
6959 			   && compact_register_operand (op0, VOIDmode)
6960 			   && (GET_CODE (op) == EQ
6961 			       || GET_CODE (op) == NE))
6962 		    cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG);
6963 		  else
6964 		    cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG);
6965 
6966 		  brcc_insn
6967 		    = gen_rtx_IF_THEN_ELSE (VOIDmode, op, label, pc_rtx);
6968 		  brcc_insn = gen_rtx_SET (pc_rtx, brcc_insn);
6969 		  cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx);
6970 		  brcc_insn
6971 		    = gen_rtx_PARALLEL
6972 			(VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx));
6973 		  brcc_insn = emit_jump_insn_before (brcc_insn, insn);
6974 
6975 		  JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn);
6976 		  note = find_reg_note (insn, REG_BR_PROB, 0);
6977 		  if (note)
6978 		    {
6979 		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6980 		      REG_NOTES (brcc_insn) = note;
6981 		    }
6982 		  note = find_reg_note (link_insn, REG_DEAD, op0);
6983 		  if (note)
6984 		    {
6985 		      remove_note (link_insn, note);
6986 		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6987 		      REG_NOTES (brcc_insn) = note;
6988 		    }
6989 		  note = find_reg_note (link_insn, REG_DEAD, op1);
6990 		  if (note)
6991 		    {
6992 		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6993 		      REG_NOTES (brcc_insn) = note;
6994 		    }
6995 
6996 		  changed = 1;
6997 
6998 		  /* Delete the bcc insn.  */
6999 		  set_insn_deleted (insn);
7000 
7001 		  /* Delete the cmp insn.  */
7002 		  set_insn_deleted (link_insn);
7003 
7004 		}
7005 	    }
7006 	}
7007       /* Clear out insn_addresses.  */
7008       INSN_ADDRESSES_FREE ();
7009 
7010     } while (changed);
7011 
7012   if (INSN_ADDRESSES_SET_P())
7013     fatal_error (input_location, "insn addresses not freed");
7014 
7015   arc_reorg_in_progress = 0;
7016 }
7017 
7018 /* Check if the operands are valid for BRcc.d generation.
7019    Valid BRcc.d patterns are
7020        BRcc.d b, c, s9
7021        BRcc.d b, u6, s9
7022 
7023    For cc={GT, LE, GTU, LEU}, u6=63 cannot be allowed,
7024    since they are encoded by the assembler as {GE, LT, HS, LS} 64, which
7025    does not have a delay slot.
7026 
7027    Assumed precondition: the second operand is either a register or a u6 value.  */
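/* For instance (behaviour assumed from the encoding note above), a
   candidate such as "brgt.d r0,63,@lbl" cannot keep its delay slot:
   the assembler would emit it as "brge r0,64,@lbl", which has none.  */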
7028 
7029 bool
7030 valid_brcc_with_delay_p (rtx *operands)
7031 {
7032   if (optimize_size && GET_MODE (operands[4]) == CC_Zmode)
7033     return false;
7034   return brcc_nolimm_operator (operands[0], VOIDmode);
7035 }
7036 
7037 /* ??? Hack.  This should not really be here.  See PR32143.  */
7038 static bool
7039 arc_decl_anon_ns_mem_p (const_tree decl)
7040 {
7041   while (1)
7042     {
7043       if (decl == NULL_TREE || decl == error_mark_node)
7044 	return false;
7045       if (TREE_CODE (decl) == NAMESPACE_DECL
7046 	  && DECL_NAME (decl) == NULL_TREE)
7047 	return true;
7048       /* Classes and namespaces inside anonymous namespaces have
7049 	 TREE_PUBLIC == 0, so we can shortcut the search.  */
7050       else if (TYPE_P (decl))
7051 	return (TREE_PUBLIC (TYPE_NAME (decl)) == 0);
7052       else if (TREE_CODE (decl) == NAMESPACE_DECL)
7053 	return (TREE_PUBLIC (decl) == 0);
7054       else
7055 	decl = DECL_CONTEXT (decl);
7056     }
7057 }
7058 
7059 /* Implement TARGET_IN_SMALL_DATA_P.  Return true if it would be safe to
7060    access DECL using gp-relative (small-data) addressing.  */
7061 
7062 static bool
7063 arc_in_small_data_p (const_tree decl)
7064 {
7065   HOST_WIDE_INT size;
7066 
7067   if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
7068     return false;
7069 
7070 
7071   /* We don't yet generate small-data references for -mabicalls.  See related
7072      -G handling in override_options.  */
7073   if (TARGET_NO_SDATA_SET)
7074     return false;
7075 
7076   if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0)
7077     {
7078       const char *name;
7079 
7080       /* Reject anything that isn't in a known small-data section.  */
7081       name = DECL_SECTION_NAME (decl);
7082       if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
7083 	return false;
7084 
7085       /* If a symbol is defined externally, the assembler will use the
7086 	 usual -G rules when deciding how to implement macros.  */
7087       if (!DECL_EXTERNAL (decl))
7088 	  return true;
7089     }
7090   /* Only global variables go into sdata section for now.  */
7091   else if (1)
7092     {
7093       /* Don't put constants into the small data section: we want them
7094 	 to be in ROM rather than RAM.  */
7095       if (TREE_CODE (decl) != VAR_DECL)
7096 	return false;
7097 
7098       if (TREE_READONLY (decl)
7099 	  && !TREE_SIDE_EFFECTS (decl)
7100 	  && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl))))
7101 	return false;
7102 
7103       /* TREE_PUBLIC might change after the first call, because of the patch
7104 	 for PR19238.  */
7105       if (default_binds_local_p_1 (decl, 1)
7106 	  || arc_decl_anon_ns_mem_p (decl))
7107 	return false;
7108 
7109       /* To ensure -mvolatile-cache works, reject volatile decls:
7110 	 ld.di does not have a gp-relative variant.  */
7111       if (TREE_THIS_VOLATILE (decl))
7112 	return false;
7113     }
7114 
7115   /* Disable sdata references to weak variables.  */
7116   if (DECL_WEAK (decl))
7117     return false;
7118 
7119   size = int_size_in_bytes (TREE_TYPE (decl));
7120 
7121 /*   if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) */
7122 /*     return false; */
7123 
7124   /* Allow only <=4B long data types into sdata.  */
7125   return (size > 0 && size <= 4);
7126 }
7127 
7128 /* Return true if X is a small data address that can be rewritten
7129    as a gp+symref.  */
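/* For example (sketch), a reference like
       (mem:SI (symbol_ref "foo"))            ; SYMBOL_REF_SMALL_P set
   qualifies, and arc_rewrite_small_data below turns it into
       (mem:SI (plus (reg gp) (symbol_ref "foo")))
   where "gp" stands for pic_offset_table_rtx.  */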
7130 
7131 static bool
7132 arc_rewrite_small_data_p (const_rtx x)
7133 {
7134   if (GET_CODE (x) == CONST)
7135     x = XEXP (x, 0);
7136 
7137   if (GET_CODE (x) == PLUS)
7138     {
7139       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7140 	x = XEXP (x, 0);
7141     }
7142 
7143   if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
7144     {
7145       gcc_assert (SYMBOL_REF_TLS_MODEL (x) == 0);
7146       return true;
7147     }
7148   return false;
7149 }
7150 
7151 /* If possible, rewrite OP so that it refers to small data using
7152    explicit relocations.  */
7153 
7154 rtx
7155 arc_rewrite_small_data (rtx op)
7156 {
7157   op = copy_insn (op);
7158   subrtx_ptr_iterator::array_type array;
7159   FOR_EACH_SUBRTX_PTR (iter, array, &op, ALL)
7160     {
7161       rtx *loc = *iter;
7162       if (arc_rewrite_small_data_p (*loc))
7163 	{
7164 	  gcc_assert (SDATA_BASE_REGNUM == PIC_OFFSET_TABLE_REGNUM);
7165 	  *loc = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, *loc);
7166 	  if (loc != &op)
7167 	    {
7168 	      if (GET_CODE (op) == MEM && &XEXP (op, 0) == loc)
7169 		; /* OK.  */
7170 	      else if (GET_CODE (op) == MEM
7171 		       && GET_CODE (XEXP (op, 0)) == PLUS
7172 		       && GET_CODE (XEXP (XEXP (op, 0), 0)) == MULT)
7173 		*loc = force_reg (Pmode, *loc);
7174 	      else
7175 		gcc_unreachable ();
7176 	    }
7177 	  iter.skip_subrtxes ();
7178 	}
7179       else if (GET_CODE (*loc) == PLUS
7180 	       && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx))
7181 	iter.skip_subrtxes ();
7182     }
7183   return op;
7184 }
7185 
7186 /* Return true if OP refers to small data symbols directly, not through
7187    a PLUS.  */
7188 
7189 bool
7190 small_data_pattern (rtx op, machine_mode)
7191 {
7192   if (GET_CODE (op) == SEQUENCE)
7193     return false;
7194   subrtx_iterator::array_type array;
7195   FOR_EACH_SUBRTX (iter, array, op, ALL)
7196     {
7197       const_rtx x = *iter;
7198       if (GET_CODE (x) == PLUS
7199 	  && rtx_equal_p (XEXP (x, 0), pic_offset_table_rtx))
7200 	iter.skip_subrtxes ();
7201       else if (arc_rewrite_small_data_p (x))
7202 	return true;
7203     }
7204   return false;
7205 }
7206 
7207 /* Return true if OP is an acceptable memory operand for ARCompact
7208    16-bit gp-relative load instructions.
7209    OP should look like: [r26, symref@sda],
7210    i.e. (mem (plus (reg 26) (symref with smalldata flag set)))
7211   */
7212 /* The volatile cache option is still to be handled.  */
7213 
7214 bool
7215 compact_sda_memory_operand (rtx op, machine_mode mode)
7216 {
7217   rtx addr;
7218   int size;
7219 
7220   /* Eliminate non-memory operations.  */
7221   if (GET_CODE (op) != MEM)
7222     return false;
7223 
7224   if (mode == VOIDmode)
7225     mode = GET_MODE (op);
7226 
7227   size = GET_MODE_SIZE (mode);
7228 
7229   /* dword operations really put out 2 instructions, so eliminate them.  */
7230   if (size > UNITS_PER_WORD)
7231     return false;
7232 
7233   /* Decode the address now.  */
7234   addr = XEXP (op, 0);
7235 
7236   return LEGITIMATE_SMALL_DATA_ADDRESS_P (addr);
7237 }
7238 
7239 /* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL.  */
7240 
7241 void
7242 arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name,
7243 				   unsigned HOST_WIDE_INT size,
7244 				   unsigned HOST_WIDE_INT align,
7245 				   unsigned HOST_WIDE_INT globalize_p)
7246 {
7247   int in_small_data = arc_in_small_data_p (decl);
7248 
7249   if (in_small_data)
7250     switch_to_section (get_named_section (NULL, ".sbss", 0));
7251   /*    named_section (0,".sbss",0); */
7252   else
7253     switch_to_section (bss_section);
7254 
7255   if (globalize_p)
7256     (*targetm.asm_out.globalize_label) (stream, name);
7257 
7258   ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT));
7259   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
7260   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
7261   ASM_OUTPUT_LABEL (stream, name);
7262 
7263   if (size != 0)
7264     ASM_OUTPUT_SKIP (stream, size);
7265 }
7266 
7267 static bool
7268 arc_preserve_reload_p (rtx in)
7269 {
7270   return (GET_CODE (in) == PLUS
7271 	  && RTX_OK_FOR_BASE_P (XEXP (in, 0), true)
7272 	  && CONST_INT_P (XEXP (in, 1))
7273 	  && !((INTVAL (XEXP (in, 1)) & 511)));
7274 }
7275 
7276 int
7277 arc_register_move_cost (machine_mode,
7278 			enum reg_class from_class, enum reg_class to_class)
7279 {
7280   /* The ARC600 has no bypass for extension registers, hence a nop may need
7281      to be inserted after a write so that reads are safe.  */
7282   if (TARGET_ARC600)
7283     {
7284       if (to_class == MPY_WRITABLE_CORE_REGS)
7285 	return 3;
7286      /* Instructions modifying LP_COUNT need 4 additional cycles before
7287 	the register will actually contain the value.  */
7288       else if (to_class == LPCOUNT_REG)
7289 	return 6;
7290       else if (to_class == WRITABLE_CORE_REGS)
7291 	return 6;
7292     }
7293 
7294   /* The ARC700 stalls for 3 cycles when *reading* from lp_count.  */
7295   if (TARGET_ARC700
7296       && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS
7297 	  || from_class == WRITABLE_CORE_REGS))
7298     return 8;
7299 
7300   /* Force an attempt to 'mov Dy,Dx' to spill.  */
7301   if ((TARGET_ARC700 || TARGET_EM) && TARGET_DPFP
7302       && from_class == DOUBLE_REGS && to_class == DOUBLE_REGS)
7303     return 100;
7304 
7305   return 2;
7306 }
7307 
7308 /* Emit code for an addsi3 instruction with OPERANDS.
7309    COND_P indicates if this will use conditional execution.
7310    Return the length of the instruction.
7311    If OUTPUT_P is false, don't actually output the instruction, just return
7312    its length.  */
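/* Illustrative outcomes (operand values assumed; short-insn suffixes
   elided):
     2 bytes:  add  r0,r1,r2        ; both registers satisfy Rcq
     4 bytes:  sub  r0,r1,4         ; small negated constant
     4 bytes:  add3 r0,r1,32        ; adds 32<<3 = 256 without a LIMM
     8 bytes:  add  r0,r1,0x12345   ; needs a long immediate  */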
7313 int
7314 arc_output_addsi (rtx *operands, bool cond_p, bool output_p)
7315 {
7316   char format[35];
7317 
7318   int match = operands_match_p (operands[0], operands[1]);
7319   int match2 = operands_match_p (operands[0], operands[2]);
7320   int intval = (REG_P (operands[2]) ? 1
7321 		: CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057);
7322   int neg_intval = -intval;
7323   int short_0 = satisfies_constraint_Rcq (operands[0]);
7324   int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1]));
7325   int ret = 0;
7326 
7327 #define ADDSI_OUTPUT1(FORMAT) do {\
7328   if (output_p) \
7329     output_asm_insn (FORMAT, operands);\
7330   return ret; \
7331 } while (0)
7332 #define ADDSI_OUTPUT(LIST) do {\
7333   if (output_p) \
7334     sprintf LIST;\
7335   ADDSI_OUTPUT1 (format);\
7336   return ret; \
7337 } while (0)
7338 
7339   /* First try to emit a 16 bit insn.  */
7340   ret = 2;
7341   if (!cond_p
7342       /* If we are actually about to output this insn, don't try a 16 bit
7343 	 variant if we already decided that we don't want that
7344 	 (I.e. we upsized this insn to align some following insn.)
7345 	 E.g. add_s r0,sp,70 is 16 bit, but add r0,sp,70 requires a LIMM -
7346 	 but add1 r0,sp,35 doesn't.  */
7347       && (!output_p || (get_attr_length (current_output_insn) & 2)))
7348     {
7349       if (short_p
7350 	  && (REG_P (operands[2])
7351 	      ? (match || satisfies_constraint_Rcq (operands[2]))
7352 	      : (unsigned) intval <= (match ? 127 : 7)))
7353 	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7354       if (short_0 && REG_P (operands[1]) && match2)
7355 	ADDSI_OUTPUT1 ("add%? %0,%2,%1");
7356       if ((short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM)
7357 	  && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124))
7358 	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7359 
7360       if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7))
7361 	  || (REGNO (operands[0]) == STACK_POINTER_REGNUM
7362 	      && match && !(neg_intval & ~124)))
7363 	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
7364 
7365       if (REG_P (operands[0]) && REG_P (operands[1])
7366 	  && REGNO (operands[0]) <= 31 && REGNO (operands[0]) == REGNO (operands[1])
7367 	  && CONST_INT_P (operands[2]) && intval >= -1 && intval <= 6)
7368 	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7369 
7370       if (TARGET_CODE_DENSITY && REG_P (operands[0]) && REG_P (operands[1])
7371 	  && (REGNO (operands[0]) == 0 || REGNO (operands[0]) == 1)
7372 	  && satisfies_constraint_Rcq (operands[1])
7373 	  && satisfies_constraint_L (operands[2]))
7374 	ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;3");
7375     }
7376 
7377   /* Now try to emit a 32 bit insn without long immediate.  */
7378   ret = 4;
7379   if (!match && match2 && REG_P (operands[1]))
7380     ADDSI_OUTPUT1 ("add%? %0,%2,%1");
7381   if (match || !cond_p)
7382     {
7383       int limit = (match && !cond_p) ? 0x7ff : 0x3f;
7384       int range_factor = neg_intval & intval;
7385       int shift;
7386 
7387       if (intval == (HOST_WIDE_INT) (HOST_WIDE_INT_M1U << 31))
7388 	ADDSI_OUTPUT1 ("bxor%? %0,%1,31");
7389 
7390       /* If we can use a straight add / sub instead of an {add,sub}[123] of
7391 	 the same size, do so - the insn latency is lower.  */
7392       /* -0x800 is a 12-bit constant for add /add3 / sub / sub3, but
7393 	 0x800 is not.  */
7394       if ((intval >= 0 && intval <= limit)
7395 	       || (intval == -0x800 && limit == 0x7ff))
7396 	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7397       else if ((intval < 0 && neg_intval <= limit)
7398 	       || (intval == 0x800 && limit == 0x7ff))
7399 	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
7400       shift = range_factor >= 8 ? 3 : (range_factor >> 1);
7401       gcc_assert (shift == 0 || shift == 1 || shift == 2 || shift == 3);
7402       gcc_assert ((((1 << shift) - 1) & intval) == 0);
7403       if (((intval < 0 && intval != -0x4000)
7404 	   /* sub[123] is slower than add_s / sub, only use it if it
7405 	      avoids a long immediate.  */
7406 	   && neg_intval <= limit << shift)
7407 	  || (intval == 0x4000 && limit == 0x7ff))
7408 	ADDSI_OUTPUT ((format, "sub%d%%? %%0,%%1,%d",
7409 		       shift, neg_intval >> shift));
7410       else if ((intval >= 0 && intval <= limit << shift)
7411 	       || (intval == -0x4000 && limit == 0x7ff))
7412 	ADDSI_OUTPUT ((format, "add%d%%? %%0,%%1,%d", shift, intval >> shift));
7413     }
7414   /* Try to emit a 16 bit opcode with long immediate.  */
7415   ret = 6;
7416   if (short_p && match)
7417     ADDSI_OUTPUT1 ("add%? %0,%1,%S2");
7418 
7419   /* We have to use a 32 bit opcode with a long immediate.  */
7420   ret = 8;
7421   ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%S2");
7422 }
7423 
7424 /* Emit code for a commutative_cond_exec instruction with OPERANDS.
7425    Return the length of the instruction.
7426    If OUTPUT_P is false, don't actually output the instruction, just return
7427    its length.  */
7428 int
7429 arc_output_commutative_cond_exec (rtx *operands, bool output_p)
7430 {
7431   enum rtx_code commutative_op = GET_CODE (operands[3]);
7432   const char *pat = NULL;
7433 
7434   /* Canonical rtl should not have a constant in the first operand position.  */
7435   gcc_assert (!CONSTANT_P (operands[1]));
7436 
7437   switch (commutative_op)
7438     {
7439       case AND:
7440 	if (satisfies_constraint_C1p (operands[2]))
7441 	  pat = "bmsk%? %0,%1,%Z2";
7442 	else if (satisfies_constraint_C2p (operands[2]))
7443 	  {
7444 	    operands[2] = GEN_INT ((~INTVAL (operands[2])));
7445 	    pat = "bmskn%? %0,%1,%Z2";
7446 	  }
7447 	else if (satisfies_constraint_Ccp (operands[2]))
7448 	  pat = "bclr%? %0,%1,%M2";
7449 	else if (satisfies_constraint_CnL (operands[2]))
7450 	  pat = "bic%? %0,%1,%n2-1";
7451 	break;
7452       case IOR:
7453 	if (satisfies_constraint_C0p (operands[2]))
7454 	  pat = "bset%? %0,%1,%z2";
7455 	break;
7456       case XOR:
7457 	if (satisfies_constraint_C0p (operands[2]))
7458 	  pat = "bxor%? %0,%1,%z2";
7459 	break;
7460       case PLUS:
7461 	return arc_output_addsi (operands, true, output_p);
7462       default: break;
7463     }
7464   if (output_p)
7465     output_asm_insn (pat ? pat : "%O3.%d5 %0,%1,%2", operands);
7466   if (pat || REG_P (operands[2]) || satisfies_constraint_L (operands[2]))
7467     return 4;
7468   return 8;
7469 }
7470 
7471 /* Helper function of arc_expand_movmem.  ADDR points to a chunk of memory.
7472    Emit code and return a potentially modified address such that offsets
7473    up to SIZE can be added to yield a legitimate address.
7474    If REUSE is set, ADDR is a register that may be modified.  */
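/* For instance (offset limits assumed from SMALL_INT), an address such
   as (plus (reg sp) (const_int 2040)) with a large SIZE may exceed the
   permitted offset range, so the address is computed into a register
   first.  */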
7475 
7476 static rtx
7477 force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse)
7478 {
7479   rtx base = addr;
7480   rtx offs = const0_rtx;
7481 
7482   if (GET_CODE (base) == PLUS)
7483     {
7484       offs = XEXP (base, 1);
7485       base = XEXP (base, 0);
7486     }
7487   if (!REG_P (base)
7488       || (REGNO (base) != STACK_POINTER_REGNUM
7489 	  && REGNO_PTR_FRAME_P (REGNO (base)))
7490       || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs))
7491       || !SMALL_INT (INTVAL (offs) + size))
7492     {
7493       if (reuse)
7494 	emit_insn (gen_add2_insn (addr, offs));
7495       else
7496 	addr = copy_to_mode_reg (Pmode, addr);
7497     }
7498   return addr;
7499 }
7500 
7501 /* Like move_by_pieces, but take account of load latency, and actual
7502    offset ranges.  Return true on success.  */
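/* Sketch of the code emitted for an 8-byte, word-aligned copy without
   -mll64 (register names assumed):

       ld r2,[rsrc,0]
       ld r3,[rsrc,4]
       st r2,[rdst,0]
       st r3,[rdst,4]

   The stores are deferred by one iteration so that each load's latency
   is hidden before its value is stored.  */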
7503 
7504 bool
7505 arc_expand_movmem (rtx *operands)
7506 {
7507   rtx dst = operands[0];
7508   rtx src = operands[1];
7509   rtx dst_addr, src_addr;
7510   HOST_WIDE_INT size;
7511   int align = INTVAL (operands[3]);
7512   unsigned n_pieces;
7513   int piece = align;
7514   rtx store[2];
7515   rtx tmpx[2];
7516   int i;
7517 
7518   if (!CONST_INT_P (operands[2]))
7519     return false;
7520   size = INTVAL (operands[2]);
7521   /* move_by_pieces_ninsns is static, so we can't use it.  */
7522   if (align >= 4)
7523     {
7524       if (TARGET_LL64)
7525 	n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1);
7526       else
7527 	n_pieces = (size + 2) / 4U + (size & 1);
7528     }
7529   else if (align == 2)
7530     n_pieces = (size + 1) / 2U;
7531   else
7532     n_pieces = size;
7533   if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
7534     return false;
7535   /* Force 32-bit aligned and larger data to use 64-bit transfers, if
7536      possible.  */
7537   if (TARGET_LL64 && (piece >= 4) && (size >= 8))
7538     piece = 8;
7539   else if (piece > 4)
7540     piece = 4;
7541   dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
7542   src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
7543   store[0] = store[1] = NULL_RTX;
7544   tmpx[0] = tmpx[1] = NULL_RTX;
7545   for (i = 0; size > 0; i ^= 1, size -= piece)
7546     {
7547       rtx tmp;
7548       machine_mode mode;
7549 
7550       while (piece > size)
7551 	piece >>= 1;
7552       mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT);
7553       /* If we don't re-use temporaries, the scheduler gets carried away,
7554 	 and the register pressure gets unnecessarily high.  */
7555       if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode)
7556 	tmp = tmpx[i];
7557       else
7558 	tmpx[i] = tmp = gen_reg_rtx (mode);
7559       dst_addr = force_offsettable (dst_addr, piece, 1);
7560       src_addr = force_offsettable (src_addr, piece, 1);
7561       if (store[i])
7562 	emit_insn (store[i]);
7563       emit_move_insn (tmp, change_address (src, mode, src_addr));
7564       store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp);
7565       dst_addr = plus_constant (Pmode, dst_addr, piece);
7566       src_addr = plus_constant (Pmode, src_addr, piece);
7567     }
7568   if (store[i])
7569     emit_insn (store[i]);
7570   if (store[i^1])
7571     emit_insn (store[i^1]);
7572   return true;
7573 }
7574 
7575 /* Prepare operands for move in MODE.  Return true iff the move has
7576    been emitted.  */
7577 
7578 bool
7579 prepare_move_operands (rtx *operands, machine_mode mode)
7580 {
7581   /* We used to do this only for MODE_INT modes, but addresses to floating
7582      point variables may well be in the small data section.  */
7583   if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[0], Pmode))
7584     operands[0] = arc_rewrite_small_data (operands[0]);
7585 
7586   if (mode == SImode && SYMBOLIC_CONST (operands[1]))
7587     {
7588       prepare_pic_move (operands, SImode);
7589 
7590       /* Disable any REG_EQUALs associated with the symref
7591 	 otherwise the optimization pass undoes the work done
7592 	 here and references the variable directly.  */
7593     }
7594 
7595   if (GET_CODE (operands[0]) != MEM
7596       && !TARGET_NO_SDATA_SET
7597       && small_data_pattern (operands[1], Pmode))
7598     {
7599       /* This is to take care of address calculations involving sdata
7600 	 variables.  */
7601       operands[1] = arc_rewrite_small_data (operands[1]);
7602 
7603       emit_insn (gen_rtx_SET (operands[0],operands[1]));
7604       /* ??? This note is useless, since it only restates the set itself.
7605 	 We should rather use the original SYMBOL_REF.  However, there is
7606 	 the problem that we are lying to the compiler about these
7607 	 SYMBOL_REFs to start with.  symbol@sda should be encoded specially
7608 	 so that we can tell it apart from an actual symbol.  */
7609       set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
7610 
7611       /* Take care of the REG_EQUAL note that will be attached to mark the
7612 	 output reg equal to the initial symbol_ref after this code is
7613 	 executed.  */
7614       emit_move_insn (operands[0], operands[0]);
7615       return true;
7616     }
7617 
7618   if (MEM_P (operands[0])
7619       && !(reload_in_progress || reload_completed))
7620     {
7621       operands[1] = force_reg (mode, operands[1]);
7622       if (!move_dest_operand (operands[0], mode))
7623 	{
7624 	  rtx addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
7625 	  /* This is like change_address_1 (operands[0], mode, 0, 1),
7626 	     except that we can't use that function because it is static.  */
7627 	  rtx pat = change_address (operands[0], mode, addr);
7628 	  MEM_COPY_ATTRIBUTES (pat, operands[0]);
7629 	  operands[0] = pat;
7630 	}
7631       if (!cse_not_expected)
7632 	{
7633 	  rtx pat = XEXP (operands[0], 0);
7634 
7635 	  pat = arc_legitimize_address_0 (pat, pat, mode);
7636 	  if (pat)
7637 	    {
7638 	      pat = change_address (operands[0], mode, pat);
7639 	      MEM_COPY_ATTRIBUTES (pat, operands[0]);
7640 	      operands[0] = pat;
7641 	    }
7642 	}
7643     }
7644 
7645   if (MEM_P (operands[1]) && !cse_not_expected)
7646     {
7647       rtx pat = XEXP (operands[1], 0);
7648 
7649       pat = arc_legitimize_address_0 (pat, pat, mode);
7650       if (pat)
7651 	{
7652 	  pat = change_address (operands[1], mode, pat);
7653 	  MEM_COPY_ATTRIBUTES (pat, operands[1]);
7654 	  operands[1] = pat;
7655 	}
7656     }
7657 
7658   return false;
7659 }
7660 
7661 /* Prepare OPERANDS for an extension using CODE to OMODE.
7662    Return true iff the move has been emitted.  */
7663 
7664 bool
7665 prepare_extend_operands (rtx *operands, enum rtx_code code,
7666 			 machine_mode omode)
7667 {
7668   if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode))
7669     {
7670       /* This is to take care of address calculations involving sdata
7671 	 variables.  */
7672       operands[1]
7673 	= gen_rtx_fmt_e (code, omode, arc_rewrite_small_data (operands[1]));
7674       emit_insn (gen_rtx_SET (operands[0], operands[1]));
7675       set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
7676 
7677       /* Take care of the REG_EQUAL note that will be attached to mark the
7678 	 output reg equal to the initial extension after this code is
7679 	 executed.  */
7680       emit_move_insn (operands[0], operands[0]);
7681       return true;
7682     }
7683   return false;
7684 }
7685 
7686 /* Output a library call to a function called FNAME that has been arranged
7687    to be local to any dso.  */
7688 
7689 const char *
7690 arc_output_libcall (const char *fname)
7691 {
7692   unsigned len = strlen (fname);
7693   static char buf[64];
7694 
7695   gcc_assert (len < sizeof buf - 35);
7696   if (TARGET_LONG_CALLS_SET
7697      || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ()))
7698     {
7699       if (flag_pic)
7700 	sprintf (buf, "add r12,pcl,@%s@pcl\n\tjl%%!%%* [r12]", fname);
7701       else
7702 	sprintf (buf, "jl%%! @%s", fname);
7703     }
7704   else
7705     sprintf (buf, "bl%%!%%* @%s", fname);
7706   return buf;
7707 }
7708 
7709 /* Return the SImode highpart of the DImode value IN.  */
7710 
7711 rtx
7712 disi_highpart (rtx in)
7713 {
7714   return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4);
7715 }
7716 
7717 /* Return length adjustment for INSN.
7718    For ARC600:
7719    A write to a core reg greater than or equal to 32 must not be immediately
7720    followed by a use.  Anticipate the length requirement to insert a nop
7721    between PRED and SUCC to prevent a hazard.  */
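/* Example of the situation handled here (extension register number
   assumed):

       mov  r40,r0      ; write to a core reg >= 32
       add  r1,r40,r2   ; immediate use -> request room for a nop  */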
7722 
7723 static int
7724 arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
7725 {
7726   if (!TARGET_ARC600)
7727     return 0;
7728   /* If SUCC is a doloop_end_i with a preceding label, we must output a nop
7729      in front of SUCC anyway, so there will be separation between PRED and
7730      SUCC.  */
7731   if (recog_memoized (succ) == CODE_FOR_doloop_end_i
7732       && LABEL_P (prev_nonnote_insn (succ)))
7733     return 0;
7734   if (recog_memoized (succ) == CODE_FOR_doloop_begin_i)
7735     return 0;
7736   if (GET_CODE (PATTERN (pred)) == SEQUENCE)
7737     pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
7738   if (GET_CODE (PATTERN (succ)) == SEQUENCE)
7739     succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
7740   if (recog_memoized (pred) == CODE_FOR_mulsi_600
7741       || recog_memoized (pred) == CODE_FOR_umul_600
7742       || recog_memoized (pred) == CODE_FOR_mac_600
7743       || recog_memoized (pred) == CODE_FOR_mul64_600
7744       || recog_memoized (pred) == CODE_FOR_mac64_600
7745       || recog_memoized (pred) == CODE_FOR_umul64_600
7746       || recog_memoized (pred) == CODE_FOR_umac64_600)
7747     return 0;
7748   subrtx_iterator::array_type array;
7749   FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
7750     {
7751       const_rtx x = *iter;
7752       switch (GET_CODE (x))
7753 	{
7754 	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
7755 	  break;
7756 	default:
7757 	  /* This is also fine for PRE/POST_MODIFY, because they
7758 	     contain a SET.  */
7759 	  continue;
7760 	}
7761       rtx dest = XEXP (x, 0);
7762       /* Check if this sets an extension register.  N.B. we use 61 for the
7763 	 condition codes, which is definitely not an extension register.  */
7764       if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
7765 	  /* Check if the same register is used by the PAT.  */
7766 	  && (refers_to_regno_p
7767 	      (REGNO (dest),
7768 	       REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
7769 	       PATTERN (succ), 0)))
7770 	return 4;
7771     }
7772   return 0;
7773 }
7774 
7775 /* Given an rtx, check whether it is an assembly instruction.  */
7776 
7777 static int
7778 arc_asm_insn_p (rtx x)
7779 {
7780   int i, j;
7781 
7782   if (x == 0)
7783     return 0;
7784 
7785   switch (GET_CODE (x))
7786     {
7787     case ASM_OPERANDS:
7788     case ASM_INPUT:
7789       return 1;
7790 
7791     case SET:
7792       return arc_asm_insn_p (SET_SRC (x));
7793 
7794     case PARALLEL:
7795       j = 0;
7796       for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
7797 	j += arc_asm_insn_p (XVECEXP (x, 0, i));
7798       if (j > 0)
7799 	return 1;
7800       break;
7801 
7802     default:
7803       break;
7804     }
7805 
7806   return 0;
7807 }
7808 
7809 /* We might have a CALL to a non-returning function before a loop end.
7810    ??? Although the manual says that's OK (the target is outside the
7811    loop, and the loop counter unused there), the assembler barfs on
7812    this for ARC600, so we must insert a nop before such a call too.
7813    For ARC700 and ARCv2, the last ZOL instruction must not be a jump
7814    to a location where lp_count is modified.  */
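/* A sketch of the phase-2 situation checked for below (labels and
   registers assumed):

       ...
       b   @modify_lp      ; last insn of the ZOL body
     lp_end:
       ...
     modify_lp:
       mov lp_count,r4     ; lp_count live at the target -> hazard  */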
7815 
7816 static bool
7817 arc_loop_hazard (rtx_insn *pred, rtx_insn *succ)
7818 {
7819   rtx_insn *jump  = NULL;
7820   rtx label_rtx = NULL_RTX;
7821   rtx_insn *label = NULL;
7822   basic_block succ_bb;
7823 
7824   if (recog_memoized (succ) != CODE_FOR_doloop_end_i)
7825     return false;
7826 
7827   /* Phase 1: ARC600 and ARCv2HS don't allow any control instruction
7828      (i.e., jump/call) as the last instruction of a ZOL.  */
7829   if (TARGET_ARC600 || TARGET_HS)
7830     if (JUMP_P (pred) || CALL_P (pred)
7831 	|| arc_asm_insn_p (PATTERN (pred))
7832 	|| GET_CODE (PATTERN (pred)) == SEQUENCE)
7833       return true;
7834 
7835   /* Phase 2: Any architecture, it is not allowed to have the last ZOL
7836      instruction a jump to a location where lp_count is modified.  */
7837 
7838   /* Phase 2a: Dig for the jump instruction.  */
7839   if (JUMP_P (pred))
7840     jump = pred;
7841   else if (GET_CODE (PATTERN (pred)) == SEQUENCE
7842 	   && JUMP_P (XVECEXP (PATTERN (pred), 0, 0)))
7843     jump = as_a <rtx_insn *> (XVECEXP (PATTERN (pred), 0, 0));
7844   else
7845     return false;
7846 
7847   /* Phase 2b: Make sure it is not a millicode jump.  */
7848   if ((GET_CODE (PATTERN (jump)) == PARALLEL)
7849       && (XVECEXP (PATTERN (jump), 0, 0) == ret_rtx))
7850     return false;
7851 
7852   label_rtx = JUMP_LABEL (jump);
7853   if (!label_rtx)
7854     return false;
7855 
7856   /* Phase 2c: Make sure it is not a return.  */
7857   if (ANY_RETURN_P (label_rtx))
7858     return false;
7859 
7860   /* Phase 2d: Go to the target of the jump and check for aliveness of
7861      LP_COUNT register.  */
7862   label = safe_as_a <rtx_insn *> (label_rtx);
7863   succ_bb = BLOCK_FOR_INSN (label);
7864   if (!succ_bb)
7865     {
7866       gcc_assert (NEXT_INSN (label));
7867       if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (label)))
7868 	succ_bb = NOTE_BASIC_BLOCK (NEXT_INSN (label));
7869       else
7870 	succ_bb = BLOCK_FOR_INSN (NEXT_INSN (label));
7871     }
7872 
7873   if (succ_bb && REGNO_REG_SET_P (df_get_live_out (succ_bb), LP_COUNT))
7874     return true;
7875 
7876   return false;
7877 }
7878 
7879 /* For ARC600:
7880    A write to a core reg greater than or equal to 32 must not be immediately
7881    followed by a use.  Anticipate the length requirement to insert a nop
7882    between PRED and SUCC to prevent a hazard.  */
7883 
7884 int
7885 arc_hazard (rtx_insn *pred, rtx_insn *succ)
7886 {
7887   if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
7888     return 0;
7889 
7890   if (arc_loop_hazard (pred, succ))
7891     return 4;
7892 
7893   if (TARGET_ARC600)
7894     return arc600_corereg_hazard (pred, succ);
7895 
7896   return 0;
7897 }
7898 
7899 /* Return length adjustment for INSN.  */
7900 
7901 int
7902 arc_adjust_insn_length (rtx_insn *insn, int len, bool)
7903 {
7904   if (!INSN_P (insn))
7905     return len;
7906   /* We already handle sequences by ignoring the delay sequence flag.  */
7907   if (GET_CODE (PATTERN (insn)) == SEQUENCE)
7908     return len;
7909 
7910   /* It is impossible to jump to the very end of a Zero-Overhead Loop, as
7911      the ZOL mechanism only triggers when advancing to the end address,
7912      so if there's a label at the end of a ZOL, we need to insert a nop.
7913      The ARC600 ZOL also has extra restrictions on jumps at the end of a
7914      loop.  */
7915   if (recog_memoized (insn) == CODE_FOR_doloop_end_i)
7916     {
7917       rtx_insn *prev = prev_nonnote_insn (insn);
7918 
7919       return ((LABEL_P (prev)
7920 	       || (TARGET_ARC600
7921 		   && (JUMP_P (prev)
7922 		       || CALL_P (prev) /* Could be a noreturn call.  */
7923 		       || (NONJUMP_INSN_P (prev)
7924 			   && GET_CODE (PATTERN (prev)) == SEQUENCE))))
7925 	      ? len + 4 : len);
7926     }
7927 
7928   /* Check for return with but one preceding insn since function
7929      start / call.  */
7930   if (TARGET_PAD_RETURN
7931       && JUMP_P (insn)
7932       && GET_CODE (PATTERN (insn)) != ADDR_VEC
7933       && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7934       && get_attr_type (insn) == TYPE_RETURN)
7935     {
7936       rtx_insn *prev = prev_active_insn (insn);
7937 
7938       if (!prev || !(prev = prev_active_insn (prev))
7939 	  || ((NONJUMP_INSN_P (prev)
7940 	       && GET_CODE (PATTERN (prev)) == SEQUENCE)
7941 	      ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
7942 			   NON_SIBCALL)
7943 	      : CALL_ATTR (prev, NON_SIBCALL)))
7944 	return len + 4;
7945     }
7946   if (TARGET_ARC600)
7947     {
7948       rtx_insn *succ = next_real_insn (insn);
7949 
7950       /* On the ARC600, a write to an extension register must be separated
7951 	 from a read.  */
7952       if (succ && INSN_P (succ))
7953 	len += arc600_corereg_hazard (insn, succ);
7954     }
7955 
7956   /* Restore extracted operands - otherwise splitters like the addsi3_mixed one
7957      can go awry.  */
7958   extract_constrain_insn_cached (insn);
7959 
7960   return len;
7961 }
7962 
7963 /* Values for length_sensitive.  */
7964 enum
7965 {
7966   ARC_LS_NONE,// Jcc
7967   ARC_LS_25, // 25 bit offset, B
7968   ARC_LS_21, // 21 bit offset, Bcc
7969   ARC_LS_U13,// 13 bit unsigned offset, LP
7970   ARC_LS_10, // 10 bit offset, B_s, Beq_s, Bne_s
7971   ARC_LS_9,  //  9 bit offset, BRcc
7972   ARC_LS_8,  //  8 bit offset, BRcc_s
7973   ARC_LS_U7, //  7 bit unsigned offset, LPcc
7974   ARC_LS_7   //  7 bit offset, Bcc_s
7975 };
7976 
7977 /* While the infrastructure patch is waiting for review, duplicate the
7978    struct definitions, to allow this file to compile.  */
7979 #if 1
7980 typedef struct
7981 {
7982   unsigned align_set;
7983   /* Cost as a branch / call target or call return address.  */
7984   int target_cost;
7985   int fallthrough_cost;
7986   int branch_cost;
7987   int length;
7988   /* 0 for not length sensitive, 1 for largest offset range,
7989      2 for next smaller etc.  */
7990   unsigned length_sensitive : 8;
7991   bool enabled;
7992 } insn_length_variant_t;
7993 
7994 typedef struct insn_length_parameters_s
7995 {
7996   int align_unit_log;
7997   int align_base_log;
7998   int max_variants;
7999   int (*get_variants) (rtx_insn *, int, bool, bool, insn_length_variant_t *);
8000 } insn_length_parameters_t;
8001 
8002 static void
8003 arc_insn_length_parameters (insn_length_parameters_t *ilp) ATTRIBUTE_UNUSED;
8004 #endif
8005 
8006 static int
8007 arc_get_insn_variants (rtx_insn *insn, int len, bool, bool target_p,
8008 		       insn_length_variant_t *ilv)
8009 {
8010   if (!NONDEBUG_INSN_P (insn))
8011     return 0;
8012   enum attr_type type;
8013   /* shorten_branches doesn't take optimize_size into account yet for the
8014      get_variants mechanism, so turn this off for now.  */
8015   if (optimize_size)
8016     return 0;
8017   if (rtx_sequence *pat = dyn_cast <rtx_sequence *> (PATTERN (insn)))
8018     {
8019       /* The interaction of a short delay slot insn with a short branch is
8020 	 too weird for shorten_branches to piece together, so describe the
8021 	 entire SEQUENCE.  */
8022       rtx_insn *inner;
8023       if (TARGET_UPSIZE_DBR
8024 	  && get_attr_length (pat->insn (1)) <= 2
8025 	  && (((type = get_attr_type (inner = pat->insn (0)))
8026 	       == TYPE_UNCOND_BRANCH)
8027 	      || type == TYPE_BRANCH)
8028 	  && get_attr_delay_slot_filled (inner) == DELAY_SLOT_FILLED_YES)
8029 	{
8030 	  int n_variants
8031 	    = arc_get_insn_variants (inner, get_attr_length (inner), true,
8032 				     target_p, ilv+1);
8033 	  /* The short variant gets split into a higher-cost aligned
8034 	     and a lower cost unaligned variant.  */
8035 	  gcc_assert (n_variants);
8036 	  gcc_assert (ilv[1].length_sensitive == ARC_LS_7
8037 		      || ilv[1].length_sensitive == ARC_LS_10);
8038 	  gcc_assert (ilv[1].align_set == 3);
8039 	  ilv[0] = ilv[1];
8040 	  ilv[0].align_set = 1;
8041 	  ilv[0].branch_cost += 1;
8042 	  ilv[1].align_set = 2;
8043 	  n_variants++;
8044 	  for (int i = 0; i < n_variants; i++)
8045 	    ilv[i].length += 2;
8046 	  /* In case an instruction with aligned size is wanted, and
8047 	     the short variants are unavailable / too expensive, add
8048 	     versions of long branch + long delay slot.  */
8049 	  for (int i = 2, end = n_variants; i < end; i++, n_variants++)
8050 	    {
8051 	      ilv[n_variants] = ilv[i];
8052 	      ilv[n_variants].length += 2;
8053 	    }
8054 	  return n_variants;
8055 	}
8056       return 0;
8057     }
8058   insn_length_variant_t *first_ilv = ilv;
8059   type = get_attr_type (insn);
8060   bool delay_filled
8061     = (get_attr_delay_slot_filled (insn) == DELAY_SLOT_FILLED_YES);
8062   int branch_align_cost = delay_filled ? 0 : 1;
8063   int branch_unalign_cost = delay_filled ? 0 : TARGET_UNALIGN_BRANCH ? 0 : 1;
8064   /* If the previous instruction is an sfunc call, this insn is always
8065      a target, even though the middle-end is unaware of this.  */
8066   bool force_target = false;
8067   rtx_insn *prev = prev_active_insn (insn);
8068   if (prev && arc_next_active_insn (prev, 0) == insn
8069       && ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
8070 	  ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
8071 		       NON_SIBCALL)
8072 	  : (CALL_ATTR (prev, NON_SIBCALL)
8073 	     && NEXT_INSN (PREV_INSN (prev)) == prev)))
8074     force_target = true;
8075 
8076   switch (type)
8077     {
8078     case TYPE_BRCC:
      /* Short BRCC only comes in a no-delay-slot version, and without limm.  */
8080       if (!delay_filled)
8081 	{
8082 	  ilv->align_set = 3;
8083 	  ilv->length = 2;
8084 	  ilv->branch_cost = 1;
8085 	  ilv->enabled = (len == 2);
8086 	  ilv->length_sensitive = ARC_LS_8;
8087 	  ilv++;
8088 	}
8089       /* Fall through.  */
8090     case TYPE_BRCC_NO_DELAY_SLOT:
8091       /* doloop_fallback* patterns are TYPE_BRCC_NO_DELAY_SLOT for
8092 	 (delay slot) scheduling purposes, but they are longer.  */
8093       if (GET_CODE (PATTERN (insn)) == PARALLEL
8094 	  && GET_CODE (XVECEXP (PATTERN (insn), 0, 1)) == SET)
8095 	return 0;
8096       /* Standard BRCC: 4 bytes, or 8 bytes with limm.  */
8097       ilv->length = ((type == TYPE_BRCC) ? 4 : 8);
8098       ilv->align_set = 3;
8099       ilv->branch_cost = branch_align_cost;
8100       ilv->enabled = (len <= ilv->length);
8101       ilv->length_sensitive = ARC_LS_9;
8102       if ((target_p || force_target)
8103 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8104 	{
8105 	  ilv[1] = *ilv;
8106 	  ilv->align_set = 1;
8107 	  ilv++;
8108 	  ilv->align_set = 2;
8109 	  ilv->target_cost = 1;
8110 	  ilv->branch_cost = branch_unalign_cost;
8111 	}
8112       ilv++;
8113 
8114       rtx op, op0;
8115       op = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0);
8116       op0 = XEXP (op, 0);
8117 
8118       if (GET_CODE (op0) == ZERO_EXTRACT
8119 	  && satisfies_constraint_L (XEXP (op0, 2)))
8120 	op0 = XEXP (op0, 0);
8121       if (satisfies_constraint_Rcq (op0))
8122 	{
8123 	  ilv->length = ((type == TYPE_BRCC) ? 6 : 10);
8124 	  ilv->align_set = 3;
8125 	  ilv->branch_cost = 1 + branch_align_cost;
8126 	  ilv->fallthrough_cost = 1;
8127 	  ilv->enabled = true;
8128 	  ilv->length_sensitive = ARC_LS_21;
8129 	  if (!delay_filled && TARGET_UNALIGN_BRANCH)
8130 	    {
8131 	      ilv[1] = *ilv;
8132 	      ilv->align_set = 1;
8133 	      ilv++;
8134 	      ilv->align_set = 2;
8135 	      ilv->branch_cost = 1 + branch_unalign_cost;
8136 	    }
8137 	  ilv++;
8138 	}
8139       ilv->length = ((type == TYPE_BRCC) ? 8 : 12);
8140       ilv->align_set = 3;
8141       ilv->branch_cost = 1 + branch_align_cost;
8142       ilv->fallthrough_cost = 1;
8143       ilv->enabled = true;
8144       ilv->length_sensitive = ARC_LS_21;
8145       if ((target_p || force_target)
8146 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8147 	{
8148 	  ilv[1] = *ilv;
8149 	  ilv->align_set = 1;
8150 	  ilv++;
8151 	  ilv->align_set = 2;
8152 	  ilv->target_cost = 1;
8153 	  ilv->branch_cost = 1 + branch_unalign_cost;
8154 	}
8155       ilv++;
8156       break;
8157 
8158     case TYPE_SFUNC:
8159       ilv->length = 12;
8160       goto do_call;
8161     case TYPE_CALL_NO_DELAY_SLOT:
8162       ilv->length = 8;
8163       goto do_call;
8164     case TYPE_CALL:
8165       ilv->length = 4;
8166       ilv->length_sensitive
8167 	= GET_CODE (PATTERN (insn)) == COND_EXEC ? ARC_LS_21 : ARC_LS_25;
8168     do_call:
8169       ilv->align_set = 3;
8170       ilv->fallthrough_cost = branch_align_cost;
8171       ilv->enabled = true;
8172       if ((target_p || force_target)
8173 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8174 	{
8175 	  ilv[1] = *ilv;
8176 	  ilv->align_set = 1;
8177 	  ilv++;
8178 	  ilv->align_set = 2;
8179 	  ilv->target_cost = 1;
8180 	  ilv->fallthrough_cost = branch_unalign_cost;
8181 	}
8182       ilv++;
8183       break;
8184     case TYPE_UNCOND_BRANCH:
8185       /* Strictly speaking, this should be ARC_LS_10 for equality comparisons,
8186 	 but that makes no difference at the moment.  */
8187       ilv->length_sensitive = ARC_LS_7;
8188       ilv[1].length_sensitive = ARC_LS_25;
8189       goto do_branch;
8190     case TYPE_BRANCH:
8191       ilv->length_sensitive = ARC_LS_10;
8192       ilv[1].length_sensitive = ARC_LS_21;
8193     do_branch:
8194       ilv->align_set = 3;
8195       ilv->length = 2;
8196       ilv->branch_cost = branch_align_cost;
8197       ilv->enabled = (len == ilv->length);
8198       ilv++;
8199       ilv->length = 4;
8200       ilv->align_set = 3;
8201       ilv->branch_cost = branch_align_cost;
8202       ilv->enabled = true;
8203       if ((target_p || force_target)
8204 	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8205 	{
8206 	  ilv[1] = *ilv;
8207 	  ilv->align_set = 1;
8208 	  ilv++;
8209 	  ilv->align_set = 2;
8210 	  ilv->target_cost = 1;
8211 	  ilv->branch_cost = branch_unalign_cost;
8212 	}
8213       ilv++;
8214       break;
8215     case TYPE_JUMP:
8216       return 0;
8217     default:
8218       /* For every short insn, there is generally also a long insn.
8219 	 trap_s is an exception.  */
8220       if ((len & 2) == 0 || recog_memoized (insn) == CODE_FOR_trap_s)
8221 	return 0;
8222       ilv->align_set = 3;
8223       ilv->length = len;
8224       ilv->enabled = 1;
8225       ilv++;
8226       ilv->align_set = 3;
8227       ilv->length = len + 2;
8228       ilv->enabled = 1;
8229       if (target_p || force_target)
8230 	{
8231 	  ilv[1] = *ilv;
8232 	  ilv->align_set = 1;
8233 	  ilv++;
8234 	  ilv->align_set = 2;
8235 	  ilv->target_cost = 1;
8236 	}
8237       ilv++;
8238     }
8239   /* If the previous instruction is an sfunc call, this insn is always
8240      a target, even though the middle-end is unaware of this.
8241      Therefore, if we have a call predecessor, transfer the target cost
8242      to the fallthrough and branch costs.  */
8243   if (force_target)
8244     {
8245       for (insn_length_variant_t *p = first_ilv; p < ilv; p++)
8246 	{
8247 	  p->fallthrough_cost += p->target_cost;
8248 	  p->branch_cost += p->target_cost;
8249 	  p->target_cost = 0;
8250 	}
8251     }
8252 
8253   return ilv - first_ilv;
8254 }
8255 
8256 static void
8257 arc_insn_length_parameters (insn_length_parameters_t *ilp)
8258 {
8259   ilp->align_unit_log = 1;
8260   ilp->align_base_log = 1;
8261   ilp->max_variants = 7;
8262   ilp->get_variants = arc_get_insn_variants;
8263 }
8264 
8265 /* Return a copy of COND from *STATEP, inverted if that is indicated by the
8266    CC field of *STATEP.  */
8267 
8268 static rtx
8269 arc_get_ccfsm_cond (struct arc_ccfsm *statep, bool reverse)
8270 {
8271   rtx cond = statep->cond;
8272   int raw_cc = get_arc_condition_code (cond);
8273   if (reverse)
8274     raw_cc = ARC_INVERSE_CONDITION_CODE (raw_cc);
8275 
8276   if (statep->cc == raw_cc)
8277     return copy_rtx (cond);
8278 
8279   gcc_assert (ARC_INVERSE_CONDITION_CODE (raw_cc) == statep->cc);
8280 
8281   machine_mode ccm = GET_MODE (XEXP (cond, 0));
8282   enum rtx_code code = reverse_condition (GET_CODE (cond));
8283   if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
8284     code = reverse_condition_maybe_unordered (GET_CODE (cond));
8285 
8286   return gen_rtx_fmt_ee (code, GET_MODE (cond),
8287 			 copy_rtx (XEXP (cond, 0)), copy_rtx (XEXP (cond, 1)));
8288 }
8289 
8290 /* Return version of PAT conditionalized with COND, which is part of INSN.
8291    ANNULLED indicates if INSN is an annulled delay-slot insn.
8292    Register further changes if necessary.  */
8293 static rtx
8294 conditionalize_nonjump (rtx pat, rtx cond, rtx insn, bool annulled)
8295 {
8296   /* For commutative operators, we generally prefer to have
8297      the first source match the destination.  */
8298   if (GET_CODE (pat) == SET)
8299     {
8300       rtx src = SET_SRC (pat);
8301 
8302       if (COMMUTATIVE_P (src))
8303 	{
8304 	  rtx src0 = XEXP (src, 0);
8305 	  rtx src1 = XEXP (src, 1);
8306 	  rtx dst = SET_DEST (pat);
8307 
8308 	  if (rtx_equal_p (src1, dst) && !rtx_equal_p (src0, dst)
8309 	      /* Leave add_n alone - the canonical form is to
8310 		 have the complex summand first.  */
8311 	      && REG_P (src0))
8312 	    pat = gen_rtx_SET (dst,
8313 			       gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src),
8314 					       src1, src0));
8315 	}
8316     }
8317 
8318   /* dwarf2out.c:dwarf2out_frame_debug_expr doesn't know
8319      what to do with COND_EXEC.  */
8320   if (RTX_FRAME_RELATED_P (insn))
8321     {
      /* If this is the delay slot insn of an annulled branch,
	 dwarf2out.c:scan_trace understands the annulling semantics
8324 	 without the COND_EXEC.  */
8325       gcc_assert (annulled);
8326       rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat,
8327 				 REG_NOTES (insn));
8328       validate_change (insn, &REG_NOTES (insn), note, 1);
8329     }
8330   pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
8331   return pat;
8332 }
8333 
8334 /* Use the ccfsm machinery to do if conversion.  */
8335 
8336 static unsigned
8337 arc_ifcvt (void)
8338 {
8339   struct arc_ccfsm *statep = &cfun->machine->ccfsm_current;
8340   basic_block merge_bb = 0;
8341 
8342   memset (statep, 0, sizeof *statep);
8343   for (rtx_insn *insn = get_insns (); insn; insn = next_insn (insn))
8344     {
8345       arc_ccfsm_advance (insn, statep);
8346 
8347       switch (statep->state)
8348 	{
8349 	case 0:
8350 	  if (JUMP_P (insn))
8351 	    merge_bb = 0;
8352 	  break;
8353 	case 1: case 2:
8354 	  {
8355 	    /* Deleted branch.  */
8356 	    gcc_assert (!merge_bb);
8357 	    merge_bb = BLOCK_FOR_INSN (insn);
8358 	    basic_block succ_bb
8359 	      = BLOCK_FOR_INSN (NEXT_INSN (NEXT_INSN (PREV_INSN (insn))));
8360 	    arc_ccfsm_post_advance (insn, statep);
8361 	    gcc_assert (!IN_RANGE (statep->state, 1, 2));
8362 	    rtx_insn *seq = NEXT_INSN (PREV_INSN (insn));
8363 	    if (seq != insn)
8364 	      {
8365 		rtx slot = XVECEXP (PATTERN (seq), 0, 1);
8366 		rtx pat = PATTERN (slot);
8367 		if (INSN_ANNULLED_BRANCH_P (insn))
8368 		  {
8369 		    rtx cond
8370 		      = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (slot));
8371 		    pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
8372 		  }
8373 		if (!validate_change (seq, &PATTERN (seq), pat, 0))
8374 		  gcc_unreachable ();
8375 		PUT_CODE (slot, NOTE);
8376 		NOTE_KIND (slot) = NOTE_INSN_DELETED;
8377 		if (merge_bb && succ_bb)
8378 		  merge_blocks (merge_bb, succ_bb);
8379 	      }
8380 	    else if (merge_bb && succ_bb)
8381 	      {
8382 		set_insn_deleted (insn);
8383 		merge_blocks (merge_bb, succ_bb);
8384 	      }
8385 	    else
8386 	      {
8387 		PUT_CODE (insn, NOTE);
8388 		NOTE_KIND (insn) = NOTE_INSN_DELETED;
8389 	      }
8390 	    continue;
8391 	  }
8392 	case 3:
8393 	  if (LABEL_P (insn)
8394 	      && statep->target_label == CODE_LABEL_NUMBER (insn))
8395 	    {
8396 	      arc_ccfsm_post_advance (insn, statep);
8397 	      basic_block succ_bb = BLOCK_FOR_INSN (insn);
8398 	      if (merge_bb && succ_bb)
8399 		merge_blocks (merge_bb, succ_bb);
8400 	      else if (--LABEL_NUSES (insn) == 0)
8401 		{
8402 		  const char *name = LABEL_NAME (insn);
8403 		  PUT_CODE (insn, NOTE);
8404 		  NOTE_KIND (insn) = NOTE_INSN_DELETED_LABEL;
8405 		  NOTE_DELETED_LABEL_NAME (insn) = name;
8406 		}
8407 	      merge_bb = 0;
8408 	      continue;
8409 	    }
8410 	  /* Fall through.  */
8411 	case 4: case 5:
8412 	  if (!NONDEBUG_INSN_P (insn))
8413 	    break;
8414 
8415 	  /* Conditionalized insn.  */
8416 
8417 	  rtx_insn *prev, *pprev;
8418 	  rtx *patp, pat, cond;
8419 	  bool annulled; annulled = false;
8420 
8421 	  /* If this is a delay slot insn in a non-annulled branch,
8422 	     don't conditionalize it.  N.B., this should be fine for
8423 	     conditional return too.  However, don't do this for
8424 	     unconditional branches, as these would be encountered when
8425 	     processing an 'else' part.  */
8426 	  prev = PREV_INSN (insn);
8427 	  pprev = PREV_INSN (prev);
8428 	  if (pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn)
8429 	      && JUMP_P (prev) && get_attr_cond (prev) == COND_USE)
8430 	    {
8431 	      if (!INSN_ANNULLED_BRANCH_P (prev))
8432 		break;
8433 	      annulled = true;
8434 	    }
8435 
8436 	  patp = &PATTERN (insn);
8437 	  pat = *patp;
8438 	  cond = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (insn));
8439 	  if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8440 	    {
8441 	      /* ??? don't conditionalize if all side effects are dead
8442 		 in the not-execute case.  */
8443 
8444 	      pat = conditionalize_nonjump (pat, cond, insn, annulled);
8445 	    }
8446 	  else if (simplejump_p (insn))
8447 	    {
8448 	      patp = &SET_SRC (pat);
8449 	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, *patp, pc_rtx);
8450 	    }
8451 	  else if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
8452 	    {
8453 	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, pat, pc_rtx);
8454 	      pat = gen_rtx_SET (pc_rtx, pat);
8455 	    }
8456 	  else
8457 	    gcc_unreachable ();
8458 	  validate_change (insn, patp, pat, 1);
8459 	  if (!apply_change_group ())
8460 	    gcc_unreachable ();
8461 	  if (JUMP_P (insn))
8462 	    {
8463 	      rtx_insn *next = next_nonnote_insn (insn);
8464 	      if (GET_CODE (next) == BARRIER)
8465 		delete_insn (next);
8466 	      if (statep->state == 3)
8467 		continue;
8468 	    }
8469 	  break;
8470 	default:
8471 	  gcc_unreachable ();
8472 	}
8473       arc_ccfsm_post_advance (insn, statep);
8474     }
8475   return 0;
8476 }
8477 
8478 /* Find annulled delay insns and convert them to use the appropriate predicate.
8479    This allows branch shortening to size up these insns properly.  */
8480 
8481 static unsigned
8482 arc_predicate_delay_insns (void)
8483 {
8484   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8485     {
8486       rtx pat, jump, dlay, src, cond, *patp;
8487       int reverse;
8488 
8489       if (!NONJUMP_INSN_P (insn)
8490 	  || GET_CODE (pat = PATTERN (insn)) != SEQUENCE)
8491 	continue;
8492       jump = XVECEXP (pat, 0, 0);
8493       dlay = XVECEXP (pat, 0, 1);
8494       if (!JUMP_P (jump) || !INSN_ANNULLED_BRANCH_P (jump))
8495 	continue;
8496       /* If the branch insn does the annulling, leave the delay insn alone.  */
8497       if (!TARGET_AT_DBR_CONDEXEC && !INSN_FROM_TARGET_P (dlay))
8498 	continue;
8499       /* ??? Could also leave DLAY un-conditionalized if its target is dead
8500 	 on the other path.  */
8501       gcc_assert (GET_CODE (PATTERN (jump)) == SET);
8502       gcc_assert (SET_DEST (PATTERN (jump)) == pc_rtx);
8503       src = SET_SRC (PATTERN (jump));
8504       gcc_assert (GET_CODE (src) == IF_THEN_ELSE);
8505       cond = XEXP (src, 0);
8506       if (XEXP (src, 2) == pc_rtx)
8507 	reverse = 0;
8508       else if (XEXP (src, 1) == pc_rtx)
8509 	reverse = 1;
8510       else
8511 	gcc_unreachable ();
8512       if (reverse != !INSN_FROM_TARGET_P (dlay))
8513 	{
8514 	  machine_mode ccm = GET_MODE (XEXP (cond, 0));
8515 	  enum rtx_code code = reverse_condition (GET_CODE (cond));
8516 	  if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
8517 	    code = reverse_condition_maybe_unordered (GET_CODE (cond));
8518 
8519 	  cond = gen_rtx_fmt_ee (code, GET_MODE (cond),
8520 				 copy_rtx (XEXP (cond, 0)),
8521 				 copy_rtx (XEXP (cond, 1)));
8522 	}
8523       else
8524 	cond = copy_rtx (cond);
8525       patp = &PATTERN (dlay);
8526       pat = *patp;
8527       pat = conditionalize_nonjump (pat, cond, dlay, true);
8528       validate_change (dlay, patp, pat, 1);
8529       if (!apply_change_group ())
8530 	gcc_unreachable ();
8531     }
8532   return 0;
8533 }
8534 
/* For ARC600: If a write to a core reg >= 32 appears in a delay slot
   (other than of a forward brcc), it creates a hazard when there is a read
   of the same register at the branch target.  We can't know what is at the
   branch target of calls, and for branches, we don't really know before the
   end of delay slot scheduling, either.  Not only can individual
   instructions be hoisted out into a delay slot, a basic block can also be
   emptied this way, and branch and/or fall-through targets be redirected.
   Hence we don't want such writes in a delay slot.  */
8543 
/* Return nonzero iff INSN writes to an extension core register.  */
8545 
8546 int
8547 arc_write_ext_corereg (rtx insn)
8548 {
8549   subrtx_iterator::array_type array;
8550   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
8551     {
8552       const_rtx x = *iter;
8553       switch (GET_CODE (x))
8554 	{
8555 	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
8556 	  break;
8557 	default:
8558 	  /* This is also fine for PRE/POST_MODIFY, because they
8559 	     contain a SET.  */
8560 	  continue;
8561 	}
8562       const_rtx dest = XEXP (x, 0);
8563       if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61)
8564 	return 1;
8565     }
8566   return 0;
8567 }
8568 
8569 /* This is like the hook, but returns NULL when it can't / won't generate
8570    a legitimate address.  */
8571 
8572 static rtx
8573 arc_legitimize_address_0 (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8574 			  machine_mode mode)
8575 {
8576   rtx addr, inner;
8577 
8578   if (flag_pic && SYMBOLIC_CONST (x))
    x = arc_legitimize_pic_address (x, 0);
8580   addr = x;
8581   if (GET_CODE (addr) == CONST)
8582     addr = XEXP (addr, 0);
8583   if (GET_CODE (addr) == PLUS
8584       && CONST_INT_P (XEXP (addr, 1))
8585       && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
8586 	   && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0)))
8587 	  || (REG_P (XEXP (addr, 0))
8588 	      && (INTVAL (XEXP (addr, 1)) & 252))))
8589     {
8590       HOST_WIDE_INT offs, upper;
8591       int size = GET_MODE_SIZE (mode);
8592 
8593       offs = INTVAL (XEXP (addr, 1));
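      /* N.B. operator precedence makes the mask below (-512 * size), so
	 UPPER is OFFS rounded to a multiple of 512 * size, leaving
	 OFFS - UPPER within [-256 * size, 256 * size).  */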
8594       upper = (offs + 256 * size) & ~511 * size;
8595       inner = plus_constant (Pmode, XEXP (addr, 0), upper);
8596 #if 0 /* ??? this produces worse code for EEMBC idctrn01  */
8597       if (GET_CODE (x) == CONST)
8598 	inner = gen_rtx_CONST (Pmode, inner);
8599 #endif
8600       addr = plus_constant (Pmode, force_reg (Pmode, inner), offs - upper);
8601       x = addr;
8602     }
8603   else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr))
8604     x = force_reg (Pmode, x);
8605   if (memory_address_p ((machine_mode) mode, x))
    return x;
8607   return NULL_RTX;
8608 }
8609 
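/* Try to make ORIG_X, an address used to access something of mode MODE,
   legitimate.  Handle TLS symbols first, then defer to
   arc_legitimize_address_0 above; if that fails, return ORIG_X unchanged.  */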
8610 static rtx
8611 arc_legitimize_address (rtx orig_x, rtx oldx, machine_mode mode)
8612 {
8613   if (GET_CODE (orig_x) == SYMBOL_REF)
8614     {
8615       enum tls_model model = SYMBOL_REF_TLS_MODEL (orig_x);
8616       if (model != 0)
8617 	return arc_legitimize_tls_address (orig_x, model);
8618     }
8619 
8620   rtx new_x = arc_legitimize_address_0 (orig_x, oldx, mode);
8621 
8622   if (new_x)
8623     return new_x;
8624   return orig_x;
8625 }
8626 
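/* Helper for arc_delegitimize_address below: if X is a PIC address form
   typically created during PIC legitimization (an ARC_UNSPEC_GOT,
   ARC_UNSPEC_GOTOFFPC or ARC_UNSPEC_GOTOFF construct, possibly with an
   added offset or pic base register), return the underlying symbolic
   operand; otherwise return NULL_RTX.  */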
8627 static rtx
8628 arc_delegitimize_address_0 (rtx x)
8629 {
8630   rtx u, gp, p;
8631 
8632   if (GET_CODE (x) == CONST && GET_CODE (u = XEXP (x, 0)) == UNSPEC)
8633     {
8634       if (XINT (u, 1) == ARC_UNSPEC_GOT
8635 	  || XINT (u, 1) == ARC_UNSPEC_GOTOFFPC)
8636 	return XVECEXP (u, 0, 0);
8637     }
8638   else if (GET_CODE (x) == CONST && GET_CODE (p = XEXP (x, 0)) == PLUS
8639 	   && GET_CODE (u = XEXP (p, 0)) == UNSPEC
8640 	   && (XINT (u, 1) == ARC_UNSPEC_GOT
8641 	       || XINT (u, 1) == ARC_UNSPEC_GOTOFFPC))
8642     return gen_rtx_CONST
8643 	    (GET_MODE (x),
8644 	     gen_rtx_PLUS (GET_MODE (p), XVECEXP (u, 0, 0), XEXP (p, 1)));
8645   else if (GET_CODE (x) == PLUS
8646 	   && ((REG_P (gp = XEXP (x, 0))
8647 		&& REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
8648 	       || (GET_CODE (gp) == CONST
8649 		   && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
8650 		   && XINT (u, 1) == ARC_UNSPEC_GOT
8651 		   && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
8652 		   && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
8653 	   && GET_CODE (XEXP (x, 1)) == CONST
8654 	   && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
8655 	   && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
8656     return XVECEXP (u, 0, 0);
8657   else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8658 	   && ((REG_P (gp = XEXP (XEXP (x, 0), 1))
8659 		&& REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
8660 	       || (GET_CODE (gp) == CONST
8661 		   && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
8662 		   && XINT (u, 1) == ARC_UNSPEC_GOT
8663 		   && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
8664 		   && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
8665 	   && GET_CODE (XEXP (x, 1)) == CONST
8666 	   && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
8667 	   && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
8668     return gen_rtx_PLUS (GET_MODE (x), XEXP (XEXP (x, 0), 0),
8669 			 XVECEXP (u, 0, 0));
8670   else if (GET_CODE (x) == PLUS
8671 	   && (u = arc_delegitimize_address_0 (XEXP (x, 1))))
8672     return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), u);
8673   return NULL_RTX;
8674 }
8675 
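/* Undo the effects of PIC address legitimization on X, so that the original
   symbol can be recovered (e.g. for debug output or alias analysis); uses
   arc_delegitimize_address_0 above to strip the GOT / GOTOFF wrappers.  */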
8676 static rtx
8677 arc_delegitimize_address (rtx x)
8678 {
8679   rtx orig_x = x = delegitimize_mem_from_attrs (x);
8680   if (GET_CODE (x) == MEM)
8681     x = XEXP (x, 0);
8682   x = arc_delegitimize_address_0 (x);
8683   if (x)
8684     {
8685       if (MEM_P (orig_x))
8686 	x = replace_equiv_address_nv (orig_x, x);
8687       return x;
8688     }
8689   return orig_x;
8690 }
8691 
8692 /* Return a REG rtx for acc1.  N.B. the gcc-internal representation may
8693    differ from the hardware register number in order to allow the generic
8694    code to correctly split the concatenation of acc1 and acc2.  */
8695 
8696 rtx
8697 gen_acc1 (void)
8698 {
8699   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57);
8700 }
8701 
8702 /* Return a REG rtx for acc2.  N.B. the gcc-internal representation may
8703    differ from the hardware register number in order to allow the generic
8704    code to correctly split the concatenation of acc1 and acc2.  */
8705 
8706 rtx
8707 gen_acc2 (void)
8708 {
8709   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56);
8710 }
8711 
8712 /* Return a REG rtx for mlo.  N.B. the gcc-internal representation may
8713    differ from the hardware register number in order to allow the generic
8714    code to correctly split the concatenation of mhi and mlo.  */
8715 
8716 rtx
8717 gen_mlo (void)
8718 {
8719   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 59: 58);
8720 }
8721 
8722 /* Return a REG rtx for mhi.  N.B. the gcc-internal representation may
8723    differ from the hardware register number in order to allow the generic
8724    code to correctly split the concatenation of mhi and mlo.  */
8725 
8726 rtx
8727 gen_mhi (void)
8728 {
8729   return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 58: 59);
8730 }
8731 
8732 /* FIXME: a parameter should be added, and code added to final.c,
8733    to reproduce this functionality in shorten_branches.  */
8734 #if 0
8735 /* Return nonzero iff BRANCH should be unaligned if possible by upsizing
8736    a previous instruction.  */
8737 int
8738 arc_unalign_branch_p (rtx branch)
8739 {
8740   rtx note;
8741 
8742   if (!TARGET_UNALIGN_BRANCH)
8743     return 0;
8744   /* Do not do this if we have a filled delay slot.  */
8745   if (get_attr_delay_slot_filled (branch) == DELAY_SLOT_FILLED_YES
8746       && !NEXT_INSN (branch)->deleted ())
8747     return 0;
8748   note = find_reg_note (branch, REG_BR_PROB, 0);
8749   return (!note
8750 	  || (arc_unalign_prob_threshold && !br_prob_note_reliable_p (note))
8751 	  || INTVAL (XEXP (note, 0)) < arc_unalign_prob_threshold);
8752 }
8753 #endif
8754 
8755 /* When estimating sizes during arc_reorg, when optimizing for speed, there
8756    are three reasons why we need to consider branches to be length 6:
   - annul-false delay slot insns are implemented using conditional execution,
8758      thus preventing short insn formation where used.
8759    - for ARC600: annul-true delay slot insns are implemented where possible
8760      using conditional execution, preventing short insn formation where used.
8761    - for ARC700: likely or somewhat likely taken branches are made long and
8762      unaligned if possible to avoid branch penalty.  */
8763 
8764 bool
8765 arc_branch_size_unknown_p (void)
8766 {
8767   return !optimize_size && arc_reorg_in_progress;
8768 }
8769 
8770 /* We are about to output a return insn.  Add padding if necessary to avoid
8771    a mispredict.  A return could happen immediately after the function
8772    start, but after a call we know that there will be at least a blink
8773    restore.  */
8774 
8775 void
8776 arc_pad_return (void)
8777 {
8778   rtx_insn *insn = current_output_insn;
8779   rtx_insn *prev = prev_active_insn (insn);
8780   int want_long;
8781 
8782   if (!prev)
8783     {
8784       fputs ("\tnop_s\n", asm_out_file);
8785       cfun->machine->unalign ^= 2;
8786       want_long = 1;
8787     }
8788   /* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
8789      because after a call, we'd have to restore blink first.  */
8790   else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
8791     return;
8792   else
8793     {
8794       want_long = (get_attr_length (prev) == 2);
8795       prev = prev_active_insn (prev);
8796     }
8797   if (!prev
8798       || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
8799 	  ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
8800 		       NON_SIBCALL)
8801 	  : CALL_ATTR (prev, NON_SIBCALL)))
8802     {
8803       if (want_long)
8804 	cfun->machine->size_reason
8805 	  = "call/return and return/return must be 6 bytes apart to avoid mispredict";
8806       else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
8807 	{
8808 	  cfun->machine->size_reason
8809 	    = "Long unaligned jump avoids non-delay slot penalty";
8810 	  want_long = 1;
8811 	}
8812       /* Disgorge delay insn, if there is any, and it may be moved.  */
8813       if (final_sequence
8814 	  /* ??? Annulled would be OK if we can and do conditionalize
8815 	     the delay slot insn accordingly.  */
8816 	  && !INSN_ANNULLED_BRANCH_P (insn)
8817 	  && (get_attr_cond (insn) != COND_USE
8818 	      || !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
8819 			     XVECEXP (final_sequence, 0, 1))))
8820 	{
8821 	  prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1));
8822 	  gcc_assert (!prev_real_insn (insn)
8823 		      || !arc_hazard (prev_real_insn (insn), prev));
8824 	  cfun->machine->force_short_suffix = !want_long;
8825 	  rtx save_pred = current_insn_predicate;
8826 	  final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
8827 	  cfun->machine->force_short_suffix = -1;
8828 	  prev->set_deleted ();
8829 	  current_output_insn = insn;
8830 	  current_insn_predicate = save_pred;
8831 	}
8832       else if (want_long)
8833 	fputs ("\tnop\n", asm_out_file);
8834       else
8835 	{
8836 	  fputs ("\tnop_s\n", asm_out_file);
8837 	  cfun->machine->unalign ^= 2;
8838 	}
8839     }
8840   return;
8841 }
8842 
8843 /* The usual; we set up our machine_function data.  */
8844 
8845 static struct machine_function *
8846 arc_init_machine_status (void)
8847 {
8848   struct machine_function *machine;
8849   machine = ggc_cleared_alloc<machine_function> ();
8850   machine->fn_type = ARC_FUNCTION_UNKNOWN;
8851   machine->force_short_suffix = -1;
8852 
8853   return machine;
8854 }
8855 
8856 /* Implements INIT_EXPANDERS.  We just set up to call the above
8857    function.  */
8858 
8859 void
8860 arc_init_expanders (void)
8861 {
8862   init_machine_status = arc_init_machine_status;
8863 }
8864 
8865 /* Check if OP is a proper parallel of a millicode call pattern.  OFFSET
   indicates a number of elements to ignore - that allows us to have a
8867    sibcall pattern that starts with (return).  LOAD_P is zero for store
8868    multiple (for prologues), and one for load multiples (for epilogues),
8869    and two for load multiples where no final clobber of blink is required.
8870    We also skip the first load / store element since this is supposed to
8871    be checked in the instruction pattern.  */
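/* For instance, an epilogue load multiple with LOAD_P == 1 is expected to
   look roughly like this (a sketch; element 0 and any leading (return)
   skipped via OFFSET are not shown):

     (parallel [...
		(set (reg r14) (mem (plus (reg sp) (const_int 4))))
		(set (reg r15) (mem (plus (reg sp) (const_int 8))))
		...
		(clobber (reg blink))])

   i.e. element I must load / store register 13 + I at sp + 4 * I, with a
   final clobber of blink unless LOAD_P == 2.  */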
8872 
8873 int
8874 arc_check_millicode (rtx op, int offset, int load_p)
8875 {
8876   int len = XVECLEN (op, 0) - offset;
8877   int i;
8878 
8879   if (load_p == 2)
8880     {
8881       if (len < 2 || len > 13)
8882 	return 0;
8883       load_p = 1;
8884     }
8885   else
8886     {
8887       rtx elt = XVECEXP (op, 0, --len);
8888 
8889       if (GET_CODE (elt) != CLOBBER
8890 	  || !REG_P (XEXP (elt, 0))
8891 	  || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM
8892 	  || len < 3 || len > 13)
8893 	return 0;
8894     }
8895   for (i = 1; i < len; i++)
8896     {
8897       rtx elt = XVECEXP (op, 0, i + offset);
8898       rtx reg, mem, addr;
8899 
8900       if (GET_CODE (elt) != SET)
8901 	return 0;
8902       mem = XEXP (elt, load_p);
8903       reg = XEXP (elt, 1-load_p);
8904       if (!REG_P (reg) || REGNO (reg) != 13U+i || !MEM_P (mem))
8905 	return 0;
8906       addr = XEXP (mem, 0);
8907       if (GET_CODE (addr) != PLUS
8908 	  || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
8909 	  || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4)
8910 	return 0;
8911     }
8912   return 1;
8913 }
8914 
8915 /* Accessor functions for cfun->machine->unalign.  */
8916 
8917 int
8918 arc_get_unalign (void)
8919 {
8920   return cfun->machine->unalign;
8921 }
8922 
8923 void
8924 arc_clear_unalign (void)
8925 {
8926   if (cfun)
8927     cfun->machine->unalign = 0;
8928 }
8929 
8930 void
8931 arc_toggle_unalign (void)
8932 {
8933   cfun->machine->unalign ^= 2;
8934 }
8935 
/* Operands 0..2 are the operands of an addsi which uses a 12 bit
   constant in operand 2, but which would require a LIMM because of
   operand mismatch.
   Operands 3 and 4 are new SET_SRCs for operand 0.  */
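/* E.g. with operand 2 == 200 and a compact operand 0, the split below
   prefers
     operand 0 = 200;  operand 0 = operand 0 + operand 1
   (two potentially short insns) over
     operand 0 = operand 1;  operand 0 = operand 0 + 200.  */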
8940 
8941 void
8942 split_addsi (rtx *operands)
8943 {
8944   int val = INTVAL (operands[2]);
8945 
8946   /* Try for two short insns first.  Lengths being equal, we prefer
8947      expansions with shorter register lifetimes.  */
8948   if (val > 127 && val <= 255
8949       && satisfies_constraint_Rcq (operands[0]))
8950     {
8951       operands[3] = operands[2];
8952       operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
8953     }
8954   else
8955     {
8956       operands[3] = operands[1];
8957       operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]);
8958     }
8959 }
8960 
8961 /* Operands 0..2 are the operands of a subsi which uses a 12 bit
8962    constant in operand 1, but which would require a LIMM because of
8963    operand mismatch.
   Operands 3 and 4 are new SET_SRCs for operand 0.  */
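/* E.g. for operand 0 = operand 1 - operand 2 with compact registers, the
   split below prefers either
     operand 0 = -operand 2;  operand 0 = operand 0 + operand 1
   or
     operand 0 = operand 1;   operand 0 = operand 0 - operand 2
   depending on the range of the constant, and otherwise falls back to
     operand 0 = operand 2;   operand 0 = operand 1 - operand 0.  */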
8965 
8966 void
8967 split_subsi (rtx *operands)
8968 {
8969   int val = INTVAL (operands[1]);
8970 
8971   /* Try for two short insns first.  Lengths being equal, we prefer
8972      expansions with shorter register lifetimes.  */
8973   if (satisfies_constraint_Rcq (operands[0])
8974       && satisfies_constraint_Rcq (operands[2]))
8975     {
8976       if (val >= -31 && val <= 127)
8977 	{
8978 	  operands[3] = gen_rtx_NEG (SImode, operands[2]);
8979 	  operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
8980 	  return;
8981 	}
8982       else if (val >= 0 && val < 255)
8983 	{
8984 	  operands[3] = operands[1];
8985 	  operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]);
8986 	  return;
8987 	}
8988     }
8989   /* If the destination is not an ARCompact16 register, we might
8990      still have a chance to make a short insn if the source is;
     we need to start with a reg-reg move for this.  */
8992   operands[3] = operands[2];
8993   operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]);
8994 }
8995 
8996 /* Handle DOUBLE_REGS uses.
8997    Operand 0: destination register
8998    Operand 1: source register  */
8999 
9000 static bool
9001 arc_process_double_reg_moves (rtx *operands)
9002 {
9003   rtx dest = operands[0];
9004   rtx src  = operands[1];
9005 
9006   enum usesDxState { none, srcDx, destDx, maxDx };
9007   enum usesDxState state = none;
9008 
9009   if (refers_to_regno_p (40, 44, src, 0))
9010     state = srcDx;
9011   if (refers_to_regno_p (40, 44, dest, 0))
9012     {
9013       /* Via arc_register_move_cost, we should never see D,D moves.  */
9014       gcc_assert (state == none);
9015       state = destDx;
9016     }
9017 
9018   if (state == none)
9019     return false;
9020 
9021   if (state == srcDx)
9022     {
9023       /* Without the LR insn, we need to split this into a
9024 	 sequence of insns which will use the DEXCLx and DADDHxy
9025 	 insns to be able to read the Dx register in question.  */
9026       if (TARGET_DPFP_DISABLE_LRSR)
9027 	{
9028 	  /* gen *movdf_insn_nolrsr */
9029 	  rtx set = gen_rtx_SET (dest, src);
9030 	  rtx use1 = gen_rtx_USE (VOIDmode, const1_rtx);
9031 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, use1)));
9032 	}
9033       else
9034 	{
9035 	  /* When we have 'mov D, r' or 'mov D, D' then get the target
9036 	     register pair for use with LR insn.  */
9037 	  rtx destHigh = simplify_gen_subreg (SImode, dest, DFmode,
9038 					     TARGET_BIG_ENDIAN ? 0 : 4);
9039 	  rtx destLow  = simplify_gen_subreg (SImode, dest, DFmode,
9040 					     TARGET_BIG_ENDIAN ? 4 : 0);
9041 
9042 	  /* Produce the two LR insns to get the high and low parts.  */
9043 	  emit_insn (gen_rtx_SET (destHigh,
9044 				  gen_rtx_UNSPEC_VOLATILE (Pmode,
9045 							   gen_rtvec (1, src),
9046 				  VUNSPEC_ARC_LR_HIGH)));
9047 	  emit_insn (gen_rtx_SET (destLow,
9048 				  gen_rtx_UNSPEC_VOLATILE (Pmode,
9049 							   gen_rtvec (1, src),
9050 				  VUNSPEC_ARC_LR)));
9051 	}
9052     }
9053   else if (state == destDx)
9054     {
9055       /* When we have 'mov r, D' or 'mov D, D' and we have access to the
9056 	 LR insn get the target register pair.  */
9057       rtx srcHigh = simplify_gen_subreg (SImode, src, DFmode,
9058 					TARGET_BIG_ENDIAN ? 0 : 4);
9059       rtx srcLow  = simplify_gen_subreg (SImode, src, DFmode,
9060 					TARGET_BIG_ENDIAN ? 4 : 0);
9061 
9062       emit_insn (gen_dexcl_2op (dest, srcHigh, srcLow));
9063     }
9064   else
9065     gcc_unreachable ();
9066 
9067   return true;
9068 }
9069 
/* Operands 0..1 are the operands of a 64 bit move instruction.
   Split it into two moves with operands 2/3 and 4/5.  */
9072 
9073 void
9074 arc_split_move (rtx *operands)
9075 {
9076   machine_mode mode = GET_MODE (operands[0]);
9077   int i;
9078   int swap = 0;
9079   rtx xop[4];
9080 
9081   if (TARGET_DPFP)
9082   {
9083     if (arc_process_double_reg_moves (operands))
9084       return;
9085   }
9086 
9087   if (TARGET_LL64
9088       && ((memory_operand (operands[0], mode)
9089 	   && even_register_operand (operands[1], mode))
9090 	  || (memory_operand (operands[1], mode)
9091 	      && even_register_operand (operands[0], mode))))
9092     {
9093       emit_move_insn (operands[0], operands[1]);
9094       return;
9095     }
9096 
9097   if (TARGET_PLUS_QMACW
9098       && GET_CODE (operands[1]) == CONST_VECTOR)
9099     {
9100       HOST_WIDE_INT intval0, intval1;
9101       if (GET_MODE (operands[1]) == V2SImode)
9102 	{
9103 	  intval0 = INTVAL (XVECEXP (operands[1], 0, 0));
9104 	  intval1 = INTVAL (XVECEXP (operands[1], 0, 1));
9105 	}
9106       else
9107 	{
9108 	  intval1  = INTVAL (XVECEXP (operands[1], 0, 3)) << 16;
9109 	  intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF;
9110 	  intval0  = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
9111 	  intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
9112 	}
9113       xop[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
9114       xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
9115       xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode));
9116       xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode));
9117       emit_move_insn (xop[0], xop[2]);
9118       emit_move_insn (xop[3], xop[1]);
9119       return;
9120     }
9121 
9122   for (i = 0; i < 2; i++)
9123     {
9124       if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
9125 	{
9126 	  rtx addr = XEXP (operands[i], 0);
9127 	  rtx r, o;
9128 	  enum rtx_code code;
9129 
9130 	  gcc_assert (!reg_overlap_mentioned_p (operands[0], addr));
9131 	  switch (GET_CODE (addr))
9132 	    {
9133 	    case PRE_DEC: o = GEN_INT (-8); goto pre_modify;
9134 	    case PRE_INC: o = GEN_INT (8); goto pre_modify;
9135 	    case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1);
9136 	    pre_modify:
9137 	      code = PRE_MODIFY;
9138 	      break;
9139 	    case POST_DEC: o = GEN_INT (-8); goto post_modify;
9140 	    case POST_INC: o = GEN_INT (8); goto post_modify;
9141 	    case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1);
9142 	    post_modify:
9143 	      code = POST_MODIFY;
9144 	      swap = 2;
9145 	      break;
9146 	    default:
9147 	      gcc_unreachable ();
9148 	    }
9149 	  r = XEXP (addr, 0);
9150 	  xop[0+i] = adjust_automodify_address_nv
9151 		      (operands[i], SImode,
9152 		       gen_rtx_fmt_ee (code, Pmode, r,
9153 				       gen_rtx_PLUS (Pmode, r, o)),
9154 		       0);
9155 	  xop[2+i] = adjust_automodify_address_nv
9156 		      (operands[i], SImode, plus_constant (Pmode, r, 4), 4);
9157 	}
9158       else
9159 	{
9160 	  xop[0+i] = operand_subword (operands[i], 0, 0, mode);
9161 	  xop[2+i] = operand_subword (operands[i], 1, 0, mode);
9162 	}
9163     }
9164   if (reg_overlap_mentioned_p (xop[0], xop[3]))
9165     {
9166       swap = 2;
9167       gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
9168     }
9169 
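  /* SWAP == 2 means the second word pair (xop[2] / xop[3]) is moved first:
     either because a post_modify access must be emitted last (it changes
     the base register), or because the first destination overlaps the
     second source.  */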
9170   emit_move_insn (xop[0 + swap], xop[1 + swap]);
9171   emit_move_insn (xop[2 - swap], xop[3 - swap]);
9172 
9173 }
9174 
9175 /* Select between the instruction output templates s_tmpl (for short INSNs)
9176    and l_tmpl (for long INSNs).  */
9177 
9178 const char *
9179 arc_short_long (rtx_insn *insn, const char *s_tmpl, const char *l_tmpl)
9180 {
9181   int is_short = arc_verify_short (insn, cfun->machine->unalign, -1);
9182 
9183   extract_constrain_insn_cached (insn);
9184   return is_short ? s_tmpl : l_tmpl;
9185 }
9186 
9187 /* Searches X for any reference to REGNO, returning the rtx of the
9188    reference found if any.  Otherwise, returns NULL_RTX.  */
9189 
9190 rtx
9191 arc_regno_use_in (unsigned int regno, rtx x)
9192 {
9193   const char *fmt;
9194   int i, j;
9195   rtx tem;
9196 
9197   if (REG_P (x) && refers_to_regno_p (regno, x))
9198     return x;
9199 
9200   fmt = GET_RTX_FORMAT (GET_CODE (x));
9201   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9202     {
9203       if (fmt[i] == 'e')
9204 	{
9205 	  if ((tem = regno_use_in (regno, XEXP (x, i))))
9206 	    return tem;
9207 	}
9208       else if (fmt[i] == 'E')
9209 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9210 	  if ((tem = regno_use_in (regno , XVECEXP (x, i, j))))
9211 	    return tem;
9212     }
9213 
9214   return NULL_RTX;
9215 }
9216 
9217 /* Return the integer value of the "type" attribute for INSN, or -1 if
9218    INSN can't have attributes.  */
9219 
9220 int
9221 arc_attr_type (rtx_insn *insn)
9222 {
9223   if (NONJUMP_INSN_P (insn)
9224       ? (GET_CODE (PATTERN (insn)) == USE
9225 	 || GET_CODE (PATTERN (insn)) == CLOBBER)
9226       : JUMP_P (insn)
9227       ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
9228 	 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
9229       : !CALL_P (insn))
9230     return -1;
9231   return get_attr_type (insn);
9232 }
9233 
9234 /* Return true if insn sets the condition codes.  */
9235 
9236 bool
9237 arc_sets_cc_p (rtx_insn *insn)
9238 {
9239   if (NONJUMP_INSN_P (insn))
9240     if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
9241       insn = seq->insn (seq->len () - 1);
9242   return arc_attr_type (insn) == TYPE_COMPARE;
9243 }
9244 
9245 /* Return true if INSN is an instruction with a delay slot we may want
9246    to fill.  */
9247 
9248 bool
9249 arc_need_delay (rtx_insn *insn)
9250 {
9251   rtx_insn *next;
9252 
9253   if (!flag_delayed_branch)
9254     return false;
9255   /* The return at the end of a function needs a delay slot.  */
9256   if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
9257       && (!(next = next_active_insn (insn))
9258 	  || ((!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) != SEQUENCE)
9259 	      && arc_attr_type (next) == TYPE_RETURN))
9260       && (!TARGET_PAD_RETURN
9261 	  || (prev_active_insn (insn)
9262 	      && prev_active_insn (prev_active_insn (insn))
9263 	      && prev_active_insn (prev_active_insn (prev_active_insn (insn))))))
9264     return true;
9265   if (NONJUMP_INSN_P (insn)
9266       ? (GET_CODE (PATTERN (insn)) == USE
9267 	 || GET_CODE (PATTERN (insn)) == CLOBBER
9268 	 || GET_CODE (PATTERN (insn)) == SEQUENCE)
9269       : JUMP_P (insn)
9270       ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
9271 	 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
9272       : !CALL_P (insn))
9273     return false;
9274   return num_delay_slots (insn) != 0;
9275 }
9276 
9277 /* Return true if the scheduling pass(es) has/have already run,
9278    i.e. where possible, we should try to mitigate high latencies
9279    by different instruction selection.  */
9280 
9281 bool
9282 arc_scheduling_not_expected (void)
9283 {
9284   return cfun->machine->arc_reorg_started;
9285 }
9286 
/* Oddly enough, sometimes we get a zero overhead loop that branch
   shortening doesn't think is a loop - observed with compile/pr24883.c
   -O3 -fomit-frame-pointer -funroll-loops.  Make sure the alignment is
   visible for branch shortening (we actually align the loop insn before
   it, but that is equivalent since the loop insn is 4 bytes long).  */
9293 
9294 int
9295 arc_label_align (rtx_insn *label)
9296 {
9297   int loop_align = LOOP_ALIGN (LABEL);
9298 
9299   if (loop_align > align_labels_log)
9300     {
9301       rtx_insn *prev = prev_nonnote_insn (label);
9302 
9303       if (prev && NONJUMP_INSN_P (prev)
9304 	  && GET_CODE (PATTERN (prev)) == PARALLEL
9305 	  && recog_memoized (prev) == CODE_FOR_doloop_begin_i)
9306 	return loop_align;
9307     }
9308   /* Code has a minimum p2 alignment of 1, which we must restore after an
9309      ADDR_DIFF_VEC.  */
9310   if (align_labels_log < 1)
9311     {
9312       rtx_insn *next = next_nonnote_nondebug_insn (label);
9313       if (INSN_P (next) && recog_memoized (next) >= 0)
9314 	return 1;
9315     }
9316   return align_labels_log;
9317 }
9318 
9319 /* Return true if LABEL is in executable code.  */
9320 
9321 bool
9322 arc_text_label (rtx_insn *label)
9323 {
9324   rtx_insn *next;
9325 
  /* ??? We use deleted labels as if they were still there, see
9327      gcc.c-torture/compile/20000326-2.c .  */
9328   gcc_assert (GET_CODE (label) == CODE_LABEL
9329 	      || (GET_CODE (label) == NOTE
9330 		  && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL));
9331   next = next_nonnote_insn (label);
9332   if (next)
9333     return (!JUMP_TABLE_DATA_P (next)
9334 	    || GET_CODE (PATTERN (next)) != ADDR_VEC);
9335   else if (!PREV_INSN (label))
9336     /* ??? sometimes text labels get inserted very late, see
9337        gcc.dg/torture/stackalign/comp-goto-1.c */
9338     return true;
9339   return false;
9340 }
9341 
9342 /* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble
9343   when compiling with -O2 -freorder-blocks-and-partition -fprofile-use
9344   -D_PROFILE_USE; delay branch scheduling then follows a crossing jump
9345   to redirect two breqs.  */
9346 
9347 static bool
9348 arc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
9349 {
9350   /* ??? get_attr_type is declared to take an rtx.  */
9351   union { const rtx_insn *c; rtx_insn *r; } u;
9352 
9353   u.c = follower;
9354   if (CROSSING_JUMP_P (followee))
9355     switch (get_attr_type (u.r))
9356       {
9357       case TYPE_BRCC:
9358       case TYPE_BRCC_NO_DELAY_SLOT:
9359 	return false;
9360       default:
9361 	return true;
9362       }
9363   return true;
9364 }
9365 
/* Implement EPILOGUE_USES.
9367    Return true if REGNO should be added to the deemed uses of the epilogue.
9368 
9369    We use the return address
9370    arc_return_address_regs[arc_compute_function_type (cfun)].  But
9371    also, we have to make sure all the register restore instructions
9372    are known to be live in interrupt functions, plus the blink
9373    register if it is clobbered by the isr.  */
9374 
9375 bool
9376 arc_epilogue_uses (int regno)
9377 {
9378   if (regno == arc_tp_regno)
9379     return true;
9380   if (reload_completed)
9381     {
9382       if (ARC_INTERRUPT_P (cfun->machine->fn_type))
9383 	{
9384 	  if (!fixed_regs[regno])
9385 	    return true;
9386 	  return ((regno == arc_return_address_regs[cfun->machine->fn_type])
9387 		  || (regno == RETURN_ADDR_REGNUM));
9388 	}
9389       else
9390 	return regno == RETURN_ADDR_REGNUM;
9391     }
9392   else
9393     return regno == arc_return_address_regs[arc_compute_function_type (cfun)];
9394 }
9395 
9396 /* Helper for EH_USES macro.  */
9397 
9398 bool
9399 arc_eh_uses (int regno)
9400 {
9401   if (regno == arc_tp_regno)
9402     return true;
9403   return false;
9404 }
9405 
9406 #ifndef TARGET_NO_LRA
9407 #define TARGET_NO_LRA !TARGET_LRA
9408 #endif
9409 
9410 static bool
9411 arc_lra_p (void)
9412 {
9413   return !TARGET_NO_LRA;
9414 }
9415 
/* ??? Should we define TARGET_REGISTER_PRIORITY?  We might prefer to use
   Rcq registers, because some insns are shorter with them.  OTOH we already
9418    have separate alternatives for this purpose, and other insns don't
9419    mind, so maybe we should rather prefer the other registers?
9420    We need more data, and we can only get that if we allow people to
9421    try all options.  */
9422 static int
9423 arc_register_priority (int r)
9424 {
9425   switch (arc_lra_priority_tag)
9426     {
9427     case ARC_LRA_PRIORITY_NONE:
9428       return 0;
9429     case ARC_LRA_PRIORITY_NONCOMPACT:
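      /* The expression below equals R exactly for the registers usable in
	 ARCompact 16-bit instructions (r0-r3 and r12-r15), and differs
	 from R for everything else.  */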
9430       return ((((r & 7) ^ 4) - 4) & 15) != r;
9431     case ARC_LRA_PRIORITY_COMPACT:
9432       return ((((r & 7) ^ 4) - 4) & 15) == r;
9433     default:
9434       gcc_unreachable ();
9435     }
9436 }
9437 
9438 static reg_class_t
9439 arc_spill_class (reg_class_t /* orig_class */, machine_mode)
9440 {
9441   return GENERAL_REGS;
9442 }
9443 
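/* Worker for the LEGITIMIZE_RELOAD_ADDRESS macro: try to rewrite *P, an
   address of mode MODE used for reload of operand OPNUM with reload type
   ITYPE, into a (base + small offset) form whose offset fits the scaled
   load / store offset range, pushing a reload for the new base.  Return
   true if a reload was pushed, false to let the caller handle *P.  */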
9444 bool
9445 arc_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9446 			       int itype)
9447 {
9448   rtx x = *p;
9449   enum reload_type type = (enum reload_type) itype;
9450 
9451   if (GET_CODE (x) == PLUS
9452       && CONST_INT_P (XEXP (x, 1))
9453       && (RTX_OK_FOR_BASE_P (XEXP (x, 0), true)
9454 	  || (REG_P (XEXP (x, 0))
9455 	      && reg_equiv_constant (REGNO (XEXP (x, 0))))))
9456     {
9457       int scale = GET_MODE_SIZE (mode);
9458       int shift;
9459       rtx index_rtx = XEXP (x, 1);
9460       HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9461       rtx reg, sum, sum2;
9462 
9463       if (scale > 4)
9464 	scale = 4;
9465       if ((scale-1) & offset)
9466 	scale = 1;
9467       shift = scale >> 1;
9468       offset_base
9469 	= ((offset + (256 << shift))
9470 	   & ((HOST_WIDE_INT)((unsigned HOST_WIDE_INT) -512 << shift)));
9471       /* Sometimes the normal form does not suit DImode.  We
9472 	 could avoid that by using smaller ranges, but that
9473 	 would give less optimized code when SImode is
9474 	 prevalent.  */
9475       if (GET_MODE_SIZE (mode) + offset - offset_base <= (256 << shift))
9476 	{
9477 	  int regno;
9478 
9479 	  reg = XEXP (x, 0);
9480 	  regno = REGNO (reg);
9481 	  sum2 = sum = plus_constant (Pmode, reg, offset_base);
9482 
9483 	  if (reg_equiv_constant (regno))
9484 	    {
9485 	      sum2 = plus_constant (Pmode, reg_equiv_constant (regno),
9486 				    offset_base);
9487 	      if (GET_CODE (sum2) == PLUS)
9488 		sum2 = gen_rtx_CONST (Pmode, sum2);
9489 	    }
9490 	  *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9491 	  push_reload (sum2, NULL_RTX, &XEXP (*p, 0), NULL,
9492 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum,
9493 		       type);
9494 	  return true;
9495 	}
9496     }
9497   /* We must re-recognize what we created before.  */
9498   else if (GET_CODE (x) == PLUS
9499 	   && GET_CODE (XEXP (x, 0)) == PLUS
9500 	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9501 	   && REG_P  (XEXP (XEXP (x, 0), 0))
9502 	   && CONST_INT_P (XEXP (x, 1)))
9503     {
9504       /* Because this address is so complex, we know it must have
9505 	 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9506 	 it is already unshared, and needs no further unsharing.  */
9507       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9508 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9509       return true;
9510     }
9511   return false;
9512 }
9513 
9514 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
9515 
9516 static bool
9517 arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
9518 				    unsigned int align,
9519 				    enum by_pieces_operation op,
9520 				    bool speed_p)
9521 {
9522   /* Let the movmem expander handle small block moves.  */
9523   if (op == MOVE_BY_PIECES)
9524     return false;
9525 
9526   return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
9527 }
9528 
9529 /* Emit a (pre) memory barrier around an atomic sequence according to
9530    MODEL.  */
9531 
9532 static void
9533 arc_pre_atomic_barrier (enum memmodel model)
9534 {
9535   if (need_atomic_barrier_p (model, true))
9536     emit_insn (gen_memory_barrier ());
9537 }
9538 
9539 /* Emit a (post) memory barrier around an atomic sequence according to
9540    MODEL.  */
9541 
9542 static void
9543 arc_post_atomic_barrier (enum memmodel model)
9544 {
9545   if (need_atomic_barrier_p (model, false))
9546     emit_insn (gen_memory_barrier ());
9547 }
9548 
/* Emit INSN as a jump and attach a REG_BR_PROB note marking it as very
   unlikely to be taken.  */
9550 
9551 static void
9552 emit_unlikely_jump (rtx insn)
9553 {
9554   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
9555 
9556   rtx_insn *jump = emit_jump_insn (insn);
9557   add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
9558 }
9559 
/* Expand code to perform an 8 or 16-bit compare and swap by doing
   32-bit compare and swap on the word containing the byte or
   half-word.  The difference between a weak and a strong CAS is that
   the weak version may simply fail.  The strong version relies on two
   loops: one checks whether the SCOND op succeeded or not, the other
   checks whether the 32 bit accessed location which contains the 8 or
   16 bit datum has been changed by another thread.  The first loop is
   implemented by the atomic_compare_and_swapsi_1 pattern.  The second
   loop is implemented by this routine.  */
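
/* Roughly, and following the local variable names used below, the code
   emitted for a QImode / HImode location at address A is (a sketch, not
   the literal RTL; the loop only exists for the strong variant):

	addr = A & -4;  off = (A & 3) * 8 (byte index flipped on BE);
	mask = (0xff or 0xffff) << off;
	val  = *addr & ~mask;
     loop:
	oldvalue = ((oldval << off) & mask) | val;
	newvalue = ((newval << off) & mask) | val;
	res = 32-bit CAS (addr, oldvalue, newvalue), i.e. the
	      atomic_compare_and_swapsi_1 pattern; BOOL_RESULT is set
	      from the resulting CC flag;
	if (the CAS succeeded) goto end;
	if ((res & ~mask) != val) { val = res & ~mask; goto loop; }
     end:
	result = (res & mask) >> off;  */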
9569 
9570 static void
9571 arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem,
9572 				rtx oldval, rtx newval, rtx weak,
9573 				rtx mod_s, rtx mod_f)
9574 {
9575   rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
9576   rtx addr = gen_reg_rtx (Pmode);
9577   rtx off = gen_reg_rtx (SImode);
9578   rtx oldv = gen_reg_rtx (SImode);
9579   rtx newv = gen_reg_rtx (SImode);
9580   rtx oldvalue = gen_reg_rtx (SImode);
9581   rtx newvalue = gen_reg_rtx (SImode);
9582   rtx res = gen_reg_rtx (SImode);
9583   rtx resv = gen_reg_rtx (SImode);
9584   rtx memsi, val, mask, end_label, loop_label, cc, x;
9585   machine_mode mode;
9586   bool is_weak = (weak != const0_rtx);
9587 
9588   /* Truncate the address.  */
9589   emit_insn (gen_rtx_SET (addr,
9590 			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
9591 
9592   /* Compute the datum offset.  */
9593   emit_insn (gen_rtx_SET (off,
9594 			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));
9595   if (TARGET_BIG_ENDIAN)
9596     emit_insn (gen_rtx_SET (off,
9597 			    gen_rtx_MINUS (SImode,
9598 					   (GET_MODE (mem) == QImode) ?
9599 					   GEN_INT (3) : GEN_INT (2), off)));
9600 
9601   /* Normal read from truncated address.  */
9602   memsi = gen_rtx_MEM (SImode, addr);
9603   set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
9604   MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
9605 
9606   val = copy_to_reg (memsi);
9607 
  /* Convert the offset into bits.  */
9609   emit_insn (gen_rtx_SET (off,
9610 			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
9611 
9612   /* Get the proper mask.  */
9613   if (GET_MODE (mem) == QImode)
9614     mask = force_reg (SImode, GEN_INT (0xff));
9615   else
9616     mask = force_reg (SImode, GEN_INT (0xffff));
9617 
9618   emit_insn (gen_rtx_SET (mask,
9619 			  gen_rtx_ASHIFT (SImode, mask, off)));
9620 
9621   /* Prepare the old and new values.  */
9622   emit_insn (gen_rtx_SET (val,
9623 			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9624 				       val)));
9625 
9626   oldval = gen_lowpart (SImode, oldval);
9627   emit_insn (gen_rtx_SET (oldv,
9628 			  gen_rtx_ASHIFT (SImode, oldval, off)));
9629 
9630   newval = gen_lowpart_common (SImode, newval);
9631   emit_insn (gen_rtx_SET (newv,
9632 			  gen_rtx_ASHIFT (SImode, newval, off)));
9633 
9634   emit_insn (gen_rtx_SET (oldv,
9635 			  gen_rtx_AND (SImode, oldv, mask)));
9636 
9637   emit_insn (gen_rtx_SET (newv,
9638 			  gen_rtx_AND (SImode, newv, mask)));
9639 
9640   if (!is_weak)
9641     {
9642       end_label = gen_label_rtx ();
9643       loop_label = gen_label_rtx ();
9644       emit_label (loop_label);
9645     }
9646 
9647   /* Make the old and new values.  */
9648   emit_insn (gen_rtx_SET (oldvalue,
9649 			  gen_rtx_IOR (SImode, oldv, val)));
9650 
9651   emit_insn (gen_rtx_SET (newvalue,
9652 			  gen_rtx_IOR (SImode, newv, val)));
9653 
  /* Try a 32-bit atomic compare and swap.  It clobbers the CC
9655      register.  */
9656   emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue,
9657 					      weak, mod_s, mod_f));
9658 
9659   /* Regardless of the weakness of the operation, a proper boolean
9660      result needs to be provided.  */
9661   x = gen_rtx_REG (CC_Zmode, CC_REG);
9662   x = gen_rtx_EQ (SImode, x, const0_rtx);
9663   emit_insn (gen_rtx_SET (bool_result, x));
9664 
9665   if (!is_weak)
9666     {
      /* Check the result: if the atomic op succeeded, jump to the
	 end label.  */
9669       x = gen_rtx_REG (CC_Zmode, CC_REG);
9670       x = gen_rtx_EQ (VOIDmode, x, const0_rtx);
9671       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9672 				gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx);
9673       emit_jump_insn (gen_rtx_SET (pc_rtx, x));
9674 
9675       /* Wait for the right moment when the accessed 32-bit location
9676 	 is stable.  */
9677       emit_insn (gen_rtx_SET (resv,
9678 			      gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
9679 					   res)));
9680       mode = SELECT_CC_MODE (NE, resv, val);
9681       cc = gen_rtx_REG (mode, CC_REG);
9682       emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val)));
9683 
      /* Set the new value of the 32 bit location, properly masked.  */
9685       emit_insn (gen_rtx_SET (val, resv));
9686 
      /* Try again if the location is unstable.  Fall through if only
	 the scond op failed.  */
9689       x = gen_rtx_NE (VOIDmode, cc, const0_rtx);
9690       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9691 				gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx);
9692       emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9693 
9694       emit_label (end_label);
9695     }
9696 
  /* Finally, return the result for the given mode, properly masked and
     shifted.  */
9698   emit_insn (gen_rtx_SET (res,
9699 			  gen_rtx_AND (SImode, res, mask)));
9700 
9701   emit_insn (gen_rtx_SET (res,
9702 			  gen_rtx_LSHIFTRT (SImode, res, off)));
9703 
9704   emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
9705 }
9706 
9707 /* Helper function used by "atomic_compare_and_swap" expand
9708    pattern.  */
9709 
9710 void
9711 arc_expand_compare_and_swap (rtx operands[])
9712 {
9713   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
9714   machine_mode mode;
9715 
9716   bval = operands[0];
9717   rval = operands[1];
9718   mem = operands[2];
9719   oldval = operands[3];
9720   newval = operands[4];
9721   is_weak = operands[5];
9722   mod_s = operands[6];
9723   mod_f = operands[7];
9724   mode = GET_MODE (mem);
9725 
9726   if (reg_overlap_mentioned_p (rval, oldval))
9727     oldval = copy_to_reg (oldval);
9728 
9729   if (mode == SImode)
9730     {
9731       emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval,
9732 						  is_weak, mod_s, mod_f));
9733       x = gen_rtx_REG (CC_Zmode, CC_REG);
9734       x = gen_rtx_EQ (SImode, x, const0_rtx);
9735       emit_insn (gen_rtx_SET (bval, x));
9736     }
9737   else
9738     {
9739       arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval,
9740 				      is_weak, mod_s, mod_f);
9741     }
9742 }
9743 
9744 /* Helper function used by the "atomic_compare_and_swapsi_1"
9745    pattern.  */
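/* Roughly, the emitted sequence is (a sketch; "llock" / "scond" stand for
   the arc_load_exclusivesi / arc_store_exclusivesi patterns, and LABEL1
   with the backward branch only exist for the strong variant):

	[pre barrier]
     label1:
	rval = llock [mem]
	if (rval != oldval) goto label2
	scond newval, [mem]			; sets the Z flag
	if (the scond failed) goto label1
     label2:
	[post barrier]  */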
9746 
9747 void
9748 arc_split_compare_and_swap (rtx operands[])
9749 {
9750   rtx rval, mem, oldval, newval;
9751   machine_mode mode;
9752   enum memmodel mod_s, mod_f;
9753   bool is_weak;
9754   rtx label1, label2, x, cond;
9755 
9756   rval = operands[0];
9757   mem = operands[1];
9758   oldval = operands[2];
9759   newval = operands[3];
9760   is_weak = (operands[4] != const0_rtx);
9761   mod_s = (enum memmodel) INTVAL (operands[5]);
9762   mod_f = (enum memmodel) INTVAL (operands[6]);
9763   mode = GET_MODE (mem);
9764 
9765   /* ARC atomic ops work only with 32-bit aligned memories.  */
9766   gcc_assert (mode == SImode);
9767 
9768   arc_pre_atomic_barrier (mod_s);
9769 
9770   label1 = NULL_RTX;
9771   if (!is_weak)
9772     {
9773       label1 = gen_label_rtx ();
9774       emit_label (label1);
9775     }
9776   label2 = gen_label_rtx ();
9777 
9778   /* Load exclusive.  */
9779   emit_insn (gen_arc_load_exclusivesi (rval, mem));
9780 
9781   /* Check if it is oldval.  */
9782   mode = SELECT_CC_MODE (NE, rval, oldval);
9783   cond = gen_rtx_REG (mode, CC_REG);
9784   emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode, rval, oldval)));
9785 
9786   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9787   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9788 			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
9789   emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9790 
9791   /* Exclusively store the new value.  The store clobbers the CC reg.  */
9792   emit_insn (gen_arc_store_exclusivesi (mem, newval));
9793 
9794   if (!is_weak)
9795     {
9796       /* Check the result of the store.  */
9797       cond = gen_rtx_REG (CC_Zmode, CC_REG);
9798       x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9799       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9800 				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
9801       emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9802     }
9803 
9804   if (mod_f != MEMMODEL_RELAXED)
9805     emit_label (label2);
9806 
9807   arc_post_atomic_barrier (mod_s);
9808 
9809   if (mod_f == MEMMODEL_RELAXED)
9810     emit_label (label2);
9811 }
9812 
9813 /* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
9814    to perform.  MEM is the memory on which to operate.  VAL is the second
9815    operand of the binary operator.  BEFORE and AFTER are optional locations to
9816    return the value of MEM either before or after the operation.  MODEL_RTX
9817    is a CONST_INT containing the memory model to use.  */
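
/* Roughly, and only as an illustration of the shape of the emitted code
   (mnemonics and operand names are a sketch, not the exact output), the
   sequence below is:

	.L1:	llock	before, [mem]
		<CODE>	after, before, val
		scond	after, [mem]
		bne	.L1		; retry until the SCOND succeeds

   bracketed by whatever barriers MODEL_RTX requires.  */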
9818 
9819 void
9820 arc_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
9821 			 rtx orig_before, rtx orig_after, rtx model_rtx)
9822 {
9823   enum memmodel model = (enum memmodel) INTVAL (model_rtx);
9824   machine_mode mode = GET_MODE (mem);
9825   rtx label, x, cond;
9826   rtx before = orig_before, after = orig_after;
9827 
9828   /* ARC atomic ops work only with 32-bit aligned memories.  */
9829   gcc_assert (mode == SImode);
9830 
9831   arc_pre_atomic_barrier (model);
9832 
9833   label = gen_label_rtx ();
9834   emit_label (label);
9835   label = gen_rtx_LABEL_REF (VOIDmode, label);
9836 
9837   if (before == NULL_RTX)
9838     before = gen_reg_rtx (mode);
9839 
9840   if (after == NULL_RTX)
9841     after = gen_reg_rtx (mode);
9842 
9843   /* Load exclusive.  */
9844   emit_insn (gen_arc_load_exclusivesi (before, mem));
9845 
9846   switch (code)
9847     {
9848     case NOT:
9849       x = gen_rtx_AND (mode, before, val);
9850       emit_insn (gen_rtx_SET (after, x));
9851       x = gen_rtx_NOT (mode, after);
9852       emit_insn (gen_rtx_SET (after, x));
9853       break;
9854 
9855     case MINUS:
9856       if (CONST_INT_P (val))
9857 	{
9858 	  val = GEN_INT (-INTVAL (val));
9859 	  code = PLUS;
9860 	}
9861 
9862       /* FALLTHRU.  */
9863     default:
9864       x = gen_rtx_fmt_ee (code, mode, before, val);
9865       emit_insn (gen_rtx_SET (after, x));
9866       break;
9867     }
9868 
9869   /* Exclusively store the new value.  The store clobbers the CC reg.  */
9870   emit_insn (gen_arc_store_exclusivesi (mem, after));
9871 
9872   /* Check the result of the store.  */
9873   cond = gen_rtx_REG (CC_Zmode, CC_REG);
9874   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9875   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9876 			    label, pc_rtx);
9877   emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
9878 
9879   arc_post_atomic_barrier (model);
9880 }
9881 
9882 /* Implement TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P.  */
9883 
9884 static bool
9885 arc_no_speculation_in_delay_slots_p ()
9886 {
9887   return true;
9888 }
9889 
9890 /* Return a PARALLEL of registers describing where to find the pieces of
9891    an 8-byte register, or NULL_RTX if no such description is needed.  */
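
/* For example, for an 8-byte value living in the register pair r0/r1 this
   returns something along the lines of (illustrative, not verbatim RTL):

     (parallel [(reg:SI r0) (reg:SI r1)])  */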
9892 
9893 static rtx
9894 arc_dwarf_register_span (rtx rtl)
9895 {
9896   machine_mode mode = GET_MODE (rtl);
9897   unsigned regno;
9898   rtx p;
9899 
9900   if (GET_MODE_SIZE (mode) != 8)
9901     return NULL_RTX;
9902 
9903   p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
9904   regno = REGNO (rtl);
9905   XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
9906   XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
9907 
9908   return p;
9909 }
9910 
9911 /* Return true if OP is an acceptable memory operand for ARCompact
9912    16-bit load instructions of MODE.
9913 
9914    AV2SHORT: TRUE if the address needs to fit into the new ARCv2 short,
9915    non-scaled instructions.
9916 
9917    SCALED: TRUE if the address can be scaled.  */
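
/* Informally, and ignoring the AV2SHORT/SCALED refinements handled below,
   the accepted short-form address shapes are roughly (a reading aid only,
   not an exact specification):

     [Rb]              Rb a compact register (or SP, except for halfwords)
     [Rb, u]           Rb compact, small non-negative offset, size-aligned
     [SP, u]           non-halfword access, word-aligned offset below 128
     [Rb, Ri]          base plus index, both registers compact
     [Rb, Ri * scale]  scaled index form, accepted when SCALED is true  */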
9918 
9919 bool
9920 compact_memory_operand_p (rtx op, machine_mode mode,
9921 			  bool av2short, bool scaled)
9922 {
9923   rtx addr, plus0, plus1;
9924   int size, off;
9925 
9926   /* Eliminate non-memory operations.  */
9927   if (GET_CODE (op) != MEM)
9928     return false;
9929 
9930   /* .di instructions have no 16-bit form.  */
9931   if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET)
9932     return false;
9933 
9934   if (mode == VOIDmode)
9935     mode = GET_MODE (op);
9936 
9937   size = GET_MODE_SIZE (mode);
9938 
9939   /* Double-word operations really expand to two instructions, so
9940      reject them.  */
9941   if (size > UNITS_PER_WORD)
9942     return false;
9943 
9944   /* Decode the address now.  */
9945   addr = XEXP (op, 0);
9946   switch (GET_CODE (addr))
9947     {
9948     case REG:
9949       return (REGNO (addr) >= FIRST_PSEUDO_REGISTER
9950 	      || COMPACT_GP_REG_P (REGNO (addr))
9951 	      || (SP_REG_P (REGNO (addr)) && (size != 2)));
9952     case PLUS:
9953       plus0 = XEXP (addr, 0);
9954       plus1 = XEXP (addr, 1);
9955 
9956       if ((GET_CODE (plus0) == REG)
9957 	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
9958 	      || COMPACT_GP_REG_P (REGNO (plus0)))
9959 	  && ((GET_CODE (plus1) == REG)
9960 	      && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER)
9961 		  || COMPACT_GP_REG_P (REGNO (plus1)))))
9962 	{
9963 	  return !av2short;
9964 	}
9965 
9966       if ((GET_CODE (plus0) == REG)
9967 	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
9968 	      || (COMPACT_GP_REG_P (REGNO (plus0)) && !av2short)
9969 	      || (IN_RANGE (REGNO (plus0), 0, 31) && av2short))
9970 	  && (GET_CODE (plus1) == CONST_INT))
9971 	{
9972 	  bool valid = false;
9973 
9974 	  off = INTVAL (plus1);
9975 
9976 	  /* Negative offset is not supported in 16-bit load/store insns.  */
9977 	  if (off < 0)
9978 	    return false;
9979 
9980 	  /* Only u5 immediates allowed in code density instructions.  */
9981 	  if (av2short)
9982 	    {
9983 	      switch (size)
9984 		{
9985 		case 1:
9986 		  return false;
9987 		case 2:
9988 		  /* This is an ldh_s.x instruction; check the u6
9989 		     immediate.  */
9990 		  if (COMPACT_GP_REG_P (REGNO (plus0)))
9991 		    valid = true;
9992 		  break;
9993 		case 4:
9994 		  /* Only u5 immediates are allowed in 32-bit access code
9995 		     density instructions.  */
9996 		  if (REGNO (plus0) <= 31)
9997 		    return ((off < 32) && (off % 4 == 0));
9998 		  break;
9999 		default:
10000 		  return false;
10001 		}
10002 	    }
10003 	  else if (COMPACT_GP_REG_P (REGNO (plus0)))
10004 	    valid = true;
10006 
10007 	  if (valid)
10008 	    {
10010 	      switch (size)
10011 		{
10012 		case 1:
10013 		  return (off < 32);
10014 		case 2:
10015 		  /* The 6-bit constant gets shifted to fit the real
10016 		     5-bit field.  Also check the alignment.  */
10017 		  return ((off < 64) && (off % 2 == 0));
10018 		case 4:
10019 		  return ((off < 128) && (off % 4 == 0));
10020 		default:
10021 		  return false;
10022 		}
10023 	    }
10024 	}
10025 
10026       if (REG_P (plus0) && CONST_INT_P (plus1)
10027 	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
10028 	      || SP_REG_P (REGNO (plus0)))
10029 	  && !av2short)
10030 	{
10031 	  off = INTVAL (plus1);
10032 	  return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0));
10033 	}
10034 
10035       if ((GET_CODE (plus0) == MULT)
10036 	  && (GET_CODE (XEXP (plus0, 0)) == REG)
10037 	  && ((REGNO (XEXP (plus0, 0)) >= FIRST_PSEUDO_REGISTER)
10038 	      || COMPACT_GP_REG_P (REGNO (XEXP (plus0, 0))))
10039 	  && (GET_CODE (plus1) == REG)
10040 	  && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER)
10041 	      || COMPACT_GP_REG_P (REGNO (plus1))))
10042 	return scaled;
10043     default:
10044       break;
10045       /* TODO: 'gp' and 'pcl' are to be supported as base address operands
10046 	 for 16-bit load instructions.  */
10047     }
10048   return false;
10049 }
10050 
10051 struct gcc_target targetm = TARGET_INITIALIZER;
10052 
10053 #include "gt-arc.h"
10054