1 /* Subroutines used for code generation on the Synopsys DesignWare ARC cpu. 2 Copyright (C) 1994-2017 Free Software Foundation, Inc. 3 4 Sources derived from work done by Sankhya Technologies (www.sankhya.com) on 5 behalf of Synopsys Inc. 6 7 Position Independent Code support added,Code cleaned up, 8 Comments and Support For ARC700 instructions added by 9 Saurabh Verma (saurabh.verma@codito.com) 10 Ramana Radhakrishnan(ramana.radhakrishnan@codito.com) 11 12 Fixing ABI inconsistencies, optimizations for ARC600 / ARC700 pipelines, 13 profiling support added by Joern Rennecke <joern.rennecke@embecosm.com> 14 15 This file is part of GCC. 16 17 GCC is free software; you can redistribute it and/or modify 18 it under the terms of the GNU General Public License as published by 19 the Free Software Foundation; either version 3, or (at your option) 20 any later version. 21 22 GCC is distributed in the hope that it will be useful, 23 but WITHOUT ANY WARRANTY; without even the implied warranty of 24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 25 GNU General Public License for more details. 26 27 You should have received a copy of the GNU General Public License 28 along with GCC; see the file COPYING3. If not see 29 <http://www.gnu.org/licenses/>. */ 30 31 #include "config.h" 32 #include "system.h" 33 #include "coretypes.h" 34 #include "memmodel.h" 35 #include "backend.h" 36 #include "target.h" 37 #include "rtl.h" 38 #include "tree.h" 39 #include "cfghooks.h" 40 #include "df.h" 41 #include "tm_p.h" 42 #include "stringpool.h" 43 #include "optabs.h" 44 #include "regs.h" 45 #include "emit-rtl.h" 46 #include "recog.h" 47 #include "diagnostic.h" 48 #include "fold-const.h" 49 #include "varasm.h" 50 #include "stor-layout.h" 51 #include "calls.h" 52 #include "output.h" 53 #include "insn-attr.h" 54 #include "flags.h" 55 #include "explow.h" 56 #include "expr.h" 57 #include "langhooks.h" 58 #include "tm-constrs.h" 59 #include "reload.h" /* For operands_match_p */ 60 #include "cfgrtl.h" 61 #include "tree-pass.h" 62 #include "context.h" 63 #include "builtins.h" 64 #include "rtl-iter.h" 65 #include "alias.h" 66 67 /* Which cpu we're compiling for (ARC600, ARC601, ARC700). */ 68 static char arc_cpu_name[10] = ""; 69 static const char *arc_cpu_string = arc_cpu_name; 70 71 /* ??? Loads can handle any constant, stores can only handle small ones. */ 72 /* OTOH, LIMMs cost extra, so their usefulness is limited. */ 73 #define RTX_OK_FOR_OFFSET_P(MODE, X) \ 74 (GET_CODE (X) == CONST_INT \ 75 && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & -4, \ 76 (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3 \ 77 ? 0 \ 78 : -(-GET_MODE_SIZE (MODE) | -4) >> 1))) 79 80 #define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X, INDEX, STRICT) \ 81 (GET_CODE (X) == PLUS \ 82 && RTX_OK_FOR_BASE_P (XEXP (X, 0), (STRICT)) \ 83 && ((INDEX && RTX_OK_FOR_INDEX_P (XEXP (X, 1), (STRICT)) \ 84 && GET_MODE_SIZE ((MODE)) <= 4) \ 85 || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1)))) 86 87 #define LEGITIMATE_SCALED_ADDRESS_P(MODE, X, STRICT) \ 88 (GET_CODE (X) == PLUS \ 89 && GET_CODE (XEXP (X, 0)) == MULT \ 90 && RTX_OK_FOR_INDEX_P (XEXP (XEXP (X, 0), 0), (STRICT)) \ 91 && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \ 92 && ((GET_MODE_SIZE (MODE) == 2 && INTVAL (XEXP (XEXP (X, 0), 1)) == 2) \ 93 || (GET_MODE_SIZE (MODE) == 4 && INTVAL (XEXP (XEXP (X, 0), 1)) == 4)) \ 94 && (RTX_OK_FOR_BASE_P (XEXP (X, 1), (STRICT)) \ 95 || (flag_pic ? 
CONST_INT_P (XEXP (X, 1)) : CONSTANT_P (XEXP (X, 1))))) 96 97 #define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \ 98 (GET_CODE (X) == PLUS \ 99 && (REG_P (XEXP ((X), 0)) && REGNO (XEXP ((X), 0)) == SDATA_BASE_REGNUM) \ 100 && ((GET_CODE (XEXP((X),1)) == SYMBOL_REF \ 101 && SYMBOL_REF_SMALL_P (XEXP ((X), 1))) \ 102 || (GET_CODE (XEXP ((X), 1)) == CONST \ 103 && GET_CODE (XEXP (XEXP ((X), 1), 0)) == PLUS \ 104 && GET_CODE (XEXP (XEXP (XEXP ((X), 1), 0), 0)) == SYMBOL_REF \ 105 && SYMBOL_REF_SMALL_P (XEXP (XEXP (XEXP ((X), 1), 0), 0)) \ 106 && GET_CODE (XEXP(XEXP (XEXP ((X), 1), 0), 1)) == CONST_INT))) 107 108 /* Array of valid operand punctuation characters. */ 109 char arc_punct_chars[256]; 110 111 /* State used by arc_ccfsm_advance to implement conditional execution. */ 112 struct GTY (()) arc_ccfsm 113 { 114 int state; 115 int cc; 116 rtx cond; 117 rtx_insn *target_insn; 118 int target_label; 119 }; 120 121 #define arc_ccfsm_current cfun->machine->ccfsm_current 122 123 #define ARC_CCFSM_BRANCH_DELETED_P(STATE) \ 124 ((STATE)->state == 1 || (STATE)->state == 2) 125 126 /* Indicate we're conditionalizing insns now. */ 127 #define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \ 128 ((STATE)->state += 2) 129 130 #define ARC_CCFSM_COND_EXEC_P(STATE) \ 131 ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5 \ 132 || current_insn_predicate) 133 134 /* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE. */ 135 #define CCFSM_ISCOMPACT(INSN,STATE) \ 136 (ARC_CCFSM_COND_EXEC_P (STATE) \ 137 ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \ 138 || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \ 139 : get_attr_iscompact (INSN) != ISCOMPACT_FALSE) 140 141 /* Likewise, but also consider that INSN might be in a delay slot of JUMP. */ 142 #define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \ 143 ((ARC_CCFSM_COND_EXEC_P (STATE) \ 144 || (JUMP_P (JUMP) \ 145 && INSN_ANNULLED_BRANCH_P (JUMP) \ 146 && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \ 147 ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \ 148 || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \ 149 : get_attr_iscompact (INSN) != ISCOMPACT_FALSE) 150 151 /* The maximum number of insns skipped which will be conditionalised if 152 possible. */ 153 /* When optimizing for speed: 154 Let p be the probability that the potentially skipped insns need to 155 be executed, pn the cost of a correctly predicted non-taken branch, 156 mt the cost of a mis/non-predicted taken branch, 157 mn mispredicted non-taken, pt correctly predicted taken ; 158 costs expressed in numbers of instructions like the ones considered 159 skipping. 160 Unfortunately we don't have a measure of predictability - this 161 is linked to probability only in that in the no-eviction-scenario 162 there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger 163 value that can be assumed *if* the distribution is perfectly random. 164 A predictability of 1 is perfectly plausible not matter what p is, 165 because the decision could be dependent on an invocation parameter 166 of the program. 167 For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn 168 For small p, we want MAX_INSNS_SKIPPED == pt 169 170 When optimizing for size: 171 We want to skip insn unless we could use 16 opcodes for the 172 non-conditionalized insn to balance the branch length or more. 173 Performance can be tie-breaker. 
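   As a rough worked example (illustrative numbers, not measurements):
   with pn = 1, mt = 3 and p = 1/2, the large-p formula above gives
   1 / (1 - 1/2) + 3 - 1 = 4, so a limit in the low single digits is
   plausible for typical branch costs.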
*/ 174 /* If the potentially-skipped insns are likely to be executed, we'll 175 generally save one non-taken branch, so we want 176 this limit to be 177 no less than 1/p. */ 178 #define MAX_INSNS_SKIPPED 3 179 180 /* A nop is needed between a 4 byte insn that sets the condition codes and 181 a branch that uses them (the same isn't true for an 8 byte insn that sets 182 the condition codes). Set by arc_ccfsm_advance. Used by 183 arc_print_operand. */ 184 185 static int get_arc_condition_code (rtx); 186 187 static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *); 188 189 /* Table of machine-specific attributes supported by the ARC 190 port. */ 191 const struct attribute_spec arc_attribute_table[] = 192 { 193 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, 194 affects_type_identity } */ 195 { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute, true }, 196 /* Function calls made to this symbol must be done indirectly, because 197 it may lie outside of the 21/25 bit addressing range of a normal function 198 call. */ 199 { "long_call", 0, 0, false, true, true, NULL, false }, 200 /* Whereas these functions are always known to reside within the 25 bit 201 addressing range of unconditionalized bl. */ 202 { "medium_call", 0, 0, false, true, true, NULL, false }, 203 /* And these functions are always known to reside within the 21 bit 204 addressing range of blcc. */ 205 { "short_call", 0, 0, false, true, true, NULL, false }, 206 { NULL, 0, 0, false, false, false, NULL, false } 207 }; 208 static int arc_comp_type_attributes (const_tree, const_tree); 209 static void arc_file_start (void); 210 static void arc_internal_label (FILE *, const char *, unsigned long); 211 static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, 212 tree); 213 static int arc_address_cost (rtx, machine_mode, addr_space_t, bool); 214 static void arc_encode_section_info (tree decl, rtx rtl, int first); 215 216 static void arc_init_builtins (void); 217 static rtx arc_expand_builtin (tree, rtx, rtx, machine_mode, int); 218 219 static int branch_dest (rtx); 220 221 static void arc_output_pic_addr_const (FILE *, rtx, int); 222 bool arc_legitimate_pic_operand_p (rtx); 223 static bool arc_function_ok_for_sibcall (tree, tree); 224 static rtx arc_function_value (const_tree, const_tree, bool); 225 const char * output_shift (rtx *); 226 static void arc_reorg (void); 227 static bool arc_in_small_data_p (const_tree); 228 229 static void arc_init_reg_tables (void); 230 static bool arc_return_in_memory (const_tree, const_tree); 231 static bool arc_vector_mode_supported_p (machine_mode); 232 233 static bool arc_can_use_doloop_p (const widest_int &, const widest_int &, 234 unsigned int, bool); 235 static const char *arc_invalid_within_doloop (const rtx_insn *); 236 237 static void output_short_suffix (FILE *file); 238 239 static bool arc_frame_pointer_required (void); 240 241 static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, 242 unsigned int, 243 enum by_pieces_operation op, 244 bool); 245 246 /* Globally visible information about the currently selected cpu. */ 247 const arc_cpu_t *arc_selected_cpu; 248 249 /* Implements target hook vector_mode_supported_p.
*/ 250 251 static bool 252 arc_vector_mode_supported_p (machine_mode mode) 253 { 254 switch (mode) 255 { 256 case V2HImode: 257 return TARGET_PLUS_DMPY; 258 case V4HImode: 259 case V2SImode: 260 return TARGET_PLUS_QMACW; 261 case V4SImode: 262 case V8HImode: 263 return TARGET_SIMD_SET; 264 265 default: 266 return false; 267 } 268 } 269 270 /* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */ 271 272 static machine_mode 273 arc_preferred_simd_mode (machine_mode mode) 274 { 275 switch (mode) 276 { 277 case HImode: 278 return TARGET_PLUS_QMACW ? V4HImode : V2HImode; 279 case SImode: 280 return V2SImode; 281 282 default: 283 return word_mode; 284 } 285 } 286 287 /* Implements target hook 288 TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */ 289 290 static unsigned int 291 arc_autovectorize_vector_sizes (void) 292 { 293 return TARGET_PLUS_QMACW ? (8 | 4) : 0; 294 } 295 296 /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */ 297 static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED; 298 static rtx arc_delegitimize_address (rtx); 299 static bool arc_can_follow_jump (const rtx_insn *follower, 300 const rtx_insn *followee); 301 302 static rtx frame_insn (rtx); 303 static void arc_function_arg_advance (cumulative_args_t, machine_mode, 304 const_tree, bool); 305 static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode); 306 307 static void arc_finalize_pic (void); 308 309 /* initialize the GCC target structure. */ 310 #undef TARGET_COMP_TYPE_ATTRIBUTES 311 #define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes 312 #undef TARGET_ASM_FILE_START 313 #define TARGET_ASM_FILE_START arc_file_start 314 #undef TARGET_ATTRIBUTE_TABLE 315 #define TARGET_ATTRIBUTE_TABLE arc_attribute_table 316 #undef TARGET_ASM_INTERNAL_LABEL 317 #define TARGET_ASM_INTERNAL_LABEL arc_internal_label 318 #undef TARGET_RTX_COSTS 319 #define TARGET_RTX_COSTS arc_rtx_costs 320 #undef TARGET_ADDRESS_COST 321 #define TARGET_ADDRESS_COST arc_address_cost 322 323 #undef TARGET_ENCODE_SECTION_INFO 324 #define TARGET_ENCODE_SECTION_INFO arc_encode_section_info 325 326 #undef TARGET_CANNOT_FORCE_CONST_MEM 327 #define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem 328 329 #undef TARGET_INIT_BUILTINS 330 #define TARGET_INIT_BUILTINS arc_init_builtins 331 332 #undef TARGET_EXPAND_BUILTIN 333 #define TARGET_EXPAND_BUILTIN arc_expand_builtin 334 335 #undef TARGET_BUILTIN_DECL 336 #define TARGET_BUILTIN_DECL arc_builtin_decl 337 338 #undef TARGET_ASM_OUTPUT_MI_THUNK 339 #define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk 340 341 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 342 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true 343 344 #undef TARGET_FUNCTION_OK_FOR_SIBCALL 345 #define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall 346 347 #undef TARGET_MACHINE_DEPENDENT_REORG 348 #define TARGET_MACHINE_DEPENDENT_REORG arc_reorg 349 350 #undef TARGET_IN_SMALL_DATA_P 351 #define TARGET_IN_SMALL_DATA_P arc_in_small_data_p 352 353 #undef TARGET_PROMOTE_FUNCTION_MODE 354 #define TARGET_PROMOTE_FUNCTION_MODE \ 355 default_promote_function_mode_always_promote 356 357 #undef TARGET_PROMOTE_PROTOTYPES 358 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true 359 360 #undef TARGET_RETURN_IN_MEMORY 361 #define TARGET_RETURN_IN_MEMORY arc_return_in_memory 362 #undef TARGET_PASS_BY_REFERENCE 363 #define TARGET_PASS_BY_REFERENCE arc_pass_by_reference 364 365 #undef TARGET_SETUP_INCOMING_VARARGS 366 #define TARGET_SETUP_INCOMING_VARARGS 
arc_setup_incoming_varargs 367 368 #undef TARGET_ARG_PARTIAL_BYTES 369 #define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes 370 371 #undef TARGET_MUST_PASS_IN_STACK 372 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size 373 374 #undef TARGET_FUNCTION_VALUE 375 #define TARGET_FUNCTION_VALUE arc_function_value 376 377 #undef TARGET_SCHED_ADJUST_PRIORITY 378 #define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority 379 380 #undef TARGET_VECTOR_MODE_SUPPORTED_P 381 #define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p 382 383 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE 384 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode 385 386 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES 387 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES arc_autovectorize_vector_sizes 388 389 #undef TARGET_CAN_USE_DOLOOP_P 390 #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p 391 392 #undef TARGET_INVALID_WITHIN_DOLOOP 393 #define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop 394 395 #undef TARGET_PRESERVE_RELOAD_P 396 #define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p 397 398 #undef TARGET_CAN_FOLLOW_JUMP 399 #define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump 400 401 #undef TARGET_DELEGITIMIZE_ADDRESS 402 #define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address 403 404 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P 405 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ 406 arc_use_by_pieces_infrastructure_p 407 408 /* Usually, we will be able to scale anchor offsets. 409 When this fails, we want LEGITIMIZE_ADDRESS to kick in. */ 410 #undef TARGET_MIN_ANCHOR_OFFSET 411 #define TARGET_MIN_ANCHOR_OFFSET (-1024) 412 #undef TARGET_MAX_ANCHOR_OFFSET 413 #define TARGET_MAX_ANCHOR_OFFSET (1020) 414 415 #undef TARGET_SECONDARY_RELOAD 416 #define TARGET_SECONDARY_RELOAD arc_secondary_reload 417 418 #define TARGET_OPTION_OVERRIDE arc_override_options 419 420 #define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage 421 422 #define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline 423 424 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arc_trampoline_adjust_address 425 426 #define TARGET_CAN_ELIMINATE arc_can_eliminate 427 428 #define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required 429 430 #define TARGET_FUNCTION_ARG arc_function_arg 431 432 #define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance 433 434 #define TARGET_LEGITIMATE_CONSTANT_P arc_legitimate_constant_p 435 436 #define TARGET_LEGITIMATE_ADDRESS_P arc_legitimate_address_p 437 438 #define TARGET_MODE_DEPENDENT_ADDRESS_P arc_mode_dependent_address_p 439 440 #define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address 441 442 #define TARGET_ADJUST_INSN_LENGTH arc_adjust_insn_length 443 444 #define TARGET_INSN_LENGTH_PARAMETERS arc_insn_length_parameters 445 446 #undef TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P 447 #define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P \ 448 arc_no_speculation_in_delay_slots_p 449 450 #undef TARGET_LRA_P 451 #define TARGET_LRA_P arc_lra_p 452 #define TARGET_REGISTER_PRIORITY arc_register_priority 453 /* Stores with scaled offsets have different displacement ranges. 
*/ 454 #define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true 455 #define TARGET_SPILL_CLASS arc_spill_class 456 457 #include "target-def.h" 458 459 #undef TARGET_ASM_ALIGNED_HI_OP 460 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" 461 #undef TARGET_ASM_ALIGNED_SI_OP 462 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" 463 464 #ifdef HAVE_AS_TLS 465 #undef TARGET_HAVE_TLS 466 #define TARGET_HAVE_TLS HAVE_AS_TLS 467 #endif 468 469 #undef TARGET_DWARF_REGISTER_SPAN 470 #define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span 471 472 /* Try to keep the (mov:DF _, reg) as early as possible so 473 that the d<add/sub/mul>h-lr insns appear together and can 474 use the peephole2 pattern. */ 475 476 static int 477 arc_sched_adjust_priority (rtx_insn *insn, int priority) 478 { 479 rtx set = single_set (insn); 480 if (set 481 && GET_MODE (SET_SRC(set)) == DFmode 482 && GET_CODE (SET_SRC(set)) == REG) 483 { 484 /* Incrementing priority by 20 (empirically derived). */ 485 return priority + 20; 486 } 487 488 return priority; 489 } 490 491 /* For ARC base register + offset addressing, the validity of the 492 address is mode-dependent for most of the offset range, as the 493 offset can be scaled by the access size. 494 We don't expose these as mode-dependent addresses in the 495 mode_dependent_address_p target hook, because that would disable 496 lots of optimizations, and most uses of these addresses are for 32 497 or 64 bit accesses anyways, which are fine. 498 However, that leaves some addresses for 8 / 16 bit values not 499 properly reloaded by the generic code, which is why we have to 500 schedule secondary reloads for these. */ 501 502 static reg_class_t 503 arc_secondary_reload (bool in_p, 504 rtx x, 505 reg_class_t cl, 506 machine_mode mode, 507 secondary_reload_info *sri) 508 { 509 enum rtx_code code = GET_CODE (x); 510 511 if (cl == DOUBLE_REGS) 512 return GENERAL_REGS; 513 514 /* The loop counter register can be stored, but not loaded directly. */ 515 if ((cl == LPCOUNT_REG || cl == WRITABLE_CORE_REGS) 516 && in_p && MEM_P (x)) 517 return GENERAL_REGS; 518 519 /* If we have a subreg (reg), where reg is a pseudo (that will end in 520 a memory location), then we may need a scratch register to handle 521 the fp/sp+largeoffset address. */ 522 if (code == SUBREG) 523 { 524 rtx addr = NULL_RTX; 525 x = SUBREG_REG (x); 526 527 if (REG_P (x)) 528 { 529 int regno = REGNO (x); 530 if (regno >= FIRST_PSEUDO_REGISTER) 531 regno = reg_renumber[regno]; 532 533 if (regno != -1) 534 return NO_REGS; 535 536 /* It is a pseudo that ends in a stack location. */ 537 if (reg_equiv_mem (REGNO (x))) 538 { 539 /* Get the equivalent address and check the range of the 540 offset. */ 541 rtx mem = reg_equiv_mem (REGNO (x)); 542 addr = find_replacement (&XEXP (mem, 0)); 543 } 544 } 545 else 546 { 547 gcc_assert (MEM_P (x)); 548 addr = XEXP (x, 0); 549 addr = simplify_rtx (addr); 550 } 551 if (addr && GET_CODE (addr) == PLUS 552 && CONST_INT_P (XEXP (addr, 1)) 553 && (!RTX_OK_FOR_OFFSET_P (mode, XEXP (addr, 1)))) 554 { 555 switch (mode) 556 { 557 case QImode: 558 sri->icode = 559 in_p ? CODE_FOR_reload_qi_load : CODE_FOR_reload_qi_store; 560 break; 561 case HImode: 562 sri->icode = 563 in_p ? CODE_FOR_reload_hi_load : CODE_FOR_reload_hi_store; 564 break; 565 default: 566 break; 567 } 568 } 569 } 570 return NO_REGS; 571 } 572 573 /* Convert reloads using offsets that are too large to use indirect 574 addressing. 
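   For illustration (the assembly below is only a sketch): a spill access
   such as
       ld  r0,[fp,large_offset]
   is rewritten so that the full address is first computed into the
   scratch register and the access becomes register-indirect:
       add r_scratch,fp,large_offset
       ld  r0,[r_scratch]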
*/ 575 576 void 577 arc_secondary_reload_conv (rtx reg, rtx mem, rtx scratch, bool store_p) 578 { 579 rtx addr; 580 581 gcc_assert (GET_CODE (mem) == MEM); 582 addr = XEXP (mem, 0); 583 584 /* Large offset: use a move. FIXME: ld ops accepts limms as 585 offsets. Hence, the following move insn is not required. */ 586 emit_move_insn (scratch, addr); 587 mem = replace_equiv_address_nv (mem, scratch); 588 589 /* Now create the move. */ 590 if (store_p) 591 emit_insn (gen_rtx_SET (mem, reg)); 592 else 593 emit_insn (gen_rtx_SET (reg, mem)); 594 595 return; 596 } 597 598 static unsigned arc_ifcvt (void); 599 600 namespace { 601 602 const pass_data pass_data_arc_ifcvt = 603 { 604 RTL_PASS, 605 "arc_ifcvt", /* name */ 606 OPTGROUP_NONE, /* optinfo_flags */ 607 TV_IFCVT2, /* tv_id */ 608 0, /* properties_required */ 609 0, /* properties_provided */ 610 0, /* properties_destroyed */ 611 0, /* todo_flags_start */ 612 TODO_df_finish /* todo_flags_finish */ 613 }; 614 615 class pass_arc_ifcvt : public rtl_opt_pass 616 { 617 public: 618 pass_arc_ifcvt(gcc::context *ctxt) 619 : rtl_opt_pass(pass_data_arc_ifcvt, ctxt) 620 {} 621 622 /* opt_pass methods: */ 623 opt_pass * clone () { return new pass_arc_ifcvt (m_ctxt); } 624 virtual unsigned int execute (function *) { return arc_ifcvt (); } 625 }; 626 627 } // anon namespace 628 629 rtl_opt_pass * 630 make_pass_arc_ifcvt (gcc::context *ctxt) 631 { 632 return new pass_arc_ifcvt (ctxt); 633 } 634 635 static unsigned arc_predicate_delay_insns (void); 636 637 namespace { 638 639 const pass_data pass_data_arc_predicate_delay_insns = 640 { 641 RTL_PASS, 642 "arc_predicate_delay_insns", /* name */ 643 OPTGROUP_NONE, /* optinfo_flags */ 644 TV_IFCVT2, /* tv_id */ 645 0, /* properties_required */ 646 0, /* properties_provided */ 647 0, /* properties_destroyed */ 648 0, /* todo_flags_start */ 649 TODO_df_finish /* todo_flags_finish */ 650 }; 651 652 class pass_arc_predicate_delay_insns : public rtl_opt_pass 653 { 654 public: 655 pass_arc_predicate_delay_insns(gcc::context *ctxt) 656 : rtl_opt_pass(pass_data_arc_predicate_delay_insns, ctxt) 657 {} 658 659 /* opt_pass methods: */ 660 virtual unsigned int execute (function *) 661 { 662 return arc_predicate_delay_insns (); 663 } 664 }; 665 666 } // anon namespace 667 668 rtl_opt_pass * 669 make_pass_arc_predicate_delay_insns (gcc::context *ctxt) 670 { 671 return new pass_arc_predicate_delay_insns (ctxt); 672 } 673 674 /* Called by OVERRIDE_OPTIONS to initialize various things. */ 675 676 static void 677 arc_init (void) 678 { 679 if (TARGET_V2) 680 { 681 /* I have the multiplier, then use it*/ 682 if (TARGET_MPYW || TARGET_MULTI) 683 arc_multcost = COSTS_N_INSNS (1); 684 } 685 /* Note: arc_multcost is only used in rtx_cost if speed is true. */ 686 if (arc_multcost < 0) 687 switch (arc_tune) 688 { 689 case TUNE_ARC700_4_2_STD: 690 /* latency 7; 691 max throughput (1 multiply + 4 other insns) / 5 cycles. */ 692 arc_multcost = COSTS_N_INSNS (4); 693 if (TARGET_NOMPY_SET) 694 arc_multcost = COSTS_N_INSNS (30); 695 break; 696 case TUNE_ARC700_4_2_XMAC: 697 /* latency 5; 698 max throughput (1 multiply + 2 other insns) / 3 cycles. */ 699 arc_multcost = COSTS_N_INSNS (3); 700 if (TARGET_NOMPY_SET) 701 arc_multcost = COSTS_N_INSNS (30); 702 break; 703 case TUNE_ARC600: 704 if (TARGET_MUL64_SET) 705 { 706 arc_multcost = COSTS_N_INSNS (4); 707 break; 708 } 709 /* Fall through. */ 710 default: 711 arc_multcost = COSTS_N_INSNS (30); 712 break; 713 } 714 715 /* MPY instructions valid only for ARC700 or ARCv2. 
*/ 716 if (TARGET_NOMPY_SET && TARGET_ARC600_FAMILY) 717 error ("-mno-mpy supported only for ARC700 or ARCv2"); 718 719 if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR) 720 error ("-mno-dpfp-lrsr supported only with -mdpfp"); 721 722 /* FPX-1. No fast and compact together. */ 723 if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET) 724 || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET)) 725 error ("FPX fast and compact options cannot be specified together"); 726 727 /* FPX-2. No fast-spfp for arc600 or arc601. */ 728 if (TARGET_SPFP_FAST_SET && TARGET_ARC600_FAMILY) 729 error ("-mspfp_fast not available on ARC600 or ARC601"); 730 731 /* FPX-4. No FPX extensions mixed with FPU extensions. */ 732 if ((TARGET_DPFP_FAST_SET || TARGET_DPFP_COMPACT_SET || TARGET_SPFP) 733 && TARGET_HARD_FLOAT) 734 error ("No FPX/FPU mixing allowed"); 735 736 /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic. */ 737 if (flag_pic && TARGET_ARC600_FAMILY) 738 { 739 warning (DK_WARNING, 740 "PIC is not supported for %s. Generating non-PIC code only..", 741 arc_cpu_string); 742 flag_pic = 0; 743 } 744 745 arc_init_reg_tables (); 746 747 /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */ 748 memset (arc_punct_chars, 0, sizeof (arc_punct_chars)); 749 arc_punct_chars['#'] = 1; 750 arc_punct_chars['*'] = 1; 751 arc_punct_chars['?'] = 1; 752 arc_punct_chars['!'] = 1; 753 arc_punct_chars['^'] = 1; 754 arc_punct_chars['&'] = 1; 755 arc_punct_chars['+'] = 1; 756 arc_punct_chars['_'] = 1; 757 758 if (optimize > 1 && !TARGET_NO_COND_EXEC) 759 { 760 /* There are two target-independent ifcvt passes, and arc_reorg may do 761 one or more arc_ifcvt calls. */ 762 opt_pass *pass_arc_ifcvt_4 = make_pass_arc_ifcvt (g); 763 struct register_pass_info arc_ifcvt4_info 764 = { pass_arc_ifcvt_4, "dbr", 1, PASS_POS_INSERT_AFTER }; 765 struct register_pass_info arc_ifcvt5_info 766 = { pass_arc_ifcvt_4->clone (), "shorten", 1, PASS_POS_INSERT_BEFORE }; 767 768 register_pass (&arc_ifcvt4_info); 769 register_pass (&arc_ifcvt5_info); 770 } 771 772 if (flag_delayed_branch) 773 { 774 opt_pass *pass_arc_predicate_delay_insns 775 = make_pass_arc_predicate_delay_insns (g); 776 struct register_pass_info arc_predicate_delay_info 777 = { pass_arc_predicate_delay_insns, "dbr", 1, PASS_POS_INSERT_AFTER }; 778 779 register_pass (&arc_predicate_delay_info); 780 } 781 } 782 783 /* Check ARC options, generate derived target attributes. */ 784 785 static void 786 arc_override_options (void) 787 { 788 if (arc_cpu == PROCESSOR_NONE) 789 arc_cpu = TARGET_CPU_DEFAULT; 790 791 /* Set the default cpu options. */ 792 arc_selected_cpu = &arc_cpu_types[(int) arc_cpu]; 793 794 /* Set the architectures. */ 795 switch (arc_selected_cpu->arch_info->arch_id) 796 { 797 case BASE_ARCH_em: 798 arc_cpu_string = "EM"; 799 break; 800 case BASE_ARCH_hs: 801 arc_cpu_string = "HS"; 802 break; 803 case BASE_ARCH_700: 804 if (arc_selected_cpu->processor == PROCESSOR_nps400) 805 arc_cpu_string = "NPS400"; 806 else 807 arc_cpu_string = "ARC700"; 808 break; 809 case BASE_ARCH_6xx: 810 arc_cpu_string = "ARC600"; 811 break; 812 default: 813 gcc_unreachable (); 814 } 815 816 /* Set cpu flags accordingly to architecture/selected cpu. The cpu 817 specific flags are set in arc-common.c. The architecture forces 818 the default hardware configurations in, regardless what command 819 line options are saying. The CPU optional hw options can be 820 turned on or off. 
*/ 821 #define ARC_OPT(NAME, CODE, MASK, DOC) \ 822 do { \ 823 if ((arc_selected_cpu->flags & CODE) \ 824 && ((target_flags_explicit & MASK) == 0)) \ 825 target_flags |= MASK; \ 826 if (arc_selected_cpu->arch_info->dflags & CODE) \ 827 target_flags |= MASK; \ 828 } while (0); 829 #define ARC_OPTX(NAME, CODE, VAR, VAL, DOC) \ 830 do { \ 831 if ((arc_selected_cpu->flags & CODE) \ 832 && (VAR == DEFAULT_##VAR)) \ 833 VAR = VAL; \ 834 if (arc_selected_cpu->arch_info->dflags & CODE) \ 835 VAR = VAL; \ 836 } while (0); 837 838 #include "arc-options.def" 839 840 #undef ARC_OPTX 841 #undef ARC_OPT 842 843 /* Check options against architecture options. Throw an error if 844 option is not allowed. */ 845 #define ARC_OPTX(NAME, CODE, VAR, VAL, DOC) \ 846 do { \ 847 if ((VAR == VAL) \ 848 && (!(arc_selected_cpu->arch_info->flags & CODE))) \ 849 { \ 850 error ("%s is not available for %s architecture", \ 851 DOC, arc_selected_cpu->arch_info->name); \ 852 } \ 853 } while (0); 854 #define ARC_OPT(NAME, CODE, MASK, DOC) \ 855 do { \ 856 if ((target_flags & MASK) \ 857 && (!(arc_selected_cpu->arch_info->flags & CODE))) \ 858 error ("%s is not available for %s architecture", \ 859 DOC, arc_selected_cpu->arch_info->name); \ 860 } while (0); 861 862 #include "arc-options.def" 863 864 #undef ARC_OPTX 865 #undef ARC_OPT 866 867 /* Set Tune option. */ 868 if (arc_tune == TUNE_NONE) 869 arc_tune = (enum attr_tune) arc_selected_cpu->tune; 870 871 if (arc_size_opt_level == 3) 872 optimize_size = 1; 873 874 /* Compact casesi is not a valid option for ARCv2 family. */ 875 if (TARGET_V2) 876 { 877 if (TARGET_COMPACT_CASESI) 878 { 879 warning (0, "compact-casesi is not applicable to ARCv2"); 880 TARGET_COMPACT_CASESI = 0; 881 } 882 } 883 else if (optimize_size == 1 884 && !global_options_set.x_TARGET_COMPACT_CASESI) 885 TARGET_COMPACT_CASESI = 1; 886 887 if (flag_pic) 888 target_flags |= MASK_NO_SDATA_SET; 889 890 if (flag_no_common == 255) 891 flag_no_common = !TARGET_NO_SDATA_SET; 892 893 /* TARGET_COMPACT_CASESI needs the "q" register class. */ 894 if (TARGET_MIXED_CODE) 895 TARGET_Q_CLASS = 1; 896 if (!TARGET_Q_CLASS) 897 TARGET_COMPACT_CASESI = 0; 898 if (TARGET_COMPACT_CASESI) 899 TARGET_CASE_VECTOR_PC_RELATIVE = 1; 900 901 /* These need to be done at start up. It's convenient to do them here. */ 902 arc_init (); 903 } 904 905 /* The condition codes of the ARC, and the inverse function. */ 906 /* For short branches, the "c" / "nc" names are not defined in the ARC 907 Programmers manual, so we have to use "lo" / "hs"" instead. */ 908 static const char *arc_condition_codes[] = 909 { 910 "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv", 911 "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0 912 }; 913 914 enum arc_cc_code_index 915 { 916 ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N, 917 ARC_CC_C, ARC_CC_NC, ARC_CC_V, ARC_CC_NV, 918 ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ, 919 ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC 920 }; 921 922 #define ARC_INVERSE_CONDITION_CODE(X) ((X) ^ 1) 923 924 /* Returns the index of the ARC condition code string in 925 `arc_condition_codes'. COMPARISON should be an rtx like 926 `(eq (...) (...))'. */ 927 928 static int 929 get_arc_condition_code (rtx comparison) 930 { 931 switch (GET_MODE (XEXP (comparison, 0))) 932 { 933 case CCmode: 934 case SImode: /* For BRcc. 
*/ 935 switch (GET_CODE (comparison)) 936 { 937 case EQ : return ARC_CC_EQ; 938 case NE : return ARC_CC_NE; 939 case GT : return ARC_CC_GT; 940 case LE : return ARC_CC_LE; 941 case GE : return ARC_CC_GE; 942 case LT : return ARC_CC_LT; 943 case GTU : return ARC_CC_HI; 944 case LEU : return ARC_CC_LS; 945 case LTU : return ARC_CC_LO; 946 case GEU : return ARC_CC_HS; 947 default : gcc_unreachable (); 948 } 949 case CC_ZNmode: 950 switch (GET_CODE (comparison)) 951 { 952 case EQ : return ARC_CC_EQ; 953 case NE : return ARC_CC_NE; 954 case GE: return ARC_CC_P; 955 case LT: return ARC_CC_N; 956 case GT : return ARC_CC_PNZ; 957 default : gcc_unreachable (); 958 } 959 case CC_Zmode: 960 switch (GET_CODE (comparison)) 961 { 962 case EQ : return ARC_CC_EQ; 963 case NE : return ARC_CC_NE; 964 default : gcc_unreachable (); 965 } 966 case CC_Cmode: 967 switch (GET_CODE (comparison)) 968 { 969 case LTU : return ARC_CC_C; 970 case GEU : return ARC_CC_NC; 971 default : gcc_unreachable (); 972 } 973 case CC_FP_GTmode: 974 if (TARGET_ARGONAUT_SET && TARGET_SPFP) 975 switch (GET_CODE (comparison)) 976 { 977 case GT : return ARC_CC_N; 978 case UNLE: return ARC_CC_P; 979 default : gcc_unreachable (); 980 } 981 else 982 switch (GET_CODE (comparison)) 983 { 984 case GT : return ARC_CC_HI; 985 case UNLE : return ARC_CC_LS; 986 default : gcc_unreachable (); 987 } 988 case CC_FP_GEmode: 989 /* Same for FPX and non-FPX. */ 990 switch (GET_CODE (comparison)) 991 { 992 case GE : return ARC_CC_HS; 993 case UNLT : return ARC_CC_LO; 994 default : gcc_unreachable (); 995 } 996 case CC_FP_UNEQmode: 997 switch (GET_CODE (comparison)) 998 { 999 case UNEQ : return ARC_CC_EQ; 1000 case LTGT : return ARC_CC_NE; 1001 default : gcc_unreachable (); 1002 } 1003 case CC_FP_ORDmode: 1004 switch (GET_CODE (comparison)) 1005 { 1006 case UNORDERED : return ARC_CC_C; 1007 case ORDERED : return ARC_CC_NC; 1008 default : gcc_unreachable (); 1009 } 1010 case CC_FPXmode: 1011 switch (GET_CODE (comparison)) 1012 { 1013 case EQ : return ARC_CC_EQ; 1014 case NE : return ARC_CC_NE; 1015 case UNORDERED : return ARC_CC_C; 1016 case ORDERED : return ARC_CC_NC; 1017 case LTGT : return ARC_CC_HI; 1018 case UNEQ : return ARC_CC_LS; 1019 default : gcc_unreachable (); 1020 } 1021 case CC_FPUmode: 1022 switch (GET_CODE (comparison)) 1023 { 1024 case EQ : return ARC_CC_EQ; 1025 case NE : return ARC_CC_NE; 1026 case GT : return ARC_CC_GT; 1027 case GE : return ARC_CC_GE; 1028 case LT : return ARC_CC_C; 1029 case LE : return ARC_CC_LS; 1030 case UNORDERED : return ARC_CC_V; 1031 case ORDERED : return ARC_CC_NV; 1032 case UNGT : return ARC_CC_HI; 1033 case UNGE : return ARC_CC_HS; 1034 case UNLT : return ARC_CC_LT; 1035 case UNLE : return ARC_CC_LE; 1036 /* UNEQ and LTGT do not have representation. */ 1037 case LTGT : /* Fall through. */ 1038 case UNEQ : /* Fall through. */ 1039 default : gcc_unreachable (); 1040 } 1041 case CC_FPU_UNEQmode: 1042 switch (GET_CODE (comparison)) 1043 { 1044 case LTGT : return ARC_CC_NE; 1045 case UNEQ : return ARC_CC_EQ; 1046 default : gcc_unreachable (); 1047 } 1048 default : gcc_unreachable (); 1049 } 1050 /*NOTREACHED*/ 1051 return (42); 1052 } 1053 1054 /* Return true if COMPARISON has a short form that can accomodate OFFSET. 
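   For reference, the ranges tested below are [-512, 506] for EQ / NE
   and [-64, 58] for the other conditions.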
*/ 1055 1056 bool 1057 arc_short_comparison_p (rtx comparison, int offset) 1058 { 1059 gcc_assert (ARC_CC_NC == ARC_CC_HS); 1060 gcc_assert (ARC_CC_C == ARC_CC_LO); 1061 switch (get_arc_condition_code (comparison)) 1062 { 1063 case ARC_CC_EQ: case ARC_CC_NE: 1064 return offset >= -512 && offset <= 506; 1065 case ARC_CC_GT: case ARC_CC_LE: case ARC_CC_GE: case ARC_CC_LT: 1066 case ARC_CC_HI: case ARC_CC_LS: case ARC_CC_LO: case ARC_CC_HS: 1067 return offset >= -64 && offset <= 58; 1068 default: 1069 return false; 1070 } 1071 } 1072 1073 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, 1074 return the mode to be used for the comparison. */ 1075 1076 machine_mode 1077 arc_select_cc_mode (enum rtx_code op, rtx x, rtx y) 1078 { 1079 machine_mode mode = GET_MODE (x); 1080 rtx x1; 1081 1082 /* For an operation that sets the condition codes as a side-effect, the 1083 C and V flags is not set as for cmp, so we can only use comparisons where 1084 this doesn't matter. (For LT and GE we can use "mi" and "pl" 1085 instead.) */ 1086 /* ??? We could use "pnz" for greater than zero, however, we could then 1087 get into trouble because the comparison could not be reversed. */ 1088 if (GET_MODE_CLASS (mode) == MODE_INT 1089 && y == const0_rtx 1090 && (op == EQ || op == NE 1091 || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x)) <= 4))) 1092 return CC_ZNmode; 1093 1094 /* add.f for if (a+b) */ 1095 if (mode == SImode 1096 && GET_CODE (y) == NEG 1097 && (op == EQ || op == NE)) 1098 return CC_ZNmode; 1099 1100 /* Check if this is a test suitable for bxor.f . */ 1101 if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y) 1102 && ((INTVAL (y) - 1) & INTVAL (y)) == 0 1103 && INTVAL (y)) 1104 return CC_Zmode; 1105 1106 /* Check if this is a test suitable for add / bmsk.f . 
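   For example (illustrative), a comparison like (a & 0xff) == 0xf0
   meets the conditions checked below: the AND mask covers a contiguous
   run of low bits, and ~mask | constant is a small negative number.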
*/ 1107 if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y) 1108 && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1))) 1109 && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0 1110 && (~INTVAL (x1) | INTVAL (y)) < 0 1111 && (~INTVAL (x1) | INTVAL (y)) > -0x800) 1112 return CC_Zmode; 1113 1114 if (GET_MODE (x) == SImode && (op == LTU || op == GEU) 1115 && GET_CODE (x) == PLUS 1116 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) 1117 return CC_Cmode; 1118 1119 if (TARGET_ARGONAUT_SET 1120 && ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP))) 1121 switch (op) 1122 { 1123 case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED: 1124 return CC_FPXmode; 1125 case LT: case UNGE: case GT: case UNLE: 1126 return CC_FP_GTmode; 1127 case LE: case UNGT: case GE: case UNLT: 1128 return CC_FP_GEmode; 1129 default: gcc_unreachable (); 1130 } 1131 else if (TARGET_HARD_FLOAT 1132 && ((mode == SFmode && TARGET_FP_SP_BASE) 1133 || (mode == DFmode && TARGET_FP_DP_BASE))) 1134 switch (op) 1135 { 1136 case EQ: 1137 case NE: 1138 case UNORDERED: 1139 case ORDERED: 1140 case UNLT: 1141 case UNLE: 1142 case UNGT: 1143 case UNGE: 1144 case LT: 1145 case LE: 1146 case GT: 1147 case GE: 1148 return CC_FPUmode; 1149 1150 case LTGT: 1151 case UNEQ: 1152 return CC_FPU_UNEQmode; 1153 1154 default: 1155 gcc_unreachable (); 1156 } 1157 else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE) 1158 { 1159 switch (op) 1160 { 1161 case EQ: case NE: return CC_Zmode; 1162 case LT: case UNGE: 1163 case GT: case UNLE: return CC_FP_GTmode; 1164 case LE: case UNGT: 1165 case GE: case UNLT: return CC_FP_GEmode; 1166 case UNEQ: case LTGT: return CC_FP_UNEQmode; 1167 case ORDERED: case UNORDERED: return CC_FP_ORDmode; 1168 default: gcc_unreachable (); 1169 } 1170 } 1171 return CCmode; 1172 } 1173 1174 /* Vectors to keep interesting information about registers where it can easily 1175 be got. We use to use the actual mode value as the bit number, but there 1176 is (or may be) more than 32 modes now. Instead we use two tables: one 1177 indexed by hard register number, and one indexed by mode. */ 1178 1179 /* The purpose of arc_mode_class is to shrink the range of modes so that 1180 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is 1181 mapped into one arc_mode_class mode. */ 1182 1183 enum arc_mode_class { 1184 C_MODE, 1185 S_MODE, D_MODE, T_MODE, O_MODE, 1186 SF_MODE, DF_MODE, TF_MODE, OF_MODE, 1187 V_MODE 1188 }; 1189 1190 /* Modes for condition codes. */ 1191 #define C_MODES (1 << (int) C_MODE) 1192 1193 /* Modes for single-word and smaller quantities. */ 1194 #define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) 1195 1196 /* Modes for double-word and smaller quantities. */ 1197 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE)) 1198 1199 /* Mode for 8-byte DF values only. */ 1200 #define DF_MODES (1 << DF_MODE) 1201 1202 /* Modes for quad-word and smaller quantities. */ 1203 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE)) 1204 1205 /* Modes for 128-bit vectors. */ 1206 #define V_MODES (1 << (int) V_MODE) 1207 1208 /* Value is 1 if register/mode pair is acceptable on arc. 
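   Each entry below is indexed by hard register number and holds a mask
   of arc_mode_class bits.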
*/ 1209 1210 unsigned int arc_hard_regno_mode_ok[] = { 1211 T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, 1212 T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, 1213 T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES, 1214 D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, 1215 1216 /* ??? Leave these as S_MODES for now. */ 1217 S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, 1218 DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES, 1219 S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, 1220 S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES, 1221 1222 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, 1223 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, 1224 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, 1225 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, 1226 1227 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, 1228 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, 1229 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, 1230 V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, 1231 1232 S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, 1233 S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES 1234 }; 1235 1236 unsigned int arc_mode_class [NUM_MACHINE_MODES]; 1237 1238 enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER]; 1239 1240 enum reg_class 1241 arc_preferred_reload_class (rtx, enum reg_class cl) 1242 { 1243 if ((cl) == CHEAP_CORE_REGS || (cl) == WRITABLE_CORE_REGS) 1244 return GENERAL_REGS; 1245 return cl; 1246 } 1247 1248 /* Initialize the arc_mode_class array. */ 1249 1250 static void 1251 arc_init_reg_tables (void) 1252 { 1253 int i; 1254 1255 for (i = 0; i < NUM_MACHINE_MODES; i++) 1256 { 1257 machine_mode m = (machine_mode) i; 1258 1259 switch (GET_MODE_CLASS (m)) 1260 { 1261 case MODE_INT: 1262 case MODE_PARTIAL_INT: 1263 case MODE_COMPLEX_INT: 1264 if (GET_MODE_SIZE (m) <= 4) 1265 arc_mode_class[i] = 1 << (int) S_MODE; 1266 else if (GET_MODE_SIZE (m) == 8) 1267 arc_mode_class[i] = 1 << (int) D_MODE; 1268 else if (GET_MODE_SIZE (m) == 16) 1269 arc_mode_class[i] = 1 << (int) T_MODE; 1270 else if (GET_MODE_SIZE (m) == 32) 1271 arc_mode_class[i] = 1 << (int) O_MODE; 1272 else 1273 arc_mode_class[i] = 0; 1274 break; 1275 case MODE_FLOAT: 1276 case MODE_COMPLEX_FLOAT: 1277 if (GET_MODE_SIZE (m) <= 4) 1278 arc_mode_class[i] = 1 << (int) SF_MODE; 1279 else if (GET_MODE_SIZE (m) == 8) 1280 arc_mode_class[i] = 1 << (int) DF_MODE; 1281 else if (GET_MODE_SIZE (m) == 16) 1282 arc_mode_class[i] = 1 << (int) TF_MODE; 1283 else if (GET_MODE_SIZE (m) == 32) 1284 arc_mode_class[i] = 1 << (int) OF_MODE; 1285 else 1286 arc_mode_class[i] = 0; 1287 break; 1288 case MODE_VECTOR_INT: 1289 if (GET_MODE_SIZE (m) == 4) 1290 arc_mode_class[i] = (1 << (int) S_MODE); 1291 else if (GET_MODE_SIZE (m) == 8) 1292 arc_mode_class[i] = (1 << (int) D_MODE); 1293 else 1294 arc_mode_class[i] = (1 << (int) V_MODE); 1295 break; 1296 case MODE_CC: 1297 default: 1298 /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so 1299 we must explicitly check for them here. 
*/ 1300 if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode 1301 || i == (int) CC_Cmode 1302 || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode 1303 || i == CC_FPUmode || i == CC_FPU_UNEQmode) 1304 arc_mode_class[i] = 1 << (int) C_MODE; 1305 else 1306 arc_mode_class[i] = 0; 1307 break; 1308 } 1309 } 1310 } 1311 1312 /* Core registers 56..59 are used for multiply extension options. 1313 The dsp option uses r56 and r57, which are then named acc1 and acc2. 1314 acc1 is the highpart, and acc2 the lowpart, so which register gets which 1315 number depends on endianness. 1316 The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi. 1317 Because mlo / mhi form a 64 bit value, we use different gcc internal 1318 register numbers to make them form a register pair as the gcc internals 1319 know it. mmid gets number 57, if still available, and mlo / mhi get 1320 number 58 and 59, depending on endianness. We use DBX_REGISTER_NUMBER 1321 to map this back. */ 1322 char rname56[5] = "r56"; 1323 char rname57[5] = "r57"; 1324 char rname58[5] = "r58"; 1325 char rname59[5] = "r59"; 1326 char rname29[7] = "ilink1"; 1327 char rname30[7] = "ilink2"; 1328 1329 static void 1330 arc_conditional_register_usage (void) 1331 { 1332 int regno; 1333 int i; 1334 int fix_start = 60, fix_end = 55; 1335 1336 if (TARGET_V2) 1337 { 1338 /* For ARCv2 the core register set is changed. */ 1339 strcpy (rname29, "ilink"); 1340 strcpy (rname30, "r30"); 1341 fixed_regs[30] = call_used_regs[30] = 1; 1342 } 1343 1344 if (TARGET_MUL64_SET) 1345 { 1346 fix_start = 57; 1347 fix_end = 59; 1348 1349 /* We don't provide a name for mmid. In rtl / assembly resource lists, 1350 you are supposed to refer to it as mlo & mhi, e.g. 1351 (zero_extract:SI (reg:DI 58) (const_int 32) (16)) . 1352 In an actual asm instruction, you would of course use mmid. 1353 The point of not having a separate register for mmid is that 1354 this way, we don't have to carry clobbers of that reg around in every 1355 instruction that modifies mlo and/or mhi. */ 1356 strcpy (rname57, ""); 1357 strcpy (rname58, TARGET_BIG_ENDIAN ? "mhi" : "mlo"); 1358 strcpy (rname59, TARGET_BIG_ENDIAN ? "mlo" : "mhi"); 1359 } 1360 1361 /* arc_tp_regno really behaves like a global register; however, 1362 globalize_reg requires a declaration. 1363 We use EPILOGUE_USES to compensate so that sets from 1364 __builtin_set_frame_pointer are not deleted. */ 1365 if (arc_tp_regno != -1) 1366 fixed_regs[arc_tp_regno] = call_used_regs[arc_tp_regno] = 1; 1367 1368 if (TARGET_MULMAC_32BY16_SET) 1369 { 1370 fix_start = 56; 1371 fix_end = fix_end > 57 ? fix_end : 57; 1372 strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2"); 1373 strcpy (rname57, TARGET_BIG_ENDIAN ?
"acc2" : "acc1"); 1374 } 1375 for (regno = fix_start; regno <= fix_end; regno++) 1376 { 1377 if (!fixed_regs[regno]) 1378 warning (0, "multiply option implies r%d is fixed", regno); 1379 fixed_regs [regno] = call_used_regs[regno] = 1; 1380 } 1381 if (TARGET_Q_CLASS) 1382 { 1383 if (optimize_size) 1384 { 1385 reg_alloc_order[0] = 0; 1386 reg_alloc_order[1] = 1; 1387 reg_alloc_order[2] = 2; 1388 reg_alloc_order[3] = 3; 1389 reg_alloc_order[4] = 12; 1390 reg_alloc_order[5] = 13; 1391 reg_alloc_order[6] = 14; 1392 reg_alloc_order[7] = 15; 1393 reg_alloc_order[8] = 4; 1394 reg_alloc_order[9] = 5; 1395 reg_alloc_order[10] = 6; 1396 reg_alloc_order[11] = 7; 1397 reg_alloc_order[12] = 8; 1398 reg_alloc_order[13] = 9; 1399 reg_alloc_order[14] = 10; 1400 reg_alloc_order[15] = 11; 1401 } 1402 else 1403 { 1404 reg_alloc_order[2] = 12; 1405 reg_alloc_order[3] = 13; 1406 reg_alloc_order[4] = 14; 1407 reg_alloc_order[5] = 15; 1408 reg_alloc_order[6] = 1; 1409 reg_alloc_order[7] = 0; 1410 reg_alloc_order[8] = 4; 1411 reg_alloc_order[9] = 5; 1412 reg_alloc_order[10] = 6; 1413 reg_alloc_order[11] = 7; 1414 reg_alloc_order[12] = 8; 1415 reg_alloc_order[13] = 9; 1416 reg_alloc_order[14] = 10; 1417 reg_alloc_order[15] = 11; 1418 } 1419 } 1420 if (TARGET_SIMD_SET) 1421 { 1422 int i; 1423 for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++) 1424 reg_alloc_order [i] = i; 1425 for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG; 1426 i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++) 1427 reg_alloc_order [i] = i; 1428 } 1429 /* For ARC600, lp_count may not be read in an instruction 1430 following immediately after another one setting it to a new value. 1431 There was some discussion on how to enforce scheduling constraints for 1432 processors with missing interlocks on the gcc mailing list: 1433 http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html . 1434 However, we can't actually use this approach, because for ARC the 1435 delay slot scheduling pass is active, which runs after 1436 machine_dependent_reorg. */ 1437 if (TARGET_ARC600) 1438 CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT); 1439 else if (!TARGET_LP_WR_INTERLOCK) 1440 fixed_regs[LP_COUNT] = 1; 1441 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 1442 if (!call_used_regs[regno]) 1443 CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno); 1444 for (regno = 32; regno < 60; regno++) 1445 if (!fixed_regs[regno]) 1446 SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno); 1447 if (!TARGET_ARC600_FAMILY) 1448 { 1449 for (regno = 32; regno <= 60; regno++) 1450 CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno); 1451 1452 /* If they have used -ffixed-lp_count, make sure it takes 1453 effect. */ 1454 if (fixed_regs[LP_COUNT]) 1455 { 1456 CLEAR_HARD_REG_BIT (reg_class_contents[LPCOUNT_REG], LP_COUNT); 1457 CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT); 1458 CLEAR_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], LP_COUNT); 1459 1460 /* Instead of taking out SF_MODE like below, forbid it outright. */ 1461 arc_hard_regno_mode_ok[60] = 0; 1462 } 1463 else 1464 arc_hard_regno_mode_ok[60] = 1 << (int) S_MODE; 1465 } 1466 1467 /* ARCHS has 64-bit data-path which makes use of the even-odd paired 1468 registers. 
*/ 1469 if (TARGET_HS) 1470 { 1471 for (regno = 1; regno < 32; regno +=2) 1472 { 1473 arc_hard_regno_mode_ok[regno] = S_MODES; 1474 } 1475 } 1476 1477 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 1478 { 1479 if (i < 29) 1480 { 1481 if ((TARGET_Q_CLASS || TARGET_RRQ_CLASS) 1482 && ((i <= 3) || ((i >= 12) && (i <= 15)))) 1483 arc_regno_reg_class[i] = ARCOMPACT16_REGS; 1484 else 1485 arc_regno_reg_class[i] = GENERAL_REGS; 1486 } 1487 else if (i < 60) 1488 arc_regno_reg_class[i] 1489 = (fixed_regs[i] 1490 ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i) 1491 ? CHEAP_CORE_REGS : ALL_CORE_REGS) 1492 : (((!TARGET_ARC600_FAMILY) 1493 && TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)) 1494 ? CHEAP_CORE_REGS : WRITABLE_CORE_REGS)); 1495 else 1496 arc_regno_reg_class[i] = NO_REGS; 1497 } 1498 1499 /* ARCOMPACT16_REGS is empty, if TARGET_Q_CLASS / TARGET_RRQ_CLASS 1500 has not been activated. */ 1501 if (!TARGET_Q_CLASS && !TARGET_RRQ_CLASS) 1502 CLEAR_HARD_REG_SET(reg_class_contents [ARCOMPACT16_REGS]); 1503 if (!TARGET_Q_CLASS) 1504 CLEAR_HARD_REG_SET(reg_class_contents [AC16_BASE_REGS]); 1505 1506 gcc_assert (FIRST_PSEUDO_REGISTER >= 144); 1507 1508 /* Handle Special Registers. */ 1509 arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register. */ 1510 if (!TARGET_V2) 1511 arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register. */ 1512 arc_regno_reg_class[31] = LINK_REGS; /* blink register. */ 1513 arc_regno_reg_class[60] = LPCOUNT_REG; 1514 arc_regno_reg_class[61] = NO_REGS; /* CC_REG: must be NO_REGS. */ 1515 arc_regno_reg_class[62] = GENERAL_REGS; 1516 1517 if (TARGET_DPFP) 1518 { 1519 for (i = 40; i < 44; ++i) 1520 { 1521 arc_regno_reg_class[i] = DOUBLE_REGS; 1522 1523 /* Unless they want us to do 'mov d1, 0x00000000' make sure 1524 no attempt is made to use such a register as a destination 1525 operand in *movdf_insn. */ 1526 if (!TARGET_ARGONAUT_SET) 1527 { 1528 /* Make sure no 'c', 'w', 'W', or 'Rac' constraint is 1529 interpreted to mean they can use D1 or D2 in their insn. */ 1530 CLEAR_HARD_REG_BIT(reg_class_contents[CHEAP_CORE_REGS ], i); 1531 CLEAR_HARD_REG_BIT(reg_class_contents[ALL_CORE_REGS ], i); 1532 CLEAR_HARD_REG_BIT(reg_class_contents[WRITABLE_CORE_REGS ], i); 1533 CLEAR_HARD_REG_BIT(reg_class_contents[MPY_WRITABLE_CORE_REGS], i); 1534 } 1535 } 1536 } 1537 else 1538 { 1539 /* Disable all DOUBLE_REGISTER settings, 1540 if not generating DPFP code. */ 1541 arc_regno_reg_class[40] = ALL_REGS; 1542 arc_regno_reg_class[41] = ALL_REGS; 1543 arc_regno_reg_class[42] = ALL_REGS; 1544 arc_regno_reg_class[43] = ALL_REGS; 1545 1546 arc_hard_regno_mode_ok[40] = 0; 1547 arc_hard_regno_mode_ok[42] = 0; 1548 1549 CLEAR_HARD_REG_SET(reg_class_contents [DOUBLE_REGS]); 1550 } 1551 1552 if (TARGET_SIMD_SET) 1553 { 1554 gcc_assert (ARC_FIRST_SIMD_VR_REG == 64); 1555 gcc_assert (ARC_LAST_SIMD_VR_REG == 127); 1556 1557 for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++) 1558 arc_regno_reg_class [i] = SIMD_VR_REGS; 1559 1560 gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128); 1561 gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128); 1562 gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136); 1563 gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG == 143); 1564 1565 for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG; 1566 i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++) 1567 arc_regno_reg_class [i] = SIMD_DMA_CONFIG_REGS; 1568 } 1569 1570 /* pc : r63 */ 1571 arc_regno_reg_class[PROGRAM_COUNTER_REGNO] = GENERAL_REGS; 1572 1573 /*ARCV2 Accumulator. 
*/ 1574 if (TARGET_V2 1575 && (TARGET_FP_DP_FUSED || TARGET_FP_SP_FUSED)) 1576 { 1577 arc_regno_reg_class[ACCL_REGNO] = WRITABLE_CORE_REGS; 1578 arc_regno_reg_class[ACCH_REGNO] = WRITABLE_CORE_REGS; 1579 SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], ACCL_REGNO); 1580 SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], ACCH_REGNO); 1581 SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], ACCL_REGNO); 1582 SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], ACCH_REGNO); 1583 arc_hard_regno_mode_ok[ACC_REG_FIRST] = D_MODES; 1584 } 1585 } 1586 1587 /* Handle an "interrupt" attribute; arguments as in 1588 struct attribute_spec.handler. */ 1589 1590 static tree 1591 arc_handle_interrupt_attribute (tree *, tree name, tree args, int, 1592 bool *no_add_attrs) 1593 { 1594 gcc_assert (args); 1595 1596 tree value = TREE_VALUE (args); 1597 1598 if (TREE_CODE (value) != STRING_CST) 1599 { 1600 warning (OPT_Wattributes, 1601 "argument of %qE attribute is not a string constant", 1602 name); 1603 *no_add_attrs = true; 1604 } 1605 else if (strcmp (TREE_STRING_POINTER (value), "ilink1") 1606 && strcmp (TREE_STRING_POINTER (value), "ilink2") 1607 && !TARGET_V2) 1608 { 1609 warning (OPT_Wattributes, 1610 "argument of %qE attribute is not \"ilink1\" or \"ilink2\"", 1611 name); 1612 *no_add_attrs = true; 1613 } 1614 else if (TARGET_V2 1615 && strcmp (TREE_STRING_POINTER (value), "ilink")) 1616 { 1617 warning (OPT_Wattributes, 1618 "argument of %qE attribute is not \"ilink\"", 1619 name); 1620 *no_add_attrs = true; 1621 } 1622 1623 return NULL_TREE; 1624 } 1625 1626 /* Return zero if TYPE1 and TYPE are incompatible, one if they are compatible, 1627 and two if they are nearly compatible (which causes a warning to be 1628 generated). */ 1629 1630 static int 1631 arc_comp_type_attributes (const_tree type1, 1632 const_tree type2) 1633 { 1634 int l1, l2, m1, m2, s1, s2; 1635 1636 /* Check for mismatch of non-default calling convention. */ 1637 if (TREE_CODE (type1) != FUNCTION_TYPE) 1638 return 1; 1639 1640 /* Check for mismatched call attributes. */ 1641 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL; 1642 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL; 1643 m1 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type1)) != NULL; 1644 m2 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type2)) != NULL; 1645 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL; 1646 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL; 1647 1648 /* Only bother to check if an attribute is defined. */ 1649 if (l1 | l2 | m1 | m2 | s1 | s2) 1650 { 1651 /* If one type has an attribute, the other must have the same attribute. */ 1652 if ((l1 != l2) || (m1 != m2) || (s1 != s2)) 1653 return 0; 1654 1655 /* Disallow mixed attributes. */ 1656 if (l1 + m1 + s1 > 1) 1657 return 0; 1658 } 1659 1660 1661 return 1; 1662 } 1663 1664 /* Set the default attributes for TYPE. */ 1665 1666 void 1667 arc_set_default_type_attributes (tree type ATTRIBUTE_UNUSED) 1668 { 1669 gcc_unreachable(); 1670 } 1671 1672 /* Misc. utilities. */ 1673 1674 /* X and Y are two things to compare using CODE. Emit the compare insn and 1675 return the rtx for the cc reg in the proper mode. 
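   For example (illustrative), for a signed SImode greater-than test
   the returned rtx has the shape (gt:SI (reg:CC 61) (const_int 0)),
   with the compare itself emitted separately as an insn that sets
   register 61.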
*/ 1676 1677 rtx 1678 gen_compare_reg (rtx comparison, machine_mode omode) 1679 { 1680 enum rtx_code code = GET_CODE (comparison); 1681 rtx x = XEXP (comparison, 0); 1682 rtx y = XEXP (comparison, 1); 1683 rtx tmp, cc_reg; 1684 machine_mode mode, cmode; 1685 1686 1687 cmode = GET_MODE (x); 1688 if (cmode == VOIDmode) 1689 cmode = GET_MODE (y); 1690 gcc_assert (cmode == SImode || cmode == SFmode || cmode == DFmode); 1691 if (cmode == SImode) 1692 { 1693 if (!register_operand (x, SImode)) 1694 { 1695 if (register_operand (y, SImode)) 1696 { 1697 tmp = x; 1698 x = y; 1699 y = tmp; 1700 code = swap_condition (code); 1701 } 1702 else 1703 x = copy_to_mode_reg (SImode, x); 1704 } 1705 if (GET_CODE (y) == SYMBOL_REF && flag_pic) 1706 y = copy_to_mode_reg (SImode, y); 1707 } 1708 else 1709 { 1710 x = force_reg (cmode, x); 1711 y = force_reg (cmode, y); 1712 } 1713 mode = SELECT_CC_MODE (code, x, y); 1714 1715 cc_reg = gen_rtx_REG (mode, CC_REG); 1716 1717 /* ??? FIXME (x-y)==0, as done by both cmpsfpx_raw and 1718 cmpdfpx_raw, is not a correct comparison for floats: 1719 http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm 1720 */ 1721 if (TARGET_ARGONAUT_SET 1722 && ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP))) 1723 { 1724 switch (code) 1725 { 1726 case NE: case EQ: case LT: case UNGE: case LE: case UNGT: 1727 case UNEQ: case LTGT: case ORDERED: case UNORDERED: 1728 break; 1729 case GT: case UNLE: case GE: case UNLT: 1730 code = swap_condition (code); 1731 tmp = x; 1732 x = y; 1733 y = tmp; 1734 break; 1735 default: 1736 gcc_unreachable (); 1737 } 1738 if (cmode == SFmode) 1739 { 1740 emit_insn (gen_cmpsfpx_raw (x, y)); 1741 } 1742 else /* DFmode */ 1743 { 1744 /* Accepts Dx regs directly by insns. */ 1745 emit_insn (gen_cmpdfpx_raw (x, y)); 1746 } 1747 1748 if (mode != CC_FPXmode) 1749 emit_insn (gen_rtx_SET (cc_reg, 1750 gen_rtx_COMPARE (mode, 1751 gen_rtx_REG (CC_FPXmode, 61), 1752 const0_rtx))); 1753 } 1754 else if (TARGET_FPX_QUARK && (cmode == SFmode)) 1755 { 1756 switch (code) 1757 { 1758 case NE: case EQ: case GT: case UNLE: case GE: case UNLT: 1759 case UNEQ: case LTGT: case ORDERED: case UNORDERED: 1760 break; 1761 case LT: case UNGE: case LE: case UNGT: 1762 code = swap_condition (code); 1763 tmp = x; 1764 x = y; 1765 y = tmp; 1766 break; 1767 default: 1768 gcc_unreachable (); 1769 } 1770 1771 emit_insn (gen_cmp_quark (cc_reg, 1772 gen_rtx_COMPARE (mode, x, y))); 1773 } 1774 else if (TARGET_HARD_FLOAT 1775 && ((cmode == SFmode && TARGET_FP_SP_BASE) 1776 || (cmode == DFmode && TARGET_FP_DP_BASE))) 1777 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y))); 1778 else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE) 1779 { 1780 rtx op0 = gen_rtx_REG (cmode, 0); 1781 rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD); 1782 bool swap = false; 1783 1784 switch (code) 1785 { 1786 case NE: case EQ: case GT: case UNLE: case GE: case UNLT: 1787 case UNEQ: case LTGT: case ORDERED: case UNORDERED: 1788 break; 1789 case LT: case UNGE: case LE: case UNGT: 1790 code = swap_condition (code); 1791 swap = true; 1792 break; 1793 default: 1794 gcc_unreachable (); 1795 } 1796 if (currently_expanding_to_rtl) 1797 { 1798 if (swap) 1799 { 1800 tmp = x; 1801 x = y; 1802 y = tmp; 1803 } 1804 emit_move_insn (op0, x); 1805 emit_move_insn (op1, y); 1806 } 1807 else 1808 { 1809 gcc_assert (rtx_equal_p (op0, x)); 1810 gcc_assert (rtx_equal_p (op1, y)); 1811 if (swap) 1812 { 1813 op0 = y; 1814 op1 = x; 1815 } 1816 } 1817 emit_insn 
(gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1))); 1818 } 1819 else 1820 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y))); 1821 return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx); 1822 } 1823 1824 /* Return true if VALUE, a const_double, will fit in a limm (4 byte number). 1825 We assume the value can be either signed or unsigned. */ 1826 1827 bool 1828 arc_double_limm_p (rtx value) 1829 { 1830 HOST_WIDE_INT low, high; 1831 1832 gcc_assert (GET_CODE (value) == CONST_DOUBLE); 1833 1834 if (TARGET_DPFP) 1835 return true; 1836 1837 low = CONST_DOUBLE_LOW (value); 1838 high = CONST_DOUBLE_HIGH (value); 1839 1840 if (low & 0x80000000) 1841 { 1842 return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0) 1843 || (((low & - (unsigned HOST_WIDE_INT) 0x80000000) 1844 == - (unsigned HOST_WIDE_INT) 0x80000000) 1845 && high == -1)); 1846 } 1847 else 1848 { 1849 return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0; 1850 } 1851 } 1852 1853 /* Do any needed setup for a variadic function. For the ARC, we must 1854 create a register parameter block, and then copy any anonymous arguments 1855 in registers to memory. 1856 1857 CUM has not been updated for the last named argument which has type TYPE 1858 and mode MODE, and we rely on this fact. */ 1859 1860 static void 1861 arc_setup_incoming_varargs (cumulative_args_t args_so_far, 1862 machine_mode mode, tree type, 1863 int *pretend_size, int no_rtl) 1864 { 1865 int first_anon_arg; 1866 CUMULATIVE_ARGS next_cum; 1867 1868 /* We must treat `__builtin_va_alist' as an anonymous arg. */ 1869 1870 next_cum = *get_cumulative_args (args_so_far); 1871 arc_function_arg_advance (pack_cumulative_args (&next_cum), 1872 mode, type, true); 1873 first_anon_arg = next_cum; 1874 1875 if (FUNCTION_ARG_REGNO_P (first_anon_arg)) 1876 { 1877 /* First anonymous (unnamed) argument is in a reg. */ 1878 1879 /* Note that first_reg_offset < MAX_ARC_PARM_REGS. */ 1880 int first_reg_offset = first_anon_arg; 1881 1882 if (!no_rtl) 1883 { 1884 rtx regblock 1885 = gen_rtx_MEM (BLKmode, plus_constant (Pmode, arg_pointer_rtx, 1886 FIRST_PARM_OFFSET (0))); 1887 move_block_from_reg (first_reg_offset, regblock, 1888 MAX_ARC_PARM_REGS - first_reg_offset); 1889 } 1890 1891 *pretend_size 1892 = ((MAX_ARC_PARM_REGS - first_reg_offset ) * UNITS_PER_WORD); 1893 } 1894 } 1895 1896 /* Cost functions. */ 1897 1898 /* Provide the costs of an addressing mode that contains ADDR. 1899 If ADDR is not a valid address, its cost is irrelevant. */ 1900 1901 int 1902 arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed) 1903 { 1904 switch (GET_CODE (addr)) 1905 { 1906 case REG : 1907 return speed || satisfies_constraint_Rcq (addr) ? 0 : 1; 1908 case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: 1909 case PRE_MODIFY: case POST_MODIFY: 1910 return !speed; 1911 1912 case LABEL_REF : 1913 case SYMBOL_REF : 1914 case CONST : 1915 if (TARGET_NPS_CMEM && cmem_address (addr, SImode)) 1916 return 0; 1917 /* Most likely needs a LIMM. */ 1918 return COSTS_N_INSNS (1); 1919 1920 case PLUS : 1921 { 1922 register rtx plus0 = XEXP (addr, 0); 1923 register rtx plus1 = XEXP (addr, 1); 1924 1925 if (GET_CODE (plus0) != REG 1926 && (GET_CODE (plus0) != MULT 1927 || !CONST_INT_P (XEXP (plus0, 1)) 1928 || (INTVAL (XEXP (plus0, 1)) != 2 1929 && INTVAL (XEXP (plus0, 1)) != 4))) 1930 break; 1931 1932 switch (GET_CODE (plus1)) 1933 { 1934 case CONST_INT : 1935 return (!RTX_OK_FOR_OFFSET_P (SImode, plus1) 1936 ? COSTS_N_INSNS (1) 1937 : speed 1938 ? 
0 1939 : (satisfies_constraint_Rcq (plus0) 1940 && satisfies_constraint_O (plus1)) 1941 ? 0 1942 : 1); 1943 case REG: 1944 return (speed < 1 ? 0 1945 : (satisfies_constraint_Rcq (plus0) 1946 && satisfies_constraint_Rcq (plus1)) 1947 ? 0 : 1); 1948 case CONST : 1949 case SYMBOL_REF : 1950 case LABEL_REF : 1951 return COSTS_N_INSNS (1); 1952 default: 1953 break; 1954 } 1955 break; 1956 } 1957 default: 1958 break; 1959 } 1960 1961 return 4; 1962 } 1963 1964 /* Emit instruction X with the frame related bit set. */ 1965 1966 static rtx 1967 frame_insn (rtx x) 1968 { 1969 x = emit_insn (x); 1970 RTX_FRAME_RELATED_P (x) = 1; 1971 return x; 1972 } 1973 1974 /* Emit a frame insn to move SRC to DST. */ 1975 1976 static rtx 1977 frame_move (rtx dst, rtx src) 1978 { 1979 rtx tmp = gen_rtx_SET (dst, src); 1980 RTX_FRAME_RELATED_P (tmp) = 1; 1981 return frame_insn (tmp); 1982 } 1983 1984 /* Like frame_move, but add a REG_INC note for REG if ADDR contains an 1985 auto increment address, or is zero. */ 1986 1987 static rtx 1988 frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr) 1989 { 1990 rtx insn = frame_move (dst, src); 1991 1992 if (!addr 1993 || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC 1994 || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY) 1995 add_reg_note (insn, REG_INC, reg); 1996 return insn; 1997 } 1998 1999 /* Emit a frame insn which adjusts a frame address register REG by OFFSET. */ 2000 2001 static rtx 2002 frame_add (rtx reg, HOST_WIDE_INT offset) 2003 { 2004 gcc_assert ((offset & 0x3) == 0); 2005 if (!offset) 2006 return NULL_RTX; 2007 return frame_move (reg, plus_constant (Pmode, reg, offset)); 2008 } 2009 2010 /* Emit a frame insn which adjusts stack pointer by OFFSET. */ 2011 2012 static rtx 2013 frame_stack_add (HOST_WIDE_INT offset) 2014 { 2015 return frame_add (stack_pointer_rtx, offset); 2016 } 2017 2018 /* Traditionally, we push saved registers first in the prologue, 2019 then we allocate the rest of the frame - and reverse in the epilogue. 2020 This has still its merits for ease of debugging, or saving code size 2021 or even execution time if the stack frame is so large that some accesses 2022 can't be encoded anymore with offsets in the instruction code when using 2023 a different scheme. 2024 Also, it would be a good starting point if we got instructions to help 2025 with register save/restore. 2026 2027 However, often stack frames are small, and the pushing / popping has 2028 some costs: 2029 - the stack modification prevents a lot of scheduling. 2030 - frame allocation / deallocation needs extra instructions. 2031 - unless we know that we compile ARC700 user code, we need to put 2032 a memory barrier after frame allocation / before deallocation to 2033 prevent interrupts clobbering our data in the frame. 2034 In particular, we don't have any such guarantees for library functions, 2035 which tend to, on the other hand, to have small frames. 2036 2037 Thus, for small frames, we'd like to use a different scheme: 2038 - The frame is allocated in full with the first prologue instruction, 2039 and deallocated in full with the last epilogue instruction. 2040 Thus, the instructions in-betwen can be freely scheduled. 2041 - If the function has no outgoing arguments on the stack, we can allocate 2042 one register save slot at the top of the stack. This register can then 2043 be saved simultanously with frame allocation, and restored with 2044 frame deallocation. 
2045 This register can be picked depending on scheduling considerations, 2046 although same though should go into having some set of registers 2047 to be potentially lingering after a call, and others to be available 2048 immediately - i.e. in the absence of interprocedual optimization, we 2049 can use an ABI-like convention for register allocation to reduce 2050 stalls after function return. */ 2051 /* Function prologue/epilogue handlers. */ 2052 2053 /* ARCompact stack frames look like: 2054 2055 Before call After call 2056 high +-----------------------+ +-----------------------+ 2057 mem | reg parm save area | | reg parm save area | 2058 | only created for | | only created for | 2059 | variable arg fns | | variable arg fns | 2060 AP +-----------------------+ +-----------------------+ 2061 | return addr register | | return addr register | 2062 | (if required) | | (if required) | 2063 +-----------------------+ +-----------------------+ 2064 | | | | 2065 | reg save area | | reg save area | 2066 | | | | 2067 +-----------------------+ +-----------------------+ 2068 | frame pointer | | frame pointer | 2069 | (if required) | | (if required) | 2070 FP +-----------------------+ +-----------------------+ 2071 | | | | 2072 | local/temp variables | | local/temp variables | 2073 | | | | 2074 +-----------------------+ +-----------------------+ 2075 | | | | 2076 | arguments on stack | | arguments on stack | 2077 | | | | 2078 SP +-----------------------+ +-----------------------+ 2079 | reg parm save area | 2080 | only created for | 2081 | variable arg fns | 2082 AP +-----------------------+ 2083 | return addr register | 2084 | (if required) | 2085 +-----------------------+ 2086 | | 2087 | reg save area | 2088 | | 2089 +-----------------------+ 2090 | frame pointer | 2091 | (if required) | 2092 FP +-----------------------+ 2093 | | 2094 | local/temp variables | 2095 | | 2096 +-----------------------+ 2097 | | 2098 | arguments on stack | 2099 low | | 2100 mem SP +-----------------------+ 2101 2102 Notes: 2103 1) The "reg parm save area" does not exist for non variable argument fns. 2104 The "reg parm save area" can be eliminated completely if we created our 2105 own va-arc.h, but that has tradeoffs as well (so it's not done). */ 2106 2107 /* Structure to be filled in by arc_compute_frame_size with register 2108 save masks, and offsets for the current function. */ 2109 struct GTY (()) arc_frame_info 2110 { 2111 unsigned int total_size; /* # bytes that the entire frame takes up. */ 2112 unsigned int extra_size; /* # bytes of extra stuff. */ 2113 unsigned int pretend_size; /* # bytes we push and pretend caller did. */ 2114 unsigned int args_size; /* # bytes that outgoing arguments take up. */ 2115 unsigned int reg_size; /* # bytes needed to store regs. */ 2116 unsigned int var_size; /* # bytes that variables take up. */ 2117 unsigned int reg_offset; /* Offset from new sp to store regs. */ 2118 unsigned int gmask; /* Mask of saved gp registers. */ 2119 int initialized; /* Nonzero if frame size already calculated. */ 2120 short millicode_start_reg; 2121 short millicode_end_reg; 2122 bool save_return_addr; 2123 }; 2124 2125 /* Defining data structures for per-function information. */ 2126 2127 typedef struct GTY (()) machine_function 2128 { 2129 enum arc_function_type fn_type; 2130 struct arc_frame_info frame_info; 2131 /* To keep track of unalignment caused by short insns. */ 2132 int unalign; 2133 int force_short_suffix; /* Used when disgorging return delay slot insns. 
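   That is, when the insn sitting in a return's delay slot has to be taken
   out and emitted on its own ahead of the return.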
*/ 2134 const char *size_reason; 2135 struct arc_ccfsm ccfsm_current; 2136 /* Map from uid to ccfsm state during branch shortening. */ 2137 rtx ccfsm_current_insn; 2138 char arc_reorg_started; 2139 char prescan_initialized; 2140 } machine_function; 2141 2142 /* Type of function DECL. 2143 2144 The result is cached. To reset the cache at the end of a function, 2145 call with DECL = NULL_TREE. */ 2146 2147 enum arc_function_type 2148 arc_compute_function_type (struct function *fun) 2149 { 2150 tree decl = fun->decl; 2151 tree a; 2152 enum arc_function_type fn_type = fun->machine->fn_type; 2153 2154 if (fn_type != ARC_FUNCTION_UNKNOWN) 2155 return fn_type; 2156 2157 /* Assume we have a normal function (not an interrupt handler). */ 2158 fn_type = ARC_FUNCTION_NORMAL; 2159 2160 /* Now see if this is an interrupt handler. */ 2161 for (a = DECL_ATTRIBUTES (decl); 2162 a; 2163 a = TREE_CHAIN (a)) 2164 { 2165 tree name = TREE_PURPOSE (a), args = TREE_VALUE (a); 2166 2167 if (name == get_identifier ("interrupt") 2168 && list_length (args) == 1 2169 && TREE_CODE (TREE_VALUE (args)) == STRING_CST) 2170 { 2171 tree value = TREE_VALUE (args); 2172 2173 if (!strcmp (TREE_STRING_POINTER (value), "ilink1") 2174 || !strcmp (TREE_STRING_POINTER (value), "ilink")) 2175 fn_type = ARC_FUNCTION_ILINK1; 2176 else if (!strcmp (TREE_STRING_POINTER (value), "ilink2")) 2177 fn_type = ARC_FUNCTION_ILINK2; 2178 else 2179 gcc_unreachable (); 2180 break; 2181 } 2182 } 2183 2184 return fun->machine->fn_type = fn_type; 2185 } 2186 2187 #define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM)) 2188 #define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM)) 2189 2190 /* Tell prologue and epilogue if register REGNO should be saved / restored. 2191 The return address and frame pointer are treated separately. 2192 Don't consider them here. 2193 Addition for pic: The gp register needs to be saved if the current 2194 function changes it to access gotoff variables. 2195 FIXME: This will not be needed if we used some arbitrary register 2196 instead of r26. 2197 */ 2198 #define MUST_SAVE_REGISTER(regno, interrupt_p) \ 2199 (((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \ 2200 && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p))) \ 2201 || (flag_pic && crtl->uses_pic_offset_table \ 2202 && regno == PIC_OFFSET_TABLE_REGNUM) ) 2203 2204 #define MUST_SAVE_RETURN_ADDR \ 2205 (cfun->machine->frame_info.save_return_addr) 2206 2207 /* Return non-zero if there are registers to be saved or loaded using 2208 millicode thunks. We can only use consecutive sequences starting 2209 with r13, and not going beyond r25. 2210 GMASK is a bitmask of registers to save. This function sets 2211 FRAME->millicod_start_reg .. FRAME->millicode_end_reg to the range 2212 of registers to be saved / restored with a millicode call. */ 2213 2214 static int 2215 arc_compute_millicode_save_restore_regs (unsigned int gmask, 2216 struct arc_frame_info *frame) 2217 { 2218 int regno; 2219 2220 int start_reg = 13, end_reg = 25; 2221 2222 for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));) 2223 regno++; 2224 end_reg = regno - 1; 2225 /* There is no point in using millicode thunks if we don't save/restore 2226 at least three registers. For non-leaf functions we also have the 2227 blink restore. 
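   In other words, a leaf function needs at least three consecutive saved
   registers starting at r13 in GMASK for a thunk to pay off, while a
   non-leaf function (which restores blink anyway) only needs two.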
*/ 2228 if (regno - start_reg >= 3 - (crtl->is_leaf == 0)) 2229 { 2230 frame->millicode_start_reg = 13; 2231 frame->millicode_end_reg = regno - 1; 2232 return 1; 2233 } 2234 return 0; 2235 } 2236 2237 /* Return the bytes needed to compute the frame pointer from the current 2238 stack pointer. 2239 2240 SIZE is the size needed for local variables. */ 2241 2242 unsigned int 2243 arc_compute_frame_size (int size) /* size = # of var. bytes allocated. */ 2244 { 2245 int regno; 2246 unsigned int total_size, var_size, args_size, pretend_size, extra_size; 2247 unsigned int reg_size, reg_offset; 2248 unsigned int gmask; 2249 enum arc_function_type fn_type; 2250 int interrupt_p; 2251 struct arc_frame_info *frame_info = &cfun->machine->frame_info; 2252 2253 size = ARC_STACK_ALIGN (size); 2254 2255 /* 1) Size of locals and temporaries */ 2256 var_size = size; 2257 2258 /* 2) Size of outgoing arguments */ 2259 args_size = crtl->outgoing_args_size; 2260 2261 /* 3) Calculate space needed for saved registers. 2262 ??? We ignore the extension registers for now. */ 2263 2264 /* See if this is an interrupt handler. Call used registers must be saved 2265 for them too. */ 2266 2267 reg_size = 0; 2268 gmask = 0; 2269 fn_type = arc_compute_function_type (cfun); 2270 interrupt_p = ARC_INTERRUPT_P (fn_type); 2271 2272 for (regno = 0; regno <= 31; regno++) 2273 { 2274 if (MUST_SAVE_REGISTER (regno, interrupt_p)) 2275 { 2276 reg_size += UNITS_PER_WORD; 2277 gmask |= 1 << regno; 2278 } 2279 } 2280 2281 /* 4) Space for back trace data structure. 2282 <return addr reg size> (if required) + <fp size> (if required). */ 2283 frame_info->save_return_addr 2284 = (!crtl->is_leaf || df_regs_ever_live_p (RETURN_ADDR_REGNUM)); 2285 /* Saving blink reg in case of leaf function for millicode thunk calls. */ 2286 if (optimize_size && !TARGET_NO_MILLICODE_THUNK_SET) 2287 { 2288 if (arc_compute_millicode_save_restore_regs (gmask, frame_info)) 2289 frame_info->save_return_addr = true; 2290 } 2291 2292 extra_size = 0; 2293 if (MUST_SAVE_RETURN_ADDR) 2294 extra_size = 4; 2295 if (frame_pointer_needed) 2296 extra_size += 4; 2297 2298 /* 5) Space for variable arguments passed in registers */ 2299 pretend_size = crtl->args.pretend_args_size; 2300 2301 /* Ensure everything before the locals is aligned appropriately. */ 2302 { 2303 unsigned int extra_plus_reg_size; 2304 unsigned int extra_plus_reg_size_aligned; 2305 2306 extra_plus_reg_size = extra_size + reg_size; 2307 extra_plus_reg_size_aligned = ARC_STACK_ALIGN(extra_plus_reg_size); 2308 reg_size = extra_plus_reg_size_aligned - extra_size; 2309 } 2310 2311 /* Compute total frame size. */ 2312 total_size = var_size + args_size + extra_size + pretend_size + reg_size; 2313 2314 total_size = ARC_STACK_ALIGN (total_size); 2315 2316 /* Compute offset of register save area from stack pointer: 2317 Frame: pretend_size <blink> reg_size <fp> var_size args_size <--sp 2318 */ 2319 reg_offset = (total_size - (pretend_size + reg_size + extra_size) 2320 + (frame_pointer_needed ? 4 : 0)); 2321 2322 /* Save computed information. */ 2323 frame_info->total_size = total_size; 2324 frame_info->extra_size = extra_size; 2325 frame_info->pretend_size = pretend_size; 2326 frame_info->var_size = var_size; 2327 frame_info->args_size = args_size; 2328 frame_info->reg_size = reg_size; 2329 frame_info->reg_offset = reg_offset; 2330 frame_info->gmask = gmask; 2331 frame_info->initialized = reload_completed; 2332 2333 /* Ok, we're done. 
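   To illustrate (assuming 4-byte stack alignment and not optimizing for
   size): a leaf function with 8 bytes of locals, no outgoing arguments,
   only r13-r14 to save and neither blink nor fp needed ends up with
   var_size = reg_size = 8, args_size = extra_size = pretend_size = 0,
   hence total_size = 16 and reg_offset = 8.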
*/ 2334 return total_size; 2335 } 2336 2337 /* Common code to save/restore registers. */ 2338 /* BASE_REG is the base register to use for addressing and to adjust. 2339 GMASK is a bitmask of general purpose registers to save/restore. 2340 epilogue_p 0: prologue 1:epilogue 2:epilogue, sibling thunk 2341 If *FIRST_OFFSET is non-zero, add it first to BASE_REG - preferably 2342 using a pre-modify for the first memory access. *FIRST_OFFSET is then 2343 zeroed. */ 2344 2345 static void 2346 arc_save_restore (rtx base_reg, 2347 unsigned int gmask, int epilogue_p, int *first_offset) 2348 { 2349 unsigned int offset = 0; 2350 int regno; 2351 struct arc_frame_info *frame = &cfun->machine->frame_info; 2352 rtx sibthunk_insn = NULL_RTX; 2353 2354 if (gmask) 2355 { 2356 /* Millicode thunks implementation: 2357 Generates calls to millicodes for registers starting from r13 to r25 2358 Present Limitations: 2359 - Only one range supported. The remaining regs will have the ordinary 2360 st and ld instructions for store and loads. Hence a gmask asking 2361 to store r13-14, r16-r25 will only generate calls to store and 2362 load r13 to r14 while store and load insns will be generated for 2363 r16 to r25 in the prologue and epilogue respectively. 2364 2365 - Presently library only supports register ranges starting from r13. 2366 */ 2367 if (epilogue_p == 2 || frame->millicode_end_reg > 14) 2368 { 2369 int start_call = frame->millicode_start_reg; 2370 int end_call = frame->millicode_end_reg; 2371 int n_regs = end_call - start_call + 1; 2372 int i = 0, r, off = 0; 2373 rtx insn; 2374 rtx ret_addr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); 2375 2376 if (*first_offset) 2377 { 2378 /* "reg_size" won't be more than 127 . */ 2379 gcc_assert (epilogue_p || abs (*first_offset) <= 127); 2380 frame_add (base_reg, *first_offset); 2381 *first_offset = 0; 2382 } 2383 insn = gen_rtx_PARALLEL 2384 (VOIDmode, rtvec_alloc ((epilogue_p == 2) + n_regs + 1)); 2385 if (epilogue_p == 2) 2386 i += 2; 2387 else 2388 XVECEXP (insn, 0, n_regs) = gen_rtx_CLOBBER (VOIDmode, ret_addr); 2389 for (r = start_call; r <= end_call; r++, off += UNITS_PER_WORD, i++) 2390 { 2391 rtx reg = gen_rtx_REG (SImode, r); 2392 rtx mem 2393 = gen_frame_mem (SImode, plus_constant (Pmode, base_reg, off)); 2394 2395 if (epilogue_p) 2396 XVECEXP (insn, 0, i) = gen_rtx_SET (reg, mem); 2397 else 2398 XVECEXP (insn, 0, i) = gen_rtx_SET (mem, reg); 2399 gmask = gmask & ~(1L << r); 2400 } 2401 if (epilogue_p == 2) 2402 sibthunk_insn = insn; 2403 else 2404 { 2405 insn = frame_insn (insn); 2406 if (epilogue_p) 2407 for (r = start_call; r <= end_call; r++) 2408 { 2409 rtx reg = gen_rtx_REG (SImode, r); 2410 add_reg_note (insn, REG_CFA_RESTORE, reg); 2411 } 2412 } 2413 offset += off; 2414 } 2415 2416 for (regno = 0; regno <= 31; regno++) 2417 { 2418 machine_mode mode = SImode; 2419 bool found = false; 2420 2421 if (TARGET_LL64 2422 && (regno % 2 == 0) 2423 && ((gmask & (1L << regno)) != 0) 2424 && ((gmask & (1L << (regno+1))) != 0)) 2425 { 2426 found = true; 2427 mode = DImode; 2428 } 2429 else if ((gmask & (1L << regno)) != 0) 2430 { 2431 found = true; 2432 mode = SImode; 2433 } 2434 2435 if (found) 2436 { 2437 rtx reg = gen_rtx_REG (mode, regno); 2438 rtx addr, mem; 2439 int cfa_adjust = *first_offset; 2440 2441 if (*first_offset) 2442 { 2443 gcc_assert (!offset); 2444 addr = plus_constant (Pmode, base_reg, *first_offset); 2445 addr = gen_rtx_PRE_MODIFY (Pmode, base_reg, addr); 2446 *first_offset = 0; 2447 } 2448 else 2449 { 2450 gcc_assert (SMALL_INT (offset)); 2451 
addr = plus_constant (Pmode, base_reg, offset); 2452 } 2453 mem = gen_frame_mem (mode, addr); 2454 if (epilogue_p) 2455 { 2456 rtx insn = 2457 frame_move_inc (reg, mem, base_reg, addr); 2458 add_reg_note (insn, REG_CFA_RESTORE, reg); 2459 if (cfa_adjust) 2460 { 2461 enum reg_note note = REG_CFA_ADJUST_CFA; 2462 add_reg_note (insn, note, 2463 gen_rtx_SET (stack_pointer_rtx, 2464 plus_constant (Pmode, 2465 stack_pointer_rtx, 2466 cfa_adjust))); 2467 } 2468 } 2469 else 2470 frame_move_inc (mem, reg, base_reg, addr); 2471 offset += UNITS_PER_WORD; 2472 if (mode == DImode) 2473 { 2474 offset += UNITS_PER_WORD; 2475 ++regno; 2476 } 2477 } /* if */ 2478 } /* for */ 2479 }/* if */ 2480 if (sibthunk_insn) 2481 { 2482 int start_call = frame->millicode_start_reg; 2483 int end_call = frame->millicode_end_reg; 2484 int r; 2485 2486 rtx r12 = gen_rtx_REG (Pmode, 12); 2487 2488 frame_insn (gen_rtx_SET (r12, GEN_INT (offset))); 2489 XVECEXP (sibthunk_insn, 0, 0) = ret_rtx; 2490 XVECEXP (sibthunk_insn, 0, 1) 2491 = gen_rtx_SET (stack_pointer_rtx, 2492 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12)); 2493 sibthunk_insn = emit_jump_insn (sibthunk_insn); 2494 RTX_FRAME_RELATED_P (sibthunk_insn) = 1; 2495 2496 /* Would be nice if we could do this earlier, when the PARALLEL 2497 is populated, but these need to be attached after the 2498 emit. */ 2499 for (r = start_call; r <= end_call; r++) 2500 { 2501 rtx reg = gen_rtx_REG (SImode, r); 2502 add_reg_note (sibthunk_insn, REG_CFA_RESTORE, reg); 2503 } 2504 } 2505 } /* arc_save_restore */ 2506 2507 2508 int arc_return_address_regs[4] 2509 = {0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM}; 2510 2511 /* Set up the stack and frame pointer (if desired) for the function. */ 2512 2513 void 2514 arc_expand_prologue (void) 2515 { 2516 int size = get_frame_size (); 2517 unsigned int gmask = cfun->machine->frame_info.gmask; 2518 /* unsigned int frame_pointer_offset;*/ 2519 unsigned int frame_size_to_allocate; 2520 /* (FIXME: The first store will use a PRE_MODIFY; this will usually be r13. 2521 Change the stack layout so that we rather store a high register with the 2522 PRE_MODIFY, thus enabling more short insn generation.) */ 2523 int first_offset = 0; 2524 2525 size = ARC_STACK_ALIGN (size); 2526 2527 /* Compute/get total frame size. */ 2528 size = (!cfun->machine->frame_info.initialized 2529 ? arc_compute_frame_size (size) 2530 : cfun->machine->frame_info.total_size); 2531 2532 if (flag_stack_usage_info) 2533 current_function_static_stack_size = size; 2534 2535 /* Keep track of frame size to be allocated. */ 2536 frame_size_to_allocate = size; 2537 2538 /* These cases shouldn't happen. Catch them now. */ 2539 gcc_assert (!(size == 0 && gmask)); 2540 2541 /* Allocate space for register arguments if this is a variadic function. */ 2542 if (cfun->machine->frame_info.pretend_size != 0) 2543 { 2544 /* Ensure pretend_size is maximum of 8 * word_size. */ 2545 gcc_assert (cfun->machine->frame_info.pretend_size <= 32); 2546 2547 frame_stack_add (-(HOST_WIDE_INT)cfun->machine->frame_info.pretend_size); 2548 frame_size_to_allocate -= cfun->machine->frame_info.pretend_size; 2549 } 2550 2551 /* The home-grown ABI says link register is saved first. 
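   With the pre-decrement address used below this typically assembles to
   something like "st.a blink,[sp,-4]"; the ".a" update suffix is supplied
   by the 'U' operand code further down in this file.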
*/ 2552 if (MUST_SAVE_RETURN_ADDR) 2553 { 2554 rtx ra = gen_rtx_REG (SImode, RETURN_ADDR_REGNUM); 2555 rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx)); 2556 2557 frame_move_inc (mem, ra, stack_pointer_rtx, 0); 2558 frame_size_to_allocate -= UNITS_PER_WORD; 2559 2560 } /* MUST_SAVE_RETURN_ADDR */ 2561 2562 /* Save any needed call-saved regs (and call-used if this is an 2563 interrupt handler) for ARCompact ISA. */ 2564 if (cfun->machine->frame_info.reg_size) 2565 { 2566 first_offset = -cfun->machine->frame_info.reg_size; 2567 /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask. */ 2568 arc_save_restore (stack_pointer_rtx, gmask, 0, &first_offset); 2569 frame_size_to_allocate -= cfun->machine->frame_info.reg_size; 2570 } 2571 2572 2573 /* Save frame pointer if needed. */ 2574 if (frame_pointer_needed) 2575 { 2576 rtx addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, 2577 GEN_INT (-UNITS_PER_WORD + first_offset)); 2578 rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_MODIFY (Pmode, 2579 stack_pointer_rtx, 2580 addr)); 2581 frame_move_inc (mem, frame_pointer_rtx, stack_pointer_rtx, 0); 2582 frame_size_to_allocate -= UNITS_PER_WORD; 2583 first_offset = 0; 2584 frame_move (frame_pointer_rtx, stack_pointer_rtx); 2585 } 2586 2587 /* ??? We don't handle the case where the saved regs are more than 252 2588 bytes away from sp. This can be handled by decrementing sp once, saving 2589 the regs, and then decrementing it again. The epilogue doesn't have this 2590 problem as the `ld' insn takes reg+limm values (though it would be more 2591 efficient to avoid reg+limm). */ 2592 2593 frame_size_to_allocate -= first_offset; 2594 /* Allocate the stack frame. */ 2595 if (frame_size_to_allocate > 0) 2596 frame_stack_add ((HOST_WIDE_INT) 0 - frame_size_to_allocate); 2597 2598 /* Setup the gp register, if needed. */ 2599 if (crtl->uses_pic_offset_table) 2600 arc_finalize_pic (); 2601 } 2602 2603 /* Do any necessary cleanup after a function to restore stack, frame, 2604 and regs. */ 2605 2606 void 2607 arc_expand_epilogue (int sibcall_p) 2608 { 2609 int size = get_frame_size (); 2610 enum arc_function_type fn_type = arc_compute_function_type (cfun); 2611 2612 size = ARC_STACK_ALIGN (size); 2613 size = (!cfun->machine->frame_info.initialized 2614 ? arc_compute_frame_size (size) 2615 : cfun->machine->frame_info.total_size); 2616 2617 unsigned int pretend_size = cfun->machine->frame_info.pretend_size; 2618 unsigned int frame_size; 2619 unsigned int size_to_deallocate; 2620 int restored; 2621 int can_trust_sp_p = !cfun->calls_alloca; 2622 int first_offset = 0; 2623 int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0; 2624 rtx insn; 2625 2626 size_to_deallocate = size; 2627 2628 frame_size = size - (pretend_size + 2629 cfun->machine->frame_info.reg_size + 2630 cfun->machine->frame_info.extra_size); 2631 2632 /* ??? There are lots of optimizations that can be done here. 2633 EG: Use fp to restore regs if it's closer. 2634 Maybe in time we'll do them all. For now, always restore regs from 2635 sp, but don't restore sp if we don't have to. */ 2636 2637 if (!can_trust_sp_p) 2638 gcc_assert (frame_pointer_needed); 2639 2640 /* Restore stack pointer to the beginning of saved register area for 2641 ARCompact ISA. 
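   If we have a frame pointer, simply copy it back into sp; otherwise fold
   the adjustment into FIRST_OFFSET so it can be combined with the register
   restores below.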
*/ 2642 if (frame_size) 2643 { 2644 if (frame_pointer_needed) 2645 frame_move (stack_pointer_rtx, frame_pointer_rtx); 2646 else 2647 first_offset = frame_size; 2648 size_to_deallocate -= frame_size; 2649 } 2650 else if (!can_trust_sp_p) 2651 frame_stack_add (-frame_size); 2652 2653 2654 /* Restore any saved registers. */ 2655 if (frame_pointer_needed) 2656 { 2657 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx); 2658 2659 insn = frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr), 2660 stack_pointer_rtx, 0); 2661 add_reg_note (insn, REG_CFA_RESTORE, frame_pointer_rtx); 2662 add_reg_note (insn, REG_CFA_DEF_CFA, 2663 plus_constant (SImode, stack_pointer_rtx, 2664 4)); 2665 size_to_deallocate -= UNITS_PER_WORD; 2666 } 2667 2668 /* Load blink after the calls to thunk calls in case of optimize size. */ 2669 if (millicode_p) 2670 { 2671 int sibthunk_p = (!sibcall_p 2672 && fn_type == ARC_FUNCTION_NORMAL 2673 && !cfun->machine->frame_info.pretend_size); 2674 2675 gcc_assert (!(cfun->machine->frame_info.gmask 2676 & (FRAME_POINTER_MASK | RETURN_ADDR_MASK))); 2677 arc_save_restore (stack_pointer_rtx, 2678 cfun->machine->frame_info.gmask, 2679 1 + sibthunk_p, &first_offset); 2680 if (sibthunk_p) 2681 return; 2682 } 2683 /* If we are to restore registers, and first_offset would require 2684 a limm to be encoded in a PRE_MODIFY, yet we can add it with a 2685 fast add to the stack pointer, do this now. */ 2686 if ((!SMALL_INT (first_offset) 2687 && cfun->machine->frame_info.gmask 2688 && ((TARGET_ARC700 && !optimize_size) 2689 ? first_offset <= 0x800 2690 : satisfies_constraint_C2a (GEN_INT (first_offset)))) 2691 /* Also do this if we have both gprs and return 2692 address to restore, and they both would need a LIMM. */ 2693 || (MUST_SAVE_RETURN_ADDR 2694 && !SMALL_INT ((cfun->machine->frame_info.reg_size + first_offset) >> 2) 2695 && cfun->machine->frame_info.gmask)) 2696 { 2697 frame_stack_add (first_offset); 2698 first_offset = 0; 2699 } 2700 if (MUST_SAVE_RETURN_ADDR) 2701 { 2702 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); 2703 int ra_offs = cfun->machine->frame_info.reg_size + first_offset; 2704 rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs); 2705 HOST_WIDE_INT cfa_adjust = 0; 2706 2707 /* If the load of blink would need a LIMM, but we can add 2708 the offset quickly to sp, do the latter. */ 2709 if (!SMALL_INT (ra_offs >> 2) 2710 && !cfun->machine->frame_info.gmask 2711 && ((TARGET_ARC700 && !optimize_size) 2712 ? ra_offs <= 0x800 2713 : satisfies_constraint_C2a (GEN_INT (ra_offs)))) 2714 { 2715 size_to_deallocate -= ra_offs - first_offset; 2716 first_offset = 0; 2717 frame_stack_add (ra_offs); 2718 ra_offs = 0; 2719 addr = stack_pointer_rtx; 2720 } 2721 /* See if we can combine the load of the return address with the 2722 final stack adjustment. 2723 We need a separate load if there are still registers to 2724 restore. We also want a separate load if the combined insn 2725 would need a limm, but a separate load doesn't. 
*/ 2726 if (ra_offs 2727 && !cfun->machine->frame_info.gmask 2728 && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2))) 2729 { 2730 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr); 2731 cfa_adjust = ra_offs; 2732 first_offset = 0; 2733 size_to_deallocate -= cfun->machine->frame_info.reg_size; 2734 } 2735 else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD) 2736 { 2737 addr = gen_rtx_POST_INC (Pmode, addr); 2738 cfa_adjust = GET_MODE_SIZE (Pmode); 2739 size_to_deallocate = 0; 2740 } 2741 2742 insn = frame_move_inc (ra, gen_frame_mem (Pmode, addr), 2743 stack_pointer_rtx, addr); 2744 if (cfa_adjust) 2745 { 2746 enum reg_note note = REG_CFA_ADJUST_CFA; 2747 2748 add_reg_note (insn, note, 2749 gen_rtx_SET (stack_pointer_rtx, 2750 plus_constant (SImode, stack_pointer_rtx, 2751 cfa_adjust))); 2752 } 2753 add_reg_note (insn, REG_CFA_RESTORE, ra); 2754 } 2755 2756 if (!millicode_p) 2757 { 2758 if (cfun->machine->frame_info.reg_size) 2759 arc_save_restore (stack_pointer_rtx, 2760 /* The zeroing of these two bits is unnecessary, but leave this in for clarity. */ 2761 cfun->machine->frame_info.gmask 2762 & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, &first_offset); 2763 } 2764 2765 2766 /* The rest of this function does the following: 2767 ARCompact : handle epilogue_delay, restore sp (phase-2), return 2768 */ 2769 2770 /* Keep track of how much of the stack pointer we've restored. 2771 It makes the following a lot more readable. */ 2772 size_to_deallocate += first_offset; 2773 restored = size - size_to_deallocate; 2774 2775 if (size > restored) 2776 frame_stack_add (size - restored); 2777 2778 /* Emit the return instruction. */ 2779 if (sibcall_p == FALSE) 2780 emit_jump_insn (gen_simple_return ()); 2781 } 2782 2783 /* Return the offset relative to the stack pointer where the return address 2784 is stored, or -1 if it is not stored. */ 2785 2786 int 2787 arc_return_slot_offset () 2788 { 2789 struct arc_frame_info *afi = &cfun->machine->frame_info; 2790 2791 return (afi->save_return_addr 2792 ? afi->total_size - afi->pretend_size - afi->extra_size : -1); 2793 } 2794 2795 /* PIC */ 2796 2797 /* Helper to generate unspec constant. */ 2798 2799 static rtx 2800 arc_unspec_offset (rtx loc, int unspec) 2801 { 2802 return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, loc), 2803 unspec)); 2804 } 2805 2806 /* Emit special PIC prologues and epilogues. */ 2807 /* If the function has any GOTOFF relocations, then the GOTBASE 2808 register has to be setup in the prologue 2809 The instruction needed at the function start for setting up the 2810 GOTBASE register is 2811 add rdest, pc, 2812 ---------------------------------------------------------- 2813 The rtl to be emitted for this should be: 2814 set (reg basereg) 2815 (plus (reg pc) 2816 (const (unspec (symref _DYNAMIC) 3))) 2817 ---------------------------------------------------------- */ 2818 2819 static void 2820 arc_finalize_pic (void) 2821 { 2822 rtx pat; 2823 rtx baseptr_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM); 2824 2825 if (crtl->uses_pic_offset_table == 0) 2826 return; 2827 2828 gcc_assert (flag_pic != 0); 2829 2830 pat = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC"); 2831 pat = arc_unspec_offset (pat, ARC_UNSPEC_GOT); 2832 pat = gen_rtx_SET (baseptr_rtx, pat); 2833 2834 emit_insn (pat); 2835 } 2836 2837 /* !TARGET_BARREL_SHIFTER support. */ 2838 /* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what 2839 kind of shift. 
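   E.g. emit_shift (ASHIFT, op0, op1, GEN_INT (n)) builds the
   (ashift op1 (const_int n)) rtx and emits it via the shift_si3 pattern if
   shift4_operator accepts it, or via the shift_si3_loop pattern otherwise.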
*/ 2840 2841 void 2842 emit_shift (enum rtx_code code, rtx op0, rtx op1, rtx op2) 2843 { 2844 rtx shift = gen_rtx_fmt_ee (code, SImode, op1, op2); 2845 rtx pat 2846 = ((shift4_operator (shift, SImode) ? gen_shift_si3 : gen_shift_si3_loop) 2847 (op0, op1, op2, shift)); 2848 emit_insn (pat); 2849 } 2850 2851 /* Output the assembler code for doing a shift. 2852 We go to a bit of trouble to generate efficient code as the ARC601 only has 2853 single bit shifts. This is taken from the h8300 port. We only have one 2854 mode of shifting and can't access individual bytes like the h8300 can, so 2855 this is greatly simplified (at the expense of not generating hyper- 2856 efficient code). 2857 2858 This function is not used if the variable shift insns are present. */ 2859 2860 /* FIXME: This probably can be done using a define_split in arc.md. 2861 Alternately, generate rtx rather than output instructions. */ 2862 2863 const char * 2864 output_shift (rtx *operands) 2865 { 2866 /* static int loopend_lab;*/ 2867 rtx shift = operands[3]; 2868 machine_mode mode = GET_MODE (shift); 2869 enum rtx_code code = GET_CODE (shift); 2870 const char *shift_one; 2871 2872 gcc_assert (mode == SImode); 2873 2874 switch (code) 2875 { 2876 case ASHIFT: shift_one = "add %0,%1,%1"; break; 2877 case ASHIFTRT: shift_one = "asr %0,%1"; break; 2878 case LSHIFTRT: shift_one = "lsr %0,%1"; break; 2879 default: gcc_unreachable (); 2880 } 2881 2882 if (GET_CODE (operands[2]) != CONST_INT) 2883 { 2884 output_asm_insn ("and.f lp_count,%2, 0x1f", operands); 2885 goto shiftloop; 2886 } 2887 else 2888 { 2889 int n; 2890 2891 n = INTVAL (operands[2]); 2892 2893 /* Only consider the lower 5 bits of the shift count. */ 2894 n = n & 0x1f; 2895 2896 /* First see if we can do them inline. */ 2897 /* ??? We could get better scheduling & shorter code (using short insns) 2898 by using splitters. Alas, that'd be even more verbose. */ 2899 if (code == ASHIFT && n <= 9 && n > 2 2900 && dest_reg_operand (operands[4], SImode)) 2901 { 2902 output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands); 2903 for (n -=3 ; n >= 3; n -= 3) 2904 output_asm_insn ("add3 %0,%4,%0", operands); 2905 if (n == 2) 2906 output_asm_insn ("add2 %0,%4,%0", operands); 2907 else if (n) 2908 output_asm_insn ("add %0,%0,%0", operands); 2909 } 2910 else if (n <= 4) 2911 { 2912 while (--n >= 0) 2913 { 2914 output_asm_insn (shift_one, operands); 2915 operands[1] = operands[0]; 2916 } 2917 } 2918 /* See if we can use a rotate/and. */ 2919 else if (n == BITS_PER_WORD - 1) 2920 { 2921 switch (code) 2922 { 2923 case ASHIFT : 2924 output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands); 2925 break; 2926 case ASHIFTRT : 2927 /* The ARC doesn't have a rol insn. Use something else. */ 2928 output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands); 2929 break; 2930 case LSHIFTRT : 2931 /* The ARC doesn't have a rol insn. Use something else. */ 2932 output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands); 2933 break; 2934 default: 2935 break; 2936 } 2937 } 2938 else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode)) 2939 { 2940 switch (code) 2941 { 2942 case ASHIFT : 2943 output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands); 2944 break; 2945 case ASHIFTRT : 2946 #if 1 /* Need some scheduling comparisons. 
*/ 2947 output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t" 2948 "add.f 0,%4,%4\n\trlc %0,%0", operands); 2949 #else 2950 output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t" 2951 "sbc.f %0,%0,%4\n\trlc %0,%0", operands); 2952 #endif 2953 break; 2954 case LSHIFTRT : 2955 #if 1 2956 output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t" 2957 "add.f 0,%4,%4\n\trlc %0,%0", operands); 2958 #else 2959 output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t" 2960 "and %0,%0,1\n\trlc %0,%0", operands); 2961 #endif 2962 break; 2963 default: 2964 break; 2965 } 2966 } 2967 else if (n == BITS_PER_WORD - 3 && code == ASHIFT) 2968 output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0", 2969 operands); 2970 /* Must loop. */ 2971 else 2972 { 2973 operands[2] = GEN_INT (n); 2974 output_asm_insn ("mov.f lp_count, %2", operands); 2975 2976 shiftloop: 2977 { 2978 output_asm_insn ("lpnz\t2f", operands); 2979 output_asm_insn (shift_one, operands); 2980 output_asm_insn ("nop", operands); 2981 fprintf (asm_out_file, "2:\t%s end single insn loop\n", 2982 ASM_COMMENT_START); 2983 } 2984 } 2985 } 2986 2987 return ""; 2988 } 2989 2990 /* Nested function support. */ 2991 2992 /* Directly store VALUE into memory object BLOCK at OFFSET. */ 2993 2994 static void 2995 emit_store_direct (rtx block, int offset, int value) 2996 { 2997 emit_insn (gen_store_direct (adjust_address (block, SImode, offset), 2998 force_reg (SImode, 2999 gen_int_mode (value, SImode)))); 3000 } 3001 3002 /* Emit RTL insns to initialize the variable parts of a trampoline. 3003 FNADDR is an RTX for the address of the function's pure code. 3004 CXT is an RTX for the static chain value for the function. */ 3005 /* With potentially multiple shared objects loaded, and multiple stacks 3006 present for multiple thereds where trampolines might reside, a simple 3007 range check will likely not suffice for the profiler to tell if a callee 3008 is a trampoline. We a speedier check by making the trampoline start at 3009 an address that is not 4-byte aligned. 3010 A trampoline looks like this: 3011 3012 nop_s 0x78e0 3013 entry: 3014 ld_s r12,[pcl,12] 0xd403 3015 ld r11,[pcl,12] 0x170c 700b 3016 j_s [r12] 0x7c00 3017 nop_s 0x78e0 3018 3019 The fastest trampoline to execute for trampolines within +-8KB of CTX 3020 would be: 3021 add2 r11,pcl,s12 3022 j [limm] 0x20200f80 limm 3023 and that would also be faster to write to the stack by computing the offset 3024 from CTX to TRAMP at compile time. However, it would really be better to 3025 get rid of the high cost of cache invalidation when generating trampolines, 3026 which requires that the code part of trampolines stays constant, and 3027 additionally either 3028 - making sure that no executable code but trampolines is on the stack, 3029 no icache entries linger for the area of the stack from when before the 3030 stack was allocated, and allocating trampolines in trampoline-only 3031 cache lines 3032 or 3033 - allocate trampolines fram a special pool of pre-allocated trampolines. */ 3034 3035 static void 3036 arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt) 3037 { 3038 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 3039 3040 emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0); 3041 emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c); 3042 emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 
0x7c0078e0 : 0x78e07c00); 3043 emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr); 3044 emit_move_insn (adjust_address (tramp, SImode, 16), cxt); 3045 emit_insn (gen_flush_icache (adjust_address (tramp, SImode, 0))); 3046 } 3047 3048 /* Allow the profiler to easily distinguish trampolines from normal 3049 functions. */ 3050 3051 static rtx 3052 arc_trampoline_adjust_address (rtx addr) 3053 { 3054 return plus_constant (Pmode, addr, 2); 3055 } 3056 3057 /* This is set briefly to 1 when we output a ".as" address modifer, and then 3058 reset when we output the scaled address. */ 3059 static int output_scaled = 0; 3060 3061 /* Print operand X (an rtx) in assembler syntax to file FILE. 3062 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. 3063 For `%' followed by punctuation, CODE is the punctuation and X is null. */ 3064 /* In final.c:output_asm_insn: 3065 'l' : label 3066 'a' : address 3067 'c' : constant address if CONSTANT_ADDRESS_P 3068 'n' : negative 3069 Here: 3070 'Z': log2(x+1)-1 3071 'z': log2 3072 'M': log2(~x) 3073 'p': bit Position of lsb 3074 's': size of bit field 3075 '#': condbranch delay slot suffix 3076 '*': jump delay slot suffix 3077 '?' : nonjump-insn suffix for conditional execution or short instruction 3078 '!' : jump / call suffix for conditional execution or short instruction 3079 '`': fold constant inside unary o-perator, re-recognize, and emit. 3080 'd' 3081 'D' 3082 'R': Second word 3083 'S' 3084 'B': Branch comparison operand - suppress sda reference 3085 'H': Most significant word 3086 'L': Least significant word 3087 'A': ASCII decimal representation of floating point value 3088 'U': Load/store update or scaling indicator 3089 'V': cache bypass indicator for volatile 3090 'P' 3091 'F' 3092 '^' 3093 'O': Operator 3094 'o': original symbol - no @ prepending. */ 3095 3096 void 3097 arc_print_operand (FILE *file, rtx x, int code) 3098 { 3099 switch (code) 3100 { 3101 case 'Z': 3102 if (GET_CODE (x) == CONST_INT) 3103 fprintf (file, "%d",exact_log2(INTVAL (x) + 1) - 1 ); 3104 else 3105 output_operand_lossage ("invalid operand to %%Z code"); 3106 3107 return; 3108 3109 case 'z': 3110 if (GET_CODE (x) == CONST_INT) 3111 fprintf (file, "%d",exact_log2(INTVAL (x)) ); 3112 else 3113 output_operand_lossage ("invalid operand to %%z code"); 3114 3115 return; 3116 3117 case 'M': 3118 if (GET_CODE (x) == CONST_INT) 3119 fprintf (file, "%d",exact_log2(~INTVAL (x)) ); 3120 else 3121 output_operand_lossage ("invalid operand to %%M code"); 3122 3123 return; 3124 3125 case 'p': 3126 if (GET_CODE (x) == CONST_INT) 3127 fprintf (file, "%d", exact_log2 (INTVAL (x) & -INTVAL (x))); 3128 else 3129 output_operand_lossage ("invalid operand to %%p code"); 3130 return; 3131 3132 case 's': 3133 if (GET_CODE (x) == CONST_INT) 3134 { 3135 HOST_WIDE_INT i = INTVAL (x); 3136 HOST_WIDE_INT s = exact_log2 (i & -i); 3137 fprintf (file, "%d", exact_log2 (((0xffffffffUL & i) >> s) + 1)); 3138 } 3139 else 3140 output_operand_lossage ("invalid operand to %%s code"); 3141 return; 3142 3143 case '#' : 3144 /* Conditional branches depending on condition codes. 3145 Note that this is only for branches that were known to depend on 3146 condition codes before delay slot scheduling; 3147 out-of-range brcc / bbit expansions should use '*'. 3148 This distinction is important because of the different 3149 allowable delay slot insns and the output of the delay suffix 3150 for TARGET_AT_DBR_COND_EXEC. 
*/ 3151 case '*' : 3152 /* Unconditional branches / branches not depending on condition codes. 3153 This could also be a CALL_INSN. 3154 Output the appropriate delay slot suffix. */ 3155 if (final_sequence && final_sequence->len () != 1) 3156 { 3157 rtx_insn *jump = final_sequence->insn (0); 3158 rtx_insn *delay = final_sequence->insn (1); 3159 3160 /* For TARGET_PAD_RETURN we might have grabbed the delay insn. */ 3161 if (delay->deleted ()) 3162 return; 3163 if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump)) 3164 fputs (INSN_FROM_TARGET_P (delay) ? ".d" 3165 : TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d" 3166 : get_attr_type (jump) == TYPE_RETURN && code == '#' ? "" 3167 : ".nd", 3168 file); 3169 else 3170 fputs (".d", file); 3171 } 3172 return; 3173 case '?' : /* with leading "." */ 3174 case '!' : /* without leading "." */ 3175 /* This insn can be conditionally executed. See if the ccfsm machinery 3176 says it should be conditionalized. 3177 If it shouldn't, we'll check the compact attribute if this insn 3178 has a short variant, which may be used depending on code size and 3179 alignment considerations. */ 3180 if (current_insn_predicate) 3181 arc_ccfsm_current.cc 3182 = get_arc_condition_code (current_insn_predicate); 3183 if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current)) 3184 { 3185 /* Is this insn in a delay slot sequence? */ 3186 if (!final_sequence || XVECLEN (final_sequence, 0) < 2 3187 || current_insn_predicate 3188 || CALL_P (final_sequence->insn (0)) 3189 || simplejump_p (final_sequence->insn (0))) 3190 { 3191 /* This insn isn't in a delay slot sequence, or conditionalized 3192 independently of its position in a delay slot. */ 3193 fprintf (file, "%s%s", 3194 code == '?' ? "." : "", 3195 arc_condition_codes[arc_ccfsm_current.cc]); 3196 /* If this is a jump, there are still short variants. However, 3197 only beq_s / bne_s have the same offset range as b_s, 3198 and the only short conditional returns are jeq_s and jne_s. */ 3199 if (code == '!' 3200 && (arc_ccfsm_current.cc == ARC_CC_EQ 3201 || arc_ccfsm_current.cc == ARC_CC_NE 3202 || 0 /* FIXME: check if branch in 7 bit range. */)) 3203 output_short_suffix (file); 3204 } 3205 else if (code == '!') /* Jump with delay slot. */ 3206 fputs (arc_condition_codes[arc_ccfsm_current.cc], file); 3207 else /* An Instruction in a delay slot of a jump or call. */ 3208 { 3209 rtx jump = XVECEXP (final_sequence, 0, 0); 3210 rtx insn = XVECEXP (final_sequence, 0, 1); 3211 3212 /* If the insn is annulled and is from the target path, we need 3213 to inverse the condition test. */ 3214 if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump)) 3215 { 3216 if (INSN_FROM_TARGET_P (insn)) 3217 fprintf (file, "%s%s", 3218 code == '?' ? "." : "", 3219 arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]); 3220 else 3221 fprintf (file, "%s%s", 3222 code == '?' ? "." : "", 3223 arc_condition_codes[arc_ccfsm_current.cc]); 3224 if (arc_ccfsm_current.state == 5) 3225 arc_ccfsm_current.state = 0; 3226 } 3227 else 3228 /* This insn is executed for either path, so don't 3229 conditionalize it at all. */ 3230 output_short_suffix (file); 3231 3232 } 3233 } 3234 else 3235 output_short_suffix (file); 3236 return; 3237 case'`': 3238 /* FIXME: fold constant inside unary operator, re-recognize, and emit. 
*/ 3239 gcc_unreachable (); 3240 case 'd' : 3241 fputs (arc_condition_codes[get_arc_condition_code (x)], file); 3242 return; 3243 case 'D' : 3244 fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE 3245 (get_arc_condition_code (x))], 3246 file); 3247 return; 3248 case 'R' : 3249 /* Write second word of DImode or DFmode reference, 3250 register or memory. */ 3251 if (GET_CODE (x) == REG) 3252 fputs (reg_names[REGNO (x)+1], file); 3253 else if (GET_CODE (x) == MEM) 3254 { 3255 fputc ('[', file); 3256 3257 /* Handle possible auto-increment. For PRE_INC / PRE_DEC / 3258 PRE_MODIFY, we will have handled the first word already; 3259 For POST_INC / POST_DEC / POST_MODIFY, the access to the 3260 first word will be done later. In either case, the access 3261 to the first word will do the modify, and we only have 3262 to add an offset of four here. */ 3263 if (GET_CODE (XEXP (x, 0)) == PRE_INC 3264 || GET_CODE (XEXP (x, 0)) == PRE_DEC 3265 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY 3266 || GET_CODE (XEXP (x, 0)) == POST_INC 3267 || GET_CODE (XEXP (x, 0)) == POST_DEC 3268 || GET_CODE (XEXP (x, 0)) == POST_MODIFY) 3269 output_address (VOIDmode, 3270 plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 4)); 3271 else if (output_scaled) 3272 { 3273 rtx addr = XEXP (x, 0); 3274 int size = GET_MODE_SIZE (GET_MODE (x)); 3275 3276 output_address (VOIDmode, 3277 plus_constant (Pmode, XEXP (addr, 0), 3278 ((INTVAL (XEXP (addr, 1)) + 4) 3279 >> (size == 2 ? 1 : 2)))); 3280 output_scaled = 0; 3281 } 3282 else 3283 output_address (VOIDmode, 3284 plus_constant (Pmode, XEXP (x, 0), 4)); 3285 fputc (']', file); 3286 } 3287 else 3288 output_operand_lossage ("invalid operand to %%R code"); 3289 return; 3290 case 'S' : 3291 /* FIXME: remove %S option. */ 3292 break; 3293 case 'B' /* Branch or other LIMM ref - must not use sda references. */ : 3294 if (CONSTANT_P (x)) 3295 { 3296 output_addr_const (file, x); 3297 return; 3298 } 3299 break; 3300 case 'H' : 3301 case 'L' : 3302 if (GET_CODE (x) == REG) 3303 { 3304 /* L = least significant word, H = most significant word. */ 3305 if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L')) 3306 fputs (reg_names[REGNO (x)], file); 3307 else 3308 fputs (reg_names[REGNO (x)+1], file); 3309 } 3310 else if (GET_CODE (x) == CONST_INT 3311 || GET_CODE (x) == CONST_DOUBLE) 3312 { 3313 rtx first, second, word; 3314 3315 split_double (x, &first, &second); 3316 3317 if((WORDS_BIG_ENDIAN) == 0) 3318 word = (code == 'L' ? first : second); 3319 else 3320 word = (code == 'L' ? second : first); 3321 3322 fprintf (file, "0x%08" PRIx32, ((uint32_t) INTVAL (word))); 3323 } 3324 else 3325 output_operand_lossage ("invalid operand to %%H/%%L code"); 3326 return; 3327 case 'A' : 3328 { 3329 char str[30]; 3330 3331 gcc_assert (GET_CODE (x) == CONST_DOUBLE 3332 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT); 3333 3334 real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1); 3335 fprintf (file, "%s", str); 3336 return; 3337 } 3338 case 'U' : 3339 /* Output a load/store with update indicator if appropriate. */ 3340 if (GET_CODE (x) == MEM) 3341 { 3342 rtx addr = XEXP (x, 0); 3343 switch (GET_CODE (addr)) 3344 { 3345 case PRE_INC: case PRE_DEC: case PRE_MODIFY: 3346 fputs (".a", file); break; 3347 case POST_INC: case POST_DEC: case POST_MODIFY: 3348 fputs (".ab", file); break; 3349 case PLUS: 3350 /* Are we using a scaled index? */ 3351 if (GET_CODE (XEXP (addr, 0)) == MULT) 3352 fputs (".as", file); 3353 /* Can we use a scaled offset? 
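   If so, emit ".as" and set OUTPUT_SCALED so that the address is later
   printed with its offset divided by the access size (see the PLUS case of
   the MEM handling below).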
*/ 3354 else if (CONST_INT_P (XEXP (addr, 1)) 3355 && GET_MODE_SIZE (GET_MODE (x)) > 1 3356 && (!(INTVAL (XEXP (addr, 1)) 3357 & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3)) 3358 /* Does it make a difference? */ 3359 && !SMALL_INT_RANGE(INTVAL (XEXP (addr, 1)), 3360 GET_MODE_SIZE (GET_MODE (x)) - 2, 0)) 3361 { 3362 fputs (".as", file); 3363 output_scaled = 1; 3364 } 3365 break; 3366 case REG: 3367 break; 3368 default: 3369 gcc_assert (CONSTANT_P (addr)); break; 3370 } 3371 } 3372 else 3373 output_operand_lossage ("invalid operand to %%U code"); 3374 return; 3375 case 'V' : 3376 /* Output cache bypass indicator for a load/store insn. Volatile memory 3377 refs are defined to use the cache bypass mechanism. */ 3378 if (GET_CODE (x) == MEM) 3379 { 3380 if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET ) 3381 fputs (".di", file); 3382 } 3383 else 3384 output_operand_lossage ("invalid operand to %%V code"); 3385 return; 3386 /* plt code. */ 3387 case 'P': 3388 case 0 : 3389 /* Do nothing special. */ 3390 break; 3391 case 'F': 3392 fputs (reg_names[REGNO (x)]+1, file); 3393 return; 3394 case '^': 3395 /* This punctuation character is needed because label references are 3396 printed in the output template using %l. This is a front end 3397 character, and when we want to emit a '@' before it, we have to use 3398 this '^'. */ 3399 3400 fputc('@',file); 3401 return; 3402 case 'O': 3403 /* Output an operator. */ 3404 switch (GET_CODE (x)) 3405 { 3406 case PLUS: fputs ("add", file); return; 3407 case SS_PLUS: fputs ("adds", file); return; 3408 case AND: fputs ("and", file); return; 3409 case IOR: fputs ("or", file); return; 3410 case XOR: fputs ("xor", file); return; 3411 case MINUS: fputs ("sub", file); return; 3412 case SS_MINUS: fputs ("subs", file); return; 3413 case ASHIFT: fputs ("asl", file); return; 3414 case ASHIFTRT: fputs ("asr", file); return; 3415 case LSHIFTRT: fputs ("lsr", file); return; 3416 case ROTATERT: fputs ("ror", file); return; 3417 case MULT: fputs ("mpy", file); return; 3418 case ABS: fputs ("abs", file); return; /* Unconditional. */ 3419 case NEG: fputs ("neg", file); return; 3420 case SS_NEG: fputs ("negs", file); return; 3421 case NOT: fputs ("not", file); return; /* Unconditional. */ 3422 case ZERO_EXTEND: 3423 fputs ("ext", file); /* bmsk allows predication. */ 3424 goto size_suffix; 3425 case SIGN_EXTEND: /* Unconditional. */ 3426 fputs ("sex", file); 3427 size_suffix: 3428 switch (GET_MODE (XEXP (x, 0))) 3429 { 3430 case QImode: fputs ("b", file); return; 3431 case HImode: fputs ("w", file); return; 3432 default: break; 3433 } 3434 break; 3435 case SS_TRUNCATE: 3436 if (GET_MODE (x) != HImode) 3437 break; 3438 fputs ("sat16", file); 3439 default: break; 3440 } 3441 output_operand_lossage ("invalid operand to %%O code"); return; 3442 case 'o': 3443 if (GET_CODE (x) == SYMBOL_REF) 3444 { 3445 assemble_name (file, XSTR (x, 0)); 3446 return; 3447 } 3448 break; 3449 case '&': 3450 if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason) 3451 fprintf (file, "; unalign: %d", cfun->machine->unalign); 3452 return; 3453 case '+': 3454 if (TARGET_V2) 3455 fputs ("m", file); 3456 else 3457 fputs ("h", file); 3458 return; 3459 case '_': 3460 if (TARGET_V2) 3461 fputs ("h", file); 3462 else 3463 fputs ("w", file); 3464 return; 3465 default : 3466 /* Unknown flag. 
*/ 3467 output_operand_lossage ("invalid operand output code"); 3468 } 3469 3470 switch (GET_CODE (x)) 3471 { 3472 case REG : 3473 fputs (reg_names[REGNO (x)], file); 3474 break; 3475 case MEM : 3476 { 3477 rtx addr = XEXP (x, 0); 3478 int size = GET_MODE_SIZE (GET_MODE (x)); 3479 3480 fputc ('[', file); 3481 3482 switch (GET_CODE (addr)) 3483 { 3484 case PRE_INC: case POST_INC: 3485 output_address (VOIDmode, 3486 plus_constant (Pmode, XEXP (addr, 0), size)); break; 3487 case PRE_DEC: case POST_DEC: 3488 output_address (VOIDmode, 3489 plus_constant (Pmode, XEXP (addr, 0), -size)); 3490 break; 3491 case PRE_MODIFY: case POST_MODIFY: 3492 output_address (VOIDmode, XEXP (addr, 1)); break; 3493 case PLUS: 3494 if (output_scaled) 3495 { 3496 output_address (VOIDmode, 3497 plus_constant (Pmode, XEXP (addr, 0), 3498 (INTVAL (XEXP (addr, 1)) 3499 >> (size == 2 ? 1 : 2)))); 3500 output_scaled = 0; 3501 } 3502 else 3503 output_address (VOIDmode, addr); 3504 break; 3505 default: 3506 if (flag_pic && CONSTANT_ADDRESS_P (addr)) 3507 arc_output_pic_addr_const (file, addr, code); 3508 else 3509 output_address (VOIDmode, addr); 3510 break; 3511 } 3512 fputc (']', file); 3513 break; 3514 } 3515 case CONST_DOUBLE : 3516 /* We handle SFmode constants here as output_addr_const doesn't. */ 3517 if (GET_MODE (x) == SFmode) 3518 { 3519 long l; 3520 3521 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); 3522 fprintf (file, "0x%08lx", l); 3523 break; 3524 } 3525 /* FALLTHRU */ 3526 /* Let output_addr_const deal with it. */ 3527 default : 3528 if (flag_pic 3529 || (GET_CODE (x) == CONST 3530 && GET_CODE (XEXP (x, 0)) == UNSPEC 3531 && (XINT (XEXP (x, 0), 1) == UNSPEC_TLS_OFF 3532 || XINT (XEXP (x, 0), 1) == UNSPEC_TLS_GD)) 3533 || (GET_CODE (x) == CONST 3534 && GET_CODE (XEXP (x, 0)) == PLUS 3535 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC 3536 && (XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_TLS_OFF 3537 || XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_TLS_GD))) 3538 arc_output_pic_addr_const (file, x, code); 3539 else 3540 { 3541 /* FIXME: Dirty way to handle @var@sda+const. Shd be handled 3542 with asm_output_symbol_ref */ 3543 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) 3544 { 3545 x = XEXP (x, 0); 3546 output_addr_const (file, XEXP (x, 0)); 3547 if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF && SYMBOL_REF_SMALL_P (XEXP (x, 0))) 3548 fprintf (file, "@sda"); 3549 3550 if (GET_CODE (XEXP (x, 1)) != CONST_INT 3551 || INTVAL (XEXP (x, 1)) >= 0) 3552 fprintf (file, "+"); 3553 output_addr_const (file, XEXP (x, 1)); 3554 } 3555 else 3556 output_addr_const (file, x); 3557 } 3558 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x)) 3559 fprintf (file, "@sda"); 3560 break; 3561 } 3562 } 3563 3564 /* Print a memory address as an operand to reference that memory location. 
*/ 3565 3566 void 3567 arc_print_operand_address (FILE *file , rtx addr) 3568 { 3569 register rtx base, index = 0; 3570 3571 switch (GET_CODE (addr)) 3572 { 3573 case REG : 3574 fputs (reg_names[REGNO (addr)], file); 3575 break; 3576 case SYMBOL_REF : 3577 output_addr_const (file, addr); 3578 if (SYMBOL_REF_SMALL_P (addr)) 3579 fprintf (file, "@sda"); 3580 break; 3581 case PLUS : 3582 if (GET_CODE (XEXP (addr, 0)) == MULT) 3583 index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1); 3584 else if (CONST_INT_P (XEXP (addr, 0))) 3585 index = XEXP (addr, 0), base = XEXP (addr, 1); 3586 else 3587 base = XEXP (addr, 0), index = XEXP (addr, 1); 3588 3589 gcc_assert (OBJECT_P (base)); 3590 arc_print_operand_address (file, base); 3591 if (CONSTANT_P (base) && CONST_INT_P (index)) 3592 fputc ('+', file); 3593 else 3594 fputc (',', file); 3595 gcc_assert (OBJECT_P (index)); 3596 arc_print_operand_address (file, index); 3597 break; 3598 case CONST: 3599 { 3600 rtx c = XEXP (addr, 0); 3601 3602 if ((GET_CODE (c) == UNSPEC 3603 && (XINT (c, 1) == UNSPEC_TLS_OFF 3604 || XINT (c, 1) == UNSPEC_TLS_IE)) 3605 || (GET_CODE (c) == PLUS 3606 && GET_CODE (XEXP (c, 0)) == UNSPEC 3607 && (XINT (XEXP (c, 0), 1) == UNSPEC_TLS_OFF 3608 || XINT (XEXP (c, 0), 1) == ARC_UNSPEC_GOTOFFPC))) 3609 { 3610 arc_output_pic_addr_const (file, c, 0); 3611 break; 3612 } 3613 gcc_assert (GET_CODE (c) == PLUS); 3614 gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF); 3615 gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT); 3616 3617 output_address (VOIDmode, XEXP (addr, 0)); 3618 3619 break; 3620 } 3621 case PRE_INC : 3622 case PRE_DEC : 3623 /* We shouldn't get here as we've lost the mode of the memory object 3624 (which says how much to inc/dec by. */ 3625 gcc_unreachable (); 3626 break; 3627 default : 3628 if (flag_pic) 3629 arc_output_pic_addr_const (file, addr, 0); 3630 else 3631 output_addr_const (file, addr); 3632 break; 3633 } 3634 } 3635 3636 /* Conditional execution support. 3637 3638 This is based on the ARM port but for now is much simpler. 3639 3640 A finite state machine takes care of noticing whether or not instructions 3641 can be conditionally executed, and thus decrease execution time and code 3642 size by deleting branch instructions. The fsm is controlled by 3643 arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the 3644 actions of PRINT_OPERAND. The patterns in the .md file for the branch 3645 insns also have a hand in this. */ 3646 /* The way we leave dealing with non-anulled or annull-false delay slot 3647 insns to the consumer is awkward. */ 3648 3649 /* The state of the fsm controlling condition codes are: 3650 0: normal, do nothing special 3651 1: don't output this insn 3652 2: don't output this insn 3653 3: make insns conditional 3654 4: make insns conditional 3655 5: make insn conditional (only for outputting anulled delay slot insns) 3656 3657 special value for cfun->machine->uid_ccfsm_state: 3658 6: return with but one insn before it since function start / call 3659 3660 State transitions (state->state by whom, under what condition): 3661 0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over 3662 some instructions. 3663 0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed 3664 by zero or more non-jump insns and an unconditional branch with 3665 the same target label as the condbranch. 
3666 1 -> 3 branch patterns, after having not output the conditional branch 3667 2 -> 4 branch patterns, after having not output the conditional branch 3668 0 -> 5 branch patterns, for an annulled delay slot insn. 3669 3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached 3670 (the target label has CODE_LABEL_NUMBER equal to 3671 arc_ccfsm_target_label). 3672 4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached 3673 3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns. 3674 5 -> 0 when outputting the delay slot insn 3675 3676 If the jump clobbers the conditions then we use states 2 and 4. 3677 3678 A similar thing can be done with conditional return insns. 3679 3680 We also handle separating branches from sets of the condition code. 3681 This is done here because knowledge of the ccfsm state is required, 3682 we may not be outputting the branch. */ 3683 3684 /* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current, 3685 before letting final output INSN. */ 3686 3687 static void 3688 arc_ccfsm_advance (rtx_insn *insn, struct arc_ccfsm *state) 3689 { 3690 /* BODY will hold the body of INSN. */ 3691 register rtx body; 3692 3693 /* This will be 1 if trying to repeat the trick (i.e., do the `else' part of 3694 an if/then/else), and things need to be reversed. */ 3695 int reverse = 0; 3696 3697 /* If we start with a return insn, we only succeed if we find another one. */ 3698 int seeking_return = 0; 3699 3700 /* START_INSN will hold the insn from where we start looking. This is the 3701 first insn after the following code_label if REVERSE is true. */ 3702 rtx_insn *start_insn = insn; 3703 3704 /* Type of the jump_insn. Brcc insns don't affect ccfsm changes, 3705 since they don't rely on a cmp preceding them. */ 3706 enum attr_type jump_insn_type; 3707 3708 /* Allow -mdebug-ccfsm to turn this off so we can see how well it does. 3709 We can't do this in macro FINAL_PRESCAN_INSN because it's called from 3710 final_scan_insn which has `optimize' as a local. */ 3711 if (optimize < 2 || TARGET_NO_COND_EXEC) 3712 return; 3713 3714 /* Ignore notes and labels. */ 3715 if (!INSN_P (insn)) 3716 return; 3717 body = PATTERN (insn); 3718 /* If in state 4, check if the target branch is reached, in order to 3719 change back to state 0. */ 3720 if (state->state == 4) 3721 { 3722 if (insn == state->target_insn) 3723 { 3724 state->target_insn = NULL; 3725 state->state = 0; 3726 } 3727 return; 3728 } 3729 3730 /* If in state 3, it is possible to repeat the trick, if this insn is an 3731 unconditional branch to a label, and immediately following this branch 3732 is the previous target label which is only used once, and the label this 3733 branch jumps to is not too far off. Or in other words "we've done the 3734 `then' part, see if we can do the `else' part." */ 3735 if (state->state == 3) 3736 { 3737 if (simplejump_p (insn)) 3738 { 3739 start_insn = next_nonnote_insn (start_insn); 3740 if (GET_CODE (start_insn) == BARRIER) 3741 { 3742 /* ??? Isn't this always a barrier?
*/ 3743 start_insn = next_nonnote_insn (start_insn); 3744 } 3745 if (GET_CODE (start_insn) == CODE_LABEL 3746 && CODE_LABEL_NUMBER (start_insn) == state->target_label 3747 && LABEL_NUSES (start_insn) == 1) 3748 reverse = TRUE; 3749 else 3750 return; 3751 } 3752 else if (GET_CODE (body) == SIMPLE_RETURN) 3753 { 3754 start_insn = next_nonnote_insn (start_insn); 3755 if (GET_CODE (start_insn) == BARRIER) 3756 start_insn = next_nonnote_insn (start_insn); 3757 if (GET_CODE (start_insn) == CODE_LABEL 3758 && CODE_LABEL_NUMBER (start_insn) == state->target_label 3759 && LABEL_NUSES (start_insn) == 1) 3760 { 3761 reverse = TRUE; 3762 seeking_return = 1; 3763 } 3764 else 3765 return; 3766 } 3767 else 3768 return; 3769 } 3770 3771 if (GET_CODE (insn) != JUMP_INSN 3772 || GET_CODE (PATTERN (insn)) == ADDR_VEC 3773 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) 3774 return; 3775 3776 /* We can't predicate BRCC or loop ends. 3777 Also, when generating PIC code, and considering a medium range call, 3778 we can't predicate the call. */ 3779 jump_insn_type = get_attr_type (insn); 3780 if (jump_insn_type == TYPE_BRCC 3781 || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT 3782 || jump_insn_type == TYPE_LOOP_END 3783 || (jump_insn_type == TYPE_CALL && !get_attr_predicable (insn))) 3784 return; 3785 3786 /* This jump might be paralleled with a clobber of the condition codes, 3787 the jump should always come first. */ 3788 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) 3789 body = XVECEXP (body, 0, 0); 3790 3791 if (reverse 3792 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC 3793 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE)) 3794 { 3795 int insns_skipped = 0, fail = FALSE, succeed = FALSE; 3796 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */ 3797 int then_not_else = TRUE; 3798 /* Nonzero if next insn must be the target label. */ 3799 int next_must_be_target_label_p; 3800 rtx_insn *this_insn = start_insn; 3801 rtx label = 0; 3802 3803 /* Register the insn jumped to. */ 3804 if (reverse) 3805 { 3806 if (!seeking_return) 3807 label = XEXP (SET_SRC (body), 0); 3808 } 3809 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF) 3810 label = XEXP (XEXP (SET_SRC (body), 1), 0); 3811 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF) 3812 { 3813 label = XEXP (XEXP (SET_SRC (body), 2), 0); 3814 then_not_else = FALSE; 3815 } 3816 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == SIMPLE_RETURN) 3817 seeking_return = 1; 3818 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == SIMPLE_RETURN) 3819 { 3820 seeking_return = 1; 3821 then_not_else = FALSE; 3822 } 3823 else 3824 gcc_unreachable (); 3825 3826 /* If this is a non-annulled branch with a delay slot, there is 3827 no need to conditionalize the delay slot. */ 3828 if (NEXT_INSN (PREV_INSN (insn)) != insn 3829 && state->state == 0 && !INSN_ANNULLED_BRANCH_P (insn)) 3830 { 3831 this_insn = NEXT_INSN (this_insn); 3832 gcc_assert (NEXT_INSN (NEXT_INSN (PREV_INSN (start_insn))) 3833 == NEXT_INSN (this_insn)); 3834 } 3835 /* See how many insns this branch skips, and what kind of insns. If all 3836 insns are okay, and the label or unconditional branch to the same 3837 label is not too far away, succeed. 
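As a sketch of the payoff (the mnemonics are illustrative, not actual compiler output): a sequence such as "bne @.L1" followed by "add r0,r0,1" and the label ".L1:" can drop the branch and predicate the skipped insn instead, ending up as roughly "add.eq r0,r0,1"; that is what the 1 -> 3 path (and the 2 -> 4 path for the variant ending in an unconditional branch) arranges.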
*/ 3838 for (insns_skipped = 0, next_must_be_target_label_p = FALSE; 3839 !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED; 3840 insns_skipped++) 3841 { 3842 rtx scanbody; 3843 3844 this_insn = next_nonnote_insn (this_insn); 3845 if (!this_insn) 3846 break; 3847 3848 if (next_must_be_target_label_p) 3849 { 3850 if (GET_CODE (this_insn) == BARRIER) 3851 continue; 3852 if (GET_CODE (this_insn) == CODE_LABEL 3853 && this_insn == label) 3854 { 3855 state->state = 1; 3856 succeed = TRUE; 3857 } 3858 else 3859 fail = TRUE; 3860 break; 3861 } 3862 3863 switch (GET_CODE (this_insn)) 3864 { 3865 case CODE_LABEL: 3866 /* Succeed if it is the target label, otherwise fail since 3867 control falls in from somewhere else. */ 3868 if (this_insn == label) 3869 { 3870 state->state = 1; 3871 succeed = TRUE; 3872 } 3873 else 3874 fail = TRUE; 3875 break; 3876 3877 case BARRIER: 3878 /* Succeed if the following insn is the target label. 3879 Otherwise fail. 3880 If return insns are used then the last insn in a function 3881 will be a barrier. */ 3882 next_must_be_target_label_p = TRUE; 3883 break; 3884 3885 case CALL_INSN: 3886 /* Can handle a call insn if there are no insns after it. 3887 IE: The next "insn" is the target label. We don't have to 3888 worry about delay slots as such insns are SEQUENCE's inside 3889 INSN's. ??? It is possible to handle such insns though. */ 3890 if (get_attr_cond (this_insn) == COND_CANUSE) 3891 next_must_be_target_label_p = TRUE; 3892 else 3893 fail = TRUE; 3894 break; 3895 3896 case JUMP_INSN: 3897 scanbody = PATTERN (this_insn); 3898 3899 /* If this is an unconditional branch to the same label, succeed. 3900 If it is to another label, do nothing. If it is conditional, 3901 fail. */ 3902 /* ??? Probably, the test for the SET and the PC are 3903 unnecessary. */ 3904 3905 if (GET_CODE (scanbody) == SET 3906 && GET_CODE (SET_DEST (scanbody)) == PC) 3907 { 3908 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF 3909 && XEXP (SET_SRC (scanbody), 0) == label && !reverse) 3910 { 3911 state->state = 2; 3912 succeed = TRUE; 3913 } 3914 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE) 3915 fail = TRUE; 3916 else if (get_attr_cond (this_insn) != COND_CANUSE) 3917 fail = TRUE; 3918 } 3919 else if (GET_CODE (scanbody) == SIMPLE_RETURN 3920 && seeking_return) 3921 { 3922 state->state = 2; 3923 succeed = TRUE; 3924 } 3925 else if (GET_CODE (scanbody) == PARALLEL) 3926 { 3927 if (get_attr_cond (this_insn) != COND_CANUSE) 3928 fail = TRUE; 3929 } 3930 break; 3931 3932 case INSN: 3933 scanbody = PATTERN (this_insn); 3934 3935 /* We can only do this with insns that can use the condition 3936 codes (and don't set them). */ 3937 if (GET_CODE (scanbody) == SET 3938 || GET_CODE (scanbody) == PARALLEL) 3939 { 3940 if (get_attr_cond (this_insn) != COND_CANUSE) 3941 fail = TRUE; 3942 } 3943 /* We can't handle other insns like sequences. */ 3944 else 3945 fail = TRUE; 3946 break; 3947 3948 default: 3949 break; 3950 } 3951 } 3952 3953 if (succeed) 3954 { 3955 if ((!seeking_return) && (state->state == 1 || reverse)) 3956 state->target_label = CODE_LABEL_NUMBER (label); 3957 else if (seeking_return || state->state == 2) 3958 { 3959 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE) 3960 { 3961 this_insn = next_nonnote_insn (this_insn); 3962 3963 gcc_assert (!this_insn || 3964 (GET_CODE (this_insn) != BARRIER 3965 && GET_CODE (this_insn) != CODE_LABEL)); 3966 } 3967 if (!this_insn) 3968 { 3969 /* Oh dear! we ran off the end, give up. 
*/ 3970 extract_insn_cached (insn); 3971 state->state = 0; 3972 state->target_insn = NULL; 3973 return; 3974 } 3975 state->target_insn = this_insn; 3976 } 3977 else 3978 gcc_unreachable (); 3979 3980 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from 3981 what it was. */ 3982 if (!reverse) 3983 { 3984 state->cond = XEXP (SET_SRC (body), 0); 3985 state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0)); 3986 } 3987 3988 if (reverse || then_not_else) 3989 state->cc = ARC_INVERSE_CONDITION_CODE (state->cc); 3990 } 3991 3992 /* Restore recog_operand. Getting the attributes of other insns can 3993 destroy this array, but final.c assumes that it remains intact 3994 across this call; since the insn has been recognized already we 3995 call insn_extract direct. */ 3996 extract_insn_cached (insn); 3997 } 3998 } 3999 4000 /* Record that we are currently outputting label NUM with prefix PREFIX. 4001 It it's the label we're looking for, reset the ccfsm machinery. 4002 4003 Called from ASM_OUTPUT_INTERNAL_LABEL. */ 4004 4005 static void 4006 arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state) 4007 { 4008 if (state->state == 3 && state->target_label == num 4009 && !strcmp (prefix, "L")) 4010 { 4011 state->state = 0; 4012 state->target_insn = NULL; 4013 } 4014 } 4015 4016 /* We are considering a conditional branch with the condition COND. 4017 Check if we want to conditionalize a delay slot insn, and if so modify 4018 the ccfsm state accordingly. 4019 REVERSE says branch will branch when the condition is false. */ 4020 void 4021 arc_ccfsm_record_condition (rtx cond, bool reverse, rtx_insn *jump, 4022 struct arc_ccfsm *state) 4023 { 4024 rtx_insn *seq_insn = NEXT_INSN (PREV_INSN (jump)); 4025 if (!state) 4026 state = &arc_ccfsm_current; 4027 4028 gcc_assert (state->state == 0); 4029 if (seq_insn != jump) 4030 { 4031 rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1); 4032 4033 if (!as_a<rtx_insn *> (insn)->deleted () 4034 && INSN_ANNULLED_BRANCH_P (jump) 4035 && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn))) 4036 { 4037 state->cond = cond; 4038 state->cc = get_arc_condition_code (cond); 4039 if (!reverse) 4040 arc_ccfsm_current.cc 4041 = ARC_INVERSE_CONDITION_CODE (state->cc); 4042 rtx pat = PATTERN (insn); 4043 if (GET_CODE (pat) == COND_EXEC) 4044 gcc_assert ((INSN_FROM_TARGET_P (insn) 4045 ? ARC_INVERSE_CONDITION_CODE (state->cc) : state->cc) 4046 == get_arc_condition_code (XEXP (pat, 0))); 4047 else 4048 state->state = 5; 4049 } 4050 } 4051 } 4052 4053 /* Update *STATE as we would when we emit INSN. */ 4054 4055 static void 4056 arc_ccfsm_post_advance (rtx_insn *insn, struct arc_ccfsm *state) 4057 { 4058 enum attr_type type; 4059 4060 if (LABEL_P (insn)) 4061 arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state); 4062 else if (JUMP_P (insn) 4063 && GET_CODE (PATTERN (insn)) != ADDR_VEC 4064 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC 4065 && ((type = get_attr_type (insn)) == TYPE_BRANCH 4066 || ((type == TYPE_UNCOND_BRANCH 4067 || type == TYPE_RETURN) 4068 && ARC_CCFSM_BRANCH_DELETED_P (state)))) 4069 { 4070 if (ARC_CCFSM_BRANCH_DELETED_P (state)) 4071 ARC_CCFSM_RECORD_BRANCH_DELETED (state); 4072 else 4073 { 4074 rtx src = SET_SRC (PATTERN (insn)); 4075 arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx, 4076 insn, state); 4077 } 4078 } 4079 else if (arc_ccfsm_current.state == 5) 4080 arc_ccfsm_current.state = 0; 4081 } 4082 4083 /* Return true if the current insn, which is a conditional branch, is to be 4084 deleted. 
*/ 4085 4086 bool 4087 arc_ccfsm_branch_deleted_p (void) 4088 { 4089 return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current); 4090 } 4091 4092 /* Record that a branch isn't output because subsequent insns can be 4093 conditionalized. */ 4094 4095 void 4096 arc_ccfsm_record_branch_deleted (void) 4097 { 4098 ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current); 4099 } 4100 4101 /* During insn output, indicate if the current insn is predicated. */ 4102 4103 bool 4104 arc_ccfsm_cond_exec_p (void) 4105 { 4106 return (cfun->machine->prescan_initialized 4107 && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current)); 4108 } 4109 4110 /* Like next_active_insn, but return NULL if we find an ADDR_(DIFF_)VEC, 4111 and look inside SEQUENCEs. */ 4112 4113 static rtx_insn * 4114 arc_next_active_insn (rtx_insn *insn, struct arc_ccfsm *statep) 4115 { 4116 rtx pat; 4117 4118 do 4119 { 4120 if (statep) 4121 arc_ccfsm_post_advance (insn, statep); 4122 insn = NEXT_INSN (insn); 4123 if (!insn || BARRIER_P (insn)) 4124 return NULL; 4125 if (statep) 4126 arc_ccfsm_advance (insn, statep); 4127 } 4128 while (NOTE_P (insn) 4129 || (cfun->machine->arc_reorg_started 4130 && LABEL_P (insn) && !label_to_alignment (insn)) 4131 || (NONJUMP_INSN_P (insn) 4132 && (GET_CODE (PATTERN (insn)) == USE 4133 || GET_CODE (PATTERN (insn)) == CLOBBER))); 4134 if (!LABEL_P (insn)) 4135 { 4136 gcc_assert (INSN_P (insn)); 4137 pat = PATTERN (insn); 4138 if (GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC) 4139 return NULL; 4140 if (GET_CODE (pat) == SEQUENCE) 4141 return as_a <rtx_insn *> (XVECEXP (pat, 0, 0)); 4142 } 4143 return insn; 4144 } 4145 4146 /* When deciding if an insn should be output short, we want to know something 4147 about the following insns: 4148 - if another insn follows which we know we can output as a short insn 4149 before an alignment-sensitive point, we can output this insn short: 4150 the decision about the eventual alignment can be postponed. 4151 - if a to-be-aligned label comes next, we should output this insn such 4152 as to get / preserve 4-byte alignment. 4153 - if a likely branch without a delay slot insn, or a call with an immediately 4154 following short insn comes next, we should output this insn such as to 4155 get / preserve 2 mod 4 unalignment. 4156 - do the same for a not completely unlikely branch with a short insn 4157 following before any other branch / label. 4158 - in order to decide if we are actually looking at a branch, we need to 4159 call arc_ccfsm_advance. 4160 - in order to decide if we are looking at a short insn, we should know 4161 if it is conditionalized. To a first order of approximation this is 4162 the case if the state from arc_ccfsm_advance from before this insn 4163 indicates the insn is conditionalized. However, a further refinement 4164 could be to not conditionalize an insn if the destination register(s) 4165 is/are dead in the non-executed case. */ 4166 /* Return non-zero if INSN should be output as a short insn. UNALIGN is 4167 zero if the current insn is aligned to a 4-byte boundary, two otherwise. 4168 If CHECK_ATTR is greater than 0, check the iscompact attribute first.
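For instance (illustrative only): if the insn stream is currently at a 2-mod-4 boundary, UNALIGN is 2; emitting this insn as a 2-byte short form then restores 4-byte alignment, while a 4-byte form keeps the 2-mod-4 unalignment, which is exactly the property the heuristics above try to steer.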
*/ 4169 4170 int 4171 arc_verify_short (rtx_insn *insn, int, int check_attr) 4172 { 4173 enum attr_iscompact iscompact; 4174 struct machine_function *machine; 4175 4176 if (check_attr > 0) 4177 { 4178 iscompact = get_attr_iscompact (insn); 4179 if (iscompact == ISCOMPACT_FALSE) 4180 return 0; 4181 } 4182 machine = cfun->machine; 4183 4184 if (machine->force_short_suffix >= 0) 4185 return machine->force_short_suffix; 4186 4187 return (get_attr_length (insn) & 2) != 0; 4188 } 4189 4190 /* When outputting an instruction (alternative) that can potentially be short, 4191 output the short suffix if the insn is in fact short, and update 4192 cfun->machine->unalign accordingly. */ 4193 4194 static void 4195 output_short_suffix (FILE *file) 4196 { 4197 rtx_insn *insn = current_output_insn; 4198 4199 if (arc_verify_short (insn, cfun->machine->unalign, 1)) 4200 { 4201 fprintf (file, "_s"); 4202 cfun->machine->unalign ^= 2; 4203 } 4204 /* Restore recog_operand. */ 4205 extract_insn_cached (insn); 4206 } 4207 4208 /* Implement FINAL_PRESCAN_INSN. */ 4209 4210 void 4211 arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED, 4212 int noperands ATTRIBUTE_UNUSED) 4213 { 4214 if (TARGET_DUMPISIZE) 4215 fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn))); 4216 4217 /* Output a nop if necessary to prevent a hazard. 4218 Don't do this for delay slots: inserting a nop would 4219 alter semantics, and the only time we would find a hazard is for a 4220 call function result - and in that case, the hazard is spurious to 4221 start with. */ 4222 if (PREV_INSN (insn) 4223 && PREV_INSN (NEXT_INSN (insn)) == insn 4224 && arc_hazard (prev_real_insn (insn), insn)) 4225 { 4226 current_output_insn = 4227 emit_insn_before (gen_nop (), NEXT_INSN (PREV_INSN (insn))); 4228 final_scan_insn (current_output_insn, asm_out_file, optimize, 1, NULL); 4229 current_output_insn = insn; 4230 } 4231 /* Restore extraction data which might have been clobbered by arc_hazard. */ 4232 extract_constrain_insn_cached (insn); 4233 4234 if (!cfun->machine->prescan_initialized) 4235 { 4236 /* Clear lingering state from branch shortening. */ 4237 memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current); 4238 cfun->machine->prescan_initialized = 1; 4239 } 4240 arc_ccfsm_advance (insn, &arc_ccfsm_current); 4241 4242 cfun->machine->size_reason = 0; 4243 } 4244 4245 /* Given FROM and TO register numbers, say whether this elimination is allowed. 4246 Frame pointer elimination is automatically handled. 4247 4248 All eliminations are permissible. If we need a frame 4249 pointer, we must eliminate ARG_POINTER_REGNUM into 4250 FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */ 4251 4252 static bool 4253 arc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) 4254 { 4255 return to == FRAME_POINTER_REGNUM || !arc_frame_pointer_required (); 4256 } 4257 4258 /* Define the offset between two registers, one to be eliminated, and 4259 the other its replacement, at the start of a routine. */ 4260 4261 int 4262 arc_initial_elimination_offset (int from, int to) 4263 { 4264 if (! 
cfun->machine->frame_info.initialized) 4265 arc_compute_frame_size (get_frame_size ()); 4266 4267 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) 4268 { 4269 return (cfun->machine->frame_info.extra_size 4270 + cfun->machine->frame_info.reg_size); 4271 } 4272 4273 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 4274 { 4275 return (cfun->machine->frame_info.total_size 4276 - cfun->machine->frame_info.pretend_size); 4277 } 4278 4279 if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM)) 4280 { 4281 return (cfun->machine->frame_info.total_size 4282 - (cfun->machine->frame_info.pretend_size 4283 + cfun->machine->frame_info.extra_size 4284 + cfun->machine->frame_info.reg_size)); 4285 } 4286 4287 gcc_unreachable (); 4288 } 4289 4290 static bool 4291 arc_frame_pointer_required (void) 4292 { 4293 return cfun->calls_alloca; 4294 } 4295 4296 4297 /* Return the destination address of a branch. */ 4298 4299 int 4300 branch_dest (rtx branch) 4301 { 4302 rtx pat = PATTERN (branch); 4303 rtx dest = (GET_CODE (pat) == PARALLEL 4304 ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat)); 4305 int dest_uid; 4306 4307 if (GET_CODE (dest) == IF_THEN_ELSE) 4308 dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1); 4309 4310 dest = XEXP (dest, 0); 4311 dest_uid = INSN_UID (dest); 4312 4313 return INSN_ADDRESSES (dest_uid); 4314 } 4315 4316 4317 /* Implement TARGET_ENCODE_SECTION_INFO hook. */ 4318 4319 static void 4320 arc_encode_section_info (tree decl, rtx rtl, int first) 4321 { 4322 /* For sdata, SYMBOL_FLAG_LOCAL and SYMBOL_FLAG_FUNCTION. 4323 This clears machine specific flags, so has to come first. */ 4324 default_encode_section_info (decl, rtl, first); 4325 4326 /* Check if it is a function, and whether it has the 4327 [long/medium/short]_call attribute specified. */ 4328 if (TREE_CODE (decl) == FUNCTION_DECL) 4329 { 4330 rtx symbol = XEXP (rtl, 0); 4331 int flags = SYMBOL_REF_FLAGS (symbol); 4332 4333 tree attr = (TREE_TYPE (decl) != error_mark_node 4334 ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE); 4335 tree long_call_attr = lookup_attribute ("long_call", attr); 4336 tree medium_call_attr = lookup_attribute ("medium_call", attr); 4337 tree short_call_attr = lookup_attribute ("short_call", attr); 4338 4339 if (long_call_attr != NULL_TREE) 4340 flags |= SYMBOL_FLAG_LONG_CALL; 4341 else if (medium_call_attr != NULL_TREE) 4342 flags |= SYMBOL_FLAG_MEDIUM_CALL; 4343 else if (short_call_attr != NULL_TREE) 4344 flags |= SYMBOL_FLAG_SHORT_CALL; 4345 4346 SYMBOL_REF_FLAGS (symbol) = flags; 4347 } 4348 else if (TREE_CODE (decl) == VAR_DECL) 4349 { 4350 rtx symbol = XEXP (rtl, 0); 4351 4352 tree attr = (TREE_TYPE (decl) != error_mark_node 4353 ? DECL_ATTRIBUTES (decl) : NULL_TREE); 4354 4355 tree sec_attr = lookup_attribute ("section", attr); 4356 if (sec_attr) 4357 { 4358 const char *sec_name 4359 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (sec_attr))); 4360 if (strcmp (sec_name, ".cmem") == 0 4361 || strcmp (sec_name, ".cmem_shared") == 0 4362 || strcmp (sec_name, ".cmem_private") == 0) 4363 SYMBOL_REF_FLAGS (symbol) |= SYMBOL_FLAG_CMEM; 4364 } 4365 } 4366 } 4367 4368 /* This is how to output a definition of an internal numbered label where 4369 PREFIX is the class of label and NUM is the number within the class. 
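E.g. a PREFIX of "L" and a NUM of 42 come out as an assembler label along the lines of ".L42:" (the exact spelling is whatever default_internal_label and ASM_GENERATE_INTERNAL_LABEL produce; the spelling here is only an illustration). The ARC-specific part is the arc_ccfsm_at_label call, which resets the conditional-execution FSM once the label we were branching around is actually emitted.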
*/ 4370 4371 static void arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno) 4372 { 4373 if (cfun) 4374 arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current); 4375 default_internal_label (stream, prefix, labelno); 4376 } 4377 4378 /* Set the cpu type and print out other fancy things, 4379 at the top of the file. */ 4380 4381 static void arc_file_start (void) 4382 { 4383 default_file_start (); 4384 fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string); 4385 } 4386 4387 /* Cost functions. */ 4388 4389 /* Compute a (partial) cost for rtx X. Return true if the complete 4390 cost has been computed, and false if subexpressions should be 4391 scanned. In either case, *TOTAL contains the cost result. */ 4392 4393 static bool 4394 arc_rtx_costs (rtx x, machine_mode mode, int outer_code, 4395 int opno ATTRIBUTE_UNUSED, int *total, bool speed) 4396 { 4397 int code = GET_CODE (x); 4398 4399 switch (code) 4400 { 4401 /* Small integers are as cheap as registers. */ 4402 case CONST_INT: 4403 { 4404 bool nolimm = false; /* Can we do without long immediate? */ 4405 bool fast = false; /* Is the result available immediately? */ 4406 bool condexec = false; /* Does this allow conditiobnal execution? */ 4407 bool compact = false; /* Is a 16 bit opcode available? */ 4408 /* CONDEXEC also implies that we can have an unconditional 4409 3-address operation. */ 4410 4411 nolimm = compact = condexec = false; 4412 if (UNSIGNED_INT6 (INTVAL (x))) 4413 nolimm = condexec = compact = true; 4414 else 4415 { 4416 if (SMALL_INT (INTVAL (x))) 4417 nolimm = fast = true; 4418 switch (outer_code) 4419 { 4420 case AND: /* bclr, bmsk, ext[bw] */ 4421 if (satisfies_constraint_Ccp (x) /* bclr */ 4422 || satisfies_constraint_C1p (x) /* bmsk */) 4423 nolimm = fast = condexec = compact = true; 4424 break; 4425 case IOR: /* bset */ 4426 if (satisfies_constraint_C0p (x)) /* bset */ 4427 nolimm = fast = condexec = compact = true; 4428 break; 4429 case XOR: 4430 if (satisfies_constraint_C0p (x)) /* bxor */ 4431 nolimm = fast = condexec = true; 4432 break; 4433 case SET: 4434 if (satisfies_constraint_Crr (x)) /* ror b,u6 */ 4435 nolimm = true; 4436 default: 4437 break; 4438 } 4439 } 4440 /* FIXME: Add target options to attach a small cost if 4441 condexec / compact is not true. */ 4442 if (nolimm) 4443 { 4444 *total = 0; 4445 return true; 4446 } 4447 } 4448 /* FALLTHRU */ 4449 4450 /* 4 byte values can be fetched as immediate constants - 4451 let's give that the cost of an extra insn. */ 4452 case CONST: 4453 case LABEL_REF: 4454 case SYMBOL_REF: 4455 *total = COSTS_N_INSNS (1); 4456 return true; 4457 4458 case CONST_DOUBLE: 4459 { 4460 rtx first, second; 4461 4462 if (TARGET_DPFP) 4463 { 4464 *total = COSTS_N_INSNS (1); 4465 return true; 4466 } 4467 split_double (x, &first, &second); 4468 *total = COSTS_N_INSNS (!SMALL_INT (INTVAL (first)) 4469 + !SMALL_INT (INTVAL (second))); 4470 return true; 4471 } 4472 4473 /* Encourage synth_mult to find a synthetic multiply when reasonable. 4474 If we need more than 12 insns to do a multiply, then go out-of-line, 4475 since the call overhead will be < 10% of the cost of the multiply. */ 4476 case ASHIFT: 4477 case ASHIFTRT: 4478 case LSHIFTRT: 4479 if (TARGET_BARREL_SHIFTER) 4480 { 4481 /* If we want to shift a constant, we need a LIMM. */ 4482 /* ??? when the optimizers want to know if a constant should be 4483 hoisted, they ask for the cost of the constant. OUTER_CODE is 4484 insufficient context for shifts since we don't know which operand 4485 we are looking at. 
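Concretely (the numbers are only meant to show the shape of the problem): in "x << 5" the constant 5 fits in the instruction, while in "0x12345678 << x" the constant needs a 32-bit LIMM, yet both queries reach this point as a CONST_INT whose OUTER_CODE is the shift code, so the two cases cannot be told apart here.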
*/ 4486 if (CONSTANT_P (XEXP (x, 0))) 4487 { 4488 *total += (COSTS_N_INSNS (2) 4489 + rtx_cost (XEXP (x, 1), mode, (enum rtx_code) code, 4490 0, speed)); 4491 return true; 4492 } 4493 *total = COSTS_N_INSNS (1); 4494 } 4495 else if (GET_CODE (XEXP (x, 1)) != CONST_INT) 4496 *total = COSTS_N_INSNS (16); 4497 else 4498 { 4499 *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1))); 4500 /* ??? want_to_gcse_p can throw negative shift counts at us, 4501 and then panics when it gets a negative cost as result. 4502 Seen for gcc.c-torture/compile/20020710-1.c -Os . */ 4503 if (*total < 0) 4504 *total = 0; 4505 } 4506 return false; 4507 4508 case DIV: 4509 case UDIV: 4510 if (speed) 4511 *total = COSTS_N_INSNS(30); 4512 else 4513 *total = COSTS_N_INSNS(1); 4514 return false; 4515 4516 case MULT: 4517 if ((TARGET_DPFP && GET_MODE (x) == DFmode)) 4518 *total = COSTS_N_INSNS (1); 4519 else if (speed) 4520 *total= arc_multcost; 4521 /* We do not want synth_mult sequences when optimizing 4522 for size. */ 4523 else if (TARGET_MUL64_SET || TARGET_ARC700_MPY) 4524 *total = COSTS_N_INSNS (1); 4525 else 4526 *total = COSTS_N_INSNS (2); 4527 return false; 4528 case PLUS: 4529 if (GET_CODE (XEXP (x, 0)) == MULT 4530 && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) 4531 { 4532 *total += (rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed) 4533 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, PLUS, 1, speed)); 4534 return true; 4535 } 4536 return false; 4537 case MINUS: 4538 if (GET_CODE (XEXP (x, 1)) == MULT 4539 && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode)) 4540 { 4541 *total += (rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed) 4542 + rtx_cost (XEXP (XEXP (x, 1), 0), mode, PLUS, 1, speed)); 4543 return true; 4544 } 4545 return false; 4546 case COMPARE: 4547 { 4548 rtx op0 = XEXP (x, 0); 4549 rtx op1 = XEXP (x, 1); 4550 4551 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx 4552 && XEXP (op0, 1) == const1_rtx) 4553 { 4554 /* btst / bbit0 / bbit1: 4555 Small integers and registers are free; everything else can 4556 be put in a register. */ 4557 mode = GET_MODE (XEXP (op0, 0)); 4558 *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed) 4559 + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed)); 4560 return true; 4561 } 4562 if (GET_CODE (op0) == AND && op1 == const0_rtx 4563 && satisfies_constraint_C1p (XEXP (op0, 1))) 4564 { 4565 /* bmsk.f */ 4566 *total = rtx_cost (XEXP (op0, 0), VOIDmode, SET, 1, speed); 4567 return true; 4568 } 4569 /* add.f */ 4570 if (GET_CODE (op1) == NEG) 4571 { 4572 /* op0 might be constant, the inside of op1 is rather 4573 unlikely to be so. So swapping the operands might lower 4574 the cost. */ 4575 mode = GET_MODE (op0); 4576 *total = (rtx_cost (op0, mode, PLUS, 1, speed) 4577 + rtx_cost (XEXP (op1, 0), mode, PLUS, 0, speed)); 4578 } 4579 return false; 4580 } 4581 case EQ: case NE: 4582 if (outer_code == IF_THEN_ELSE 4583 && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT 4584 && XEXP (x, 1) == const0_rtx 4585 && XEXP (XEXP (x, 0), 1) == const1_rtx) 4586 { 4587 /* btst / bbit0 / bbit1: 4588 Small integers and registers are free; everything else can 4589 be put in a register. */ 4590 rtx op0 = XEXP (x, 0); 4591 4592 mode = GET_MODE (XEXP (op0, 0)); 4593 *total = (rtx_cost (XEXP (op0, 0), mode, SET, 1, speed) 4594 + rtx_cost (XEXP (op0, 2), mode, SET, 1, speed)); 4595 return true; 4596 } 4597 /* Fall through. */ 4598 /* scc_insn expands into two insns. */ 4599 case GTU: case GEU: case LEU: 4600 if (mode == SImode) 4601 *total += COSTS_N_INSNS (1); 4602 return false; 4603 case LTU: /* might use adc. 
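For example (an illustrative sequence, not necessarily the exact pattern the port emits): an unsigned SImode "x < y" can be computed as a flag-setting compare, "cmp x,y", followed by "adc r0,0,0", which materializes the carry (borrow) bit directly; this is why LTU is costed slightly below the other unsigned comparisons just below.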
*/ 4604 if (mode == SImode) 4605 *total += COSTS_N_INSNS (1) - 1; 4606 return false; 4607 default: 4608 return false; 4609 } 4610 } 4611 4612 /* Helper used by arc_legitimate_pc_offset_p. */ 4613 4614 static bool 4615 arc_needs_pcl_p (rtx x) 4616 { 4617 register const char *fmt; 4618 register int i, j; 4619 4620 if ((GET_CODE (x) == UNSPEC) 4621 && (XVECLEN (x, 0) == 1) 4622 && (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF)) 4623 switch (XINT (x, 1)) 4624 { 4625 case ARC_UNSPEC_GOT: 4626 case ARC_UNSPEC_GOTOFFPC: 4627 case UNSPEC_TLS_GD: 4628 case UNSPEC_TLS_IE: 4629 return true; 4630 default: 4631 break; 4632 } 4633 4634 fmt = GET_RTX_FORMAT (GET_CODE (x)); 4635 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 4636 { 4637 if (fmt[i] == 'e') 4638 { 4639 if (arc_needs_pcl_p (XEXP (x, i))) 4640 return true; 4641 } 4642 else if (fmt[i] == 'E') 4643 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 4644 if (arc_needs_pcl_p (XVECEXP (x, i, j))) 4645 return true; 4646 } 4647 4648 return false; 4649 } 4650 4651 /* Return true if ADDR is an address that needs to be expressed as an 4652 explicit sum of pcl + offset. */ 4653 4654 bool 4655 arc_legitimate_pc_offset_p (rtx addr) 4656 { 4657 if (GET_CODE (addr) != CONST) 4658 return false; 4659 4660 return arc_needs_pcl_p (addr); 4661 } 4662 4663 /* Return true if ADDR is a valid pic address. 4664 A valid pic address on arc should look like 4665 const (unspec (SYMBOL_REF/LABEL) (ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT)) */ 4666 4667 bool 4668 arc_legitimate_pic_addr_p (rtx addr) 4669 { 4670 if (GET_CODE (addr) == LABEL_REF) 4671 return true; 4672 if (GET_CODE (addr) != CONST) 4673 return false; 4674 4675 addr = XEXP (addr, 0); 4676 4677 4678 if (GET_CODE (addr) == PLUS) 4679 { 4680 if (GET_CODE (XEXP (addr, 1)) != CONST_INT) 4681 return false; 4682 addr = XEXP (addr, 0); 4683 } 4684 4685 if (GET_CODE (addr) != UNSPEC 4686 || XVECLEN (addr, 0) != 1) 4687 return false; 4688 4689 /* Must be one of @GOT, @GOTOFF, @GOTOFFPC, @tlsgd, tlsie. */ 4690 if (XINT (addr, 1) != ARC_UNSPEC_GOT 4691 && XINT (addr, 1) != ARC_UNSPEC_GOTOFF 4692 && XINT (addr, 1) != ARC_UNSPEC_GOTOFFPC 4693 && XINT (addr, 1) != UNSPEC_TLS_GD 4694 && XINT (addr, 1) != UNSPEC_TLS_IE) 4695 return false; 4696 4697 if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF 4698 && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF) 4699 return false; 4700 4701 return true; 4702 } 4703 4704 4705 4706 /* Return true if OP contains a symbol reference. */ 4707 4708 static bool 4709 symbolic_reference_mentioned_p (rtx op) 4710 { 4711 register const char *fmt; 4712 register int i; 4713 4714 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 4715 return true; 4716 4717 fmt = GET_RTX_FORMAT (GET_CODE (op)); 4718 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 4719 { 4720 if (fmt[i] == 'E') 4721 { 4722 register int j; 4723 4724 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 4725 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 4726 return true; 4727 } 4728 4729 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 4730 return true; 4731 } 4732 4733 return false; 4734 } 4735 4736 /* Return true if OP contains a SYMBOL_REF that is not wrapped in an unspec. 4737 If SKIP_LOCAL is true, skip symbols that bind locally. 4738 This is used further down in this file, and, without SKIP_LOCAL, 4739 in the addsi3 / subsi3 expanders when generating PIC code. 
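For example (RTL sketched by hand, not dumped from a real compilation): (plus (reg) (symbol_ref "foo")) mentions a raw symbol, so under -fpic it returns true unless SKIP_LOCAL filters it out because "foo" binds locally, whereas (const (unspec [(symbol_ref "foo")] ARC_UNSPEC_GOTOFFPC)) returns false, because the UNSPEC wrapper marks the reference as already converted to a PIC-safe form.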
*/ 4740 4741 bool 4742 arc_raw_symbolic_reference_mentioned_p (rtx op, bool skip_local) 4743 { 4744 register const char *fmt; 4745 register int i; 4746 4747 if (GET_CODE(op) == UNSPEC) 4748 return false; 4749 4750 if (GET_CODE (op) == SYMBOL_REF) 4751 { 4752 if (SYMBOL_REF_TLS_MODEL (op)) 4753 return true; 4754 if (!flag_pic) 4755 return false; 4756 tree decl = SYMBOL_REF_DECL (op); 4757 return !skip_local || !decl || !default_binds_local_p (decl); 4758 } 4759 4760 fmt = GET_RTX_FORMAT (GET_CODE (op)); 4761 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 4762 { 4763 if (fmt[i] == 'E') 4764 { 4765 register int j; 4766 4767 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 4768 if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j), 4769 skip_local)) 4770 return true; 4771 } 4772 4773 else if (fmt[i] == 'e' 4774 && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i), 4775 skip_local)) 4776 return true; 4777 } 4778 4779 return false; 4780 } 4781 4782 /* Get the thread pointer. */ 4783 4784 static rtx 4785 arc_get_tp (void) 4786 { 4787 /* If arc_tp_regno has been set, we can use that hard register 4788 directly as a base register. */ 4789 if (arc_tp_regno != -1) 4790 return gen_rtx_REG (Pmode, arc_tp_regno); 4791 4792 /* Otherwise, call __read_tp. Copy the result to a pseudo to avoid 4793 conflicts with function arguments / results. */ 4794 rtx reg = gen_reg_rtx (Pmode); 4795 emit_insn (gen_tls_load_tp_soft ()); 4796 emit_move_insn (reg, gen_rtx_REG (Pmode, R0_REG)); 4797 return reg; 4798 } 4799 4800 /* Helper to be used by TLS Global dynamic model. */ 4801 4802 static rtx 4803 arc_emit_call_tls_get_addr (rtx sym, int reloc, rtx eqv) 4804 { 4805 rtx r0 = gen_rtx_REG (Pmode, R0_REG); 4806 rtx call_fusage = NULL_RTX; 4807 4808 start_sequence (); 4809 4810 rtx x = arc_unspec_offset (sym, reloc); 4811 emit_move_insn (r0, x); 4812 use_reg (&call_fusage, r0); 4813 4814 gcc_assert (reloc == UNSPEC_TLS_GD); 4815 rtx call_insn = emit_call_insn (gen_tls_gd_get_addr (sym)); 4816 /* Should we set RTL_CONST_CALL_P? We read memory, but not in a 4817 way that the application should care. */ 4818 RTL_PURE_CALL_P (call_insn) = 1; 4819 add_function_usage_to (call_insn, call_fusage); 4820 4821 rtx_insn *insns = get_insns (); 4822 end_sequence (); 4823 4824 rtx dest = gen_reg_rtx (Pmode); 4825 emit_libcall_block (insns, dest, r0, eqv); 4826 return dest; 4827 } 4828 4829 #define DTPOFF_ZERO_SYM ".tdata" 4830 4831 /* Return a legitimized address for ADDR, 4832 which is a SYMBOL_REF with tls_model MODEL. 
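Roughly, and only as an illustration (the symbol x and the register numbers are made up): local-exec produces tp-relative arithmetic, i.e. an address of the form tp + x@tpoff, where tp is whatever arc_get_tp returns (the fixed register named by arc_tp_regno, or the result of calling __read_tp); initial-exec first loads the offset from a pc-relative GOT slot, something like "ld rN,[pcl,x@tlsie]", and adds it to tp; global-dynamic emits a call through the tls_gd_get_addr pattern with an @tlsgd operand; and PIC local-dynamic makes one such call for the module base symbol and then adds x@dtpoff.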
*/ 4833 4834 static rtx 4835 arc_legitimize_tls_address (rtx addr, enum tls_model model) 4836 { 4837 if (!flag_pic && model == TLS_MODEL_LOCAL_DYNAMIC) 4838 model = TLS_MODEL_LOCAL_EXEC; 4839 4840 switch (model) 4841 { 4842 case TLS_MODEL_LOCAL_DYNAMIC: 4843 rtx base; 4844 tree decl; 4845 const char *base_name; 4846 rtvec v; 4847 4848 decl = SYMBOL_REF_DECL (addr); 4849 base_name = DTPOFF_ZERO_SYM; 4850 if (decl && bss_initializer_p (decl)) 4851 base_name = ".tbss"; 4852 4853 base = gen_rtx_SYMBOL_REF (Pmode, base_name); 4854 if (strcmp (base_name, DTPOFF_ZERO_SYM) == 0) 4855 { 4856 if (!flag_pic) 4857 goto local_exec; 4858 v = gen_rtvec (1, addr); 4859 } 4860 else 4861 v = gen_rtvec (2, addr, base); 4862 addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_TLS_OFF); 4863 addr = gen_rtx_CONST (Pmode, addr); 4864 base = arc_legitimize_tls_address (base, TLS_MODEL_GLOBAL_DYNAMIC); 4865 return gen_rtx_PLUS (Pmode, force_reg (Pmode, base), addr); 4866 4867 case TLS_MODEL_GLOBAL_DYNAMIC: 4868 return arc_emit_call_tls_get_addr (addr, UNSPEC_TLS_GD, addr); 4869 4870 case TLS_MODEL_INITIAL_EXEC: 4871 addr = arc_unspec_offset (addr, UNSPEC_TLS_IE); 4872 addr = copy_to_mode_reg (Pmode, gen_const_mem (Pmode, addr)); 4873 return gen_rtx_PLUS (Pmode, arc_get_tp (), addr); 4874 4875 case TLS_MODEL_LOCAL_EXEC: 4876 local_exec: 4877 addr = arc_unspec_offset (addr, UNSPEC_TLS_OFF); 4878 return gen_rtx_PLUS (Pmode, arc_get_tp (), addr); 4879 default: 4880 gcc_unreachable (); 4881 } 4882 } 4883 4884 /* Legitimize a pic address reference in ORIG. 4885 The return value is the legitimated address. 4886 If OLDX is non-zero, it is the target to assign the address to first. */ 4887 4888 static rtx 4889 arc_legitimize_pic_address (rtx orig, rtx oldx) 4890 { 4891 rtx addr = orig; 4892 rtx pat = orig; 4893 rtx base; 4894 4895 if (oldx == orig) 4896 oldx = NULL; 4897 4898 if (GET_CODE (addr) == LABEL_REF) 4899 ; /* Do nothing. */ 4900 else if (GET_CODE (addr) == SYMBOL_REF) 4901 { 4902 enum tls_model model = SYMBOL_REF_TLS_MODEL (addr); 4903 if (model != 0) 4904 return arc_legitimize_tls_address (addr, model); 4905 else if (!flag_pic) 4906 return orig; 4907 else if (CONSTANT_POOL_ADDRESS_P (addr) || SYMBOL_REF_LOCAL_P (addr)) 4908 return arc_unspec_offset (addr, ARC_UNSPEC_GOTOFFPC); 4909 4910 /* This symbol must be referenced via a load from the Global 4911 Offset Table (@GOTPC). */ 4912 pat = arc_unspec_offset (addr, ARC_UNSPEC_GOT); 4913 pat = gen_const_mem (Pmode, pat); 4914 4915 if (oldx == NULL) 4916 oldx = gen_reg_rtx (Pmode); 4917 4918 emit_move_insn (oldx, pat); 4919 pat = oldx; 4920 } 4921 else 4922 { 4923 if (GET_CODE (addr) == CONST) 4924 { 4925 addr = XEXP (addr, 0); 4926 if (GET_CODE (addr) == UNSPEC) 4927 { 4928 /* Check that the unspec is one of the ones we generate? */ 4929 return orig; 4930 } 4931 /* fwprop is placing in the REG_EQUIV notes constant pic 4932 unspecs expressions. Then, loop may use these notes for 4933 optimizations resulting in complex patterns that are not 4934 supported by the current implementation. The following 4935 two if-cases are simplifying the complex patters to 4936 simpler ones. 
*/ 4937 else if (GET_CODE (addr) == MINUS) 4938 { 4939 rtx op0 = XEXP (addr, 0); 4940 rtx op1 = XEXP (addr, 1); 4941 gcc_assert (oldx); 4942 gcc_assert (GET_CODE (op1) == UNSPEC); 4943 4944 emit_move_insn (oldx, 4945 gen_rtx_CONST (SImode, 4946 arc_legitimize_pic_address (op1, 4947 NULL_RTX))); 4948 emit_insn (gen_rtx_SET (oldx, gen_rtx_MINUS (SImode, op0, oldx))); 4949 return oldx; 4950 4951 } 4952 else if (GET_CODE (addr) != PLUS) 4953 { 4954 rtx tmp = XEXP (addr, 0); 4955 enum rtx_code code = GET_CODE (addr); 4956 4957 /* It only works for UNARY operations. */ 4958 gcc_assert (UNARY_P (addr)); 4959 gcc_assert (GET_CODE (tmp) == UNSPEC); 4960 gcc_assert (oldx); 4961 4962 emit_move_insn 4963 (oldx, 4964 gen_rtx_CONST (SImode, 4965 arc_legitimize_pic_address (tmp, 4966 NULL_RTX))); 4967 4968 emit_insn (gen_rtx_SET (oldx, 4969 gen_rtx_fmt_ee (code, SImode, 4970 oldx, const0_rtx))); 4971 4972 return oldx; 4973 } 4974 else 4975 { 4976 gcc_assert (GET_CODE (addr) == PLUS); 4977 if (GET_CODE (XEXP (addr, 0)) == UNSPEC) 4978 return orig; 4979 } 4980 } 4981 4982 if (GET_CODE (addr) == PLUS) 4983 { 4984 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); 4985 4986 base = arc_legitimize_pic_address (op0, oldx); 4987 pat = arc_legitimize_pic_address (op1, 4988 base == oldx ? NULL_RTX : oldx); 4989 4990 if (base == op0 && pat == op1) 4991 return orig; 4992 4993 if (GET_CODE (pat) == CONST_INT) 4994 pat = plus_constant (Pmode, base, INTVAL (pat)); 4995 else 4996 { 4997 if (GET_CODE (pat) == PLUS && CONSTANT_P (XEXP (pat, 1))) 4998 { 4999 base = gen_rtx_PLUS (Pmode, base, XEXP (pat, 0)); 5000 pat = XEXP (pat, 1); 5001 } 5002 pat = gen_rtx_PLUS (Pmode, base, pat); 5003 } 5004 } 5005 } 5006 5007 return pat; 5008 } 5009 5010 /* Output address constant X to FILE, taking PIC into account. */ 5011 5012 void 5013 arc_output_pic_addr_const (FILE * file, rtx x, int code) 5014 { 5015 char buf[256]; 5016 5017 restart: 5018 switch (GET_CODE (x)) 5019 { 5020 case PC: 5021 if (flag_pic) 5022 putc ('.', file); 5023 else 5024 gcc_unreachable (); 5025 break; 5026 5027 case SYMBOL_REF: 5028 output_addr_const (file, x); 5029 5030 /* Local functions do not get references through the PLT. */ 5031 if (code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) 5032 fputs ("@plt", file); 5033 break; 5034 5035 case LABEL_REF: 5036 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0))); 5037 assemble_name (file, buf); 5038 break; 5039 5040 case CODE_LABEL: 5041 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); 5042 assemble_name (file, buf); 5043 break; 5044 5045 case CONST_INT: 5046 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 5047 break; 5048 5049 case CONST: 5050 arc_output_pic_addr_const (file, XEXP (x, 0), code); 5051 break; 5052 5053 case CONST_DOUBLE: 5054 if (GET_MODE (x) == VOIDmode) 5055 { 5056 /* We can use %d if the number is one word and positive. */ 5057 if (CONST_DOUBLE_HIGH (x)) 5058 fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX, 5059 CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x)); 5060 else if (CONST_DOUBLE_LOW (x) < 0) 5061 fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x)); 5062 else 5063 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); 5064 } 5065 else 5066 /* We can't handle floating point constants; 5067 PRINT_OPERAND must handle them. */ 5068 output_operand_lossage ("floating constant misused"); 5069 break; 5070 5071 case PLUS: 5072 /* FIXME: Not needed here. */ 5073 /* Some assemblers need integer constants to appear last (eg masm). 
*/ 5074 if (GET_CODE (XEXP (x, 0)) == CONST_INT) 5075 { 5076 arc_output_pic_addr_const (file, XEXP (x, 1), code); 5077 fprintf (file, "+"); 5078 arc_output_pic_addr_const (file, XEXP (x, 0), code); 5079 } 5080 else if (GET_CODE (XEXP (x, 1)) == CONST_INT) 5081 { 5082 arc_output_pic_addr_const (file, XEXP (x, 0), code); 5083 if (INTVAL (XEXP (x, 1)) >= 0) 5084 fprintf (file, "+"); 5085 arc_output_pic_addr_const (file, XEXP (x, 1), code); 5086 } 5087 else 5088 gcc_unreachable(); 5089 break; 5090 5091 case MINUS: 5092 /* Avoid outputting things like x-x or x+5-x, 5093 since some assemblers can't handle that. */ 5094 x = simplify_subtraction (x); 5095 if (GET_CODE (x) != MINUS) 5096 goto restart; 5097 5098 arc_output_pic_addr_const (file, XEXP (x, 0), code); 5099 fprintf (file, "-"); 5100 if (GET_CODE (XEXP (x, 1)) == CONST_INT 5101 && INTVAL (XEXP (x, 1)) < 0) 5102 { 5103 fprintf (file, "("); 5104 arc_output_pic_addr_const (file, XEXP (x, 1), code); 5105 fprintf (file, ")"); 5106 } 5107 else 5108 arc_output_pic_addr_const (file, XEXP (x, 1), code); 5109 break; 5110 5111 case ZERO_EXTEND: 5112 case SIGN_EXTEND: 5113 arc_output_pic_addr_const (file, XEXP (x, 0), code); 5114 break; 5115 5116 5117 case UNSPEC: 5118 const char *suffix; 5119 bool pcrel; pcrel = false; 5120 rtx base; base = NULL; 5121 gcc_assert (XVECLEN (x, 0) >= 1); 5122 switch (XINT (x, 1)) 5123 { 5124 case ARC_UNSPEC_GOT: 5125 suffix = "@gotpc", pcrel = true; 5126 break; 5127 case ARC_UNSPEC_GOTOFF: 5128 suffix = "@gotoff"; 5129 break; 5130 case ARC_UNSPEC_GOTOFFPC: 5131 suffix = "@pcl", pcrel = true; 5132 break; 5133 case ARC_UNSPEC_PLT: 5134 suffix = "@plt"; 5135 break; 5136 case UNSPEC_TLS_GD: 5137 suffix = "@tlsgd", pcrel = true; 5138 break; 5139 case UNSPEC_TLS_IE: 5140 suffix = "@tlsie", pcrel = true; 5141 break; 5142 case UNSPEC_TLS_OFF: 5143 if (XVECLEN (x, 0) == 2) 5144 base = XVECEXP (x, 0, 1); 5145 if (SYMBOL_REF_TLS_MODEL (XVECEXP (x, 0, 0)) == TLS_MODEL_LOCAL_EXEC 5146 || (!flag_pic && !base)) 5147 suffix = "@tpoff"; 5148 else 5149 suffix = "@dtpoff"; 5150 break; 5151 default: 5152 suffix = "@invalid"; 5153 output_operand_lossage ("invalid UNSPEC as operand: %d", XINT (x,1)); 5154 break; 5155 } 5156 if (pcrel) 5157 fputs ("pcl,", file); 5158 arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code); 5159 fputs (suffix, file); 5160 if (base) 5161 arc_output_pic_addr_const (file, base, code); 5162 break; 5163 5164 default: 5165 output_operand_lossage ("invalid expression as operand"); 5166 } 5167 } 5168 5169 #define SYMBOLIC_CONST(X) \ 5170 (GET_CODE (X) == SYMBOL_REF \ 5171 || GET_CODE (X) == LABEL_REF \ 5172 || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X))) 5173 5174 /* Emit insns to move operands[1] into operands[0]. */ 5175 5176 static void 5177 prepare_pic_move (rtx *operands, machine_mode) 5178 { 5179 if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]) 5180 && flag_pic) 5181 operands[1] = force_reg (Pmode, operands[1]); 5182 else 5183 { 5184 rtx temp = (reload_in_progress ? operands[0] 5185 : flag_pic? gen_reg_rtx (Pmode) : NULL_RTX); 5186 operands[1] = arc_legitimize_pic_address (operands[1], temp); 5187 } 5188 } 5189 5190 5191 /* The function returning the number of words, at the beginning of an 5192 argument, must be put in registers. The returned value must be 5193 zero for arguments that are passed entirely in registers or that 5194 are entirely pushed on the stack. 5195 5196 On some machines, certain arguments must be passed partially in 5197 registers and partially in memory. 
On these machines, typically 5198 the first N words of arguments are passed in registers, and the 5199 rest on the stack. If a multi-word argument (a `double' or a 5200 structure) crosses that boundary, its first few words must be 5201 passed in registers and the rest must be pushed. This function 5202 tells the compiler when this occurs, and how many of the words 5203 should go in registers. 5204 5205 `FUNCTION_ARG' for these arguments should return the first register 5206 to be used by the caller for this argument; likewise 5207 `FUNCTION_INCOMING_ARG', for the called function. 5208 5209 The function is used to implement macro FUNCTION_ARG_PARTIAL_NREGS. */ 5210 5211 /* If REGNO is the lowest numbered argument register still available, return 5212 the number of argument registers that remain. */ 5213 #define GPR_REST_ARG_REGS(REGNO) \ 5214 ((REGNO) <= MAX_ARC_PARM_REGS ? MAX_ARC_PARM_REGS - (REGNO) : 0 ) 5215 5216 /* ARC parameter registers are contiguous, so the next one is simply 5217 REGNO + 1. */ 5217 #define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 ) 5218 5219 /* Implement TARGET_ARG_PARTIAL_BYTES. */ 5220 5221 static int 5222 arc_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode, 5223 tree type, bool named ATTRIBUTE_UNUSED) 5224 { 5225 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 5226 int bytes = (mode == BLKmode 5227 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode)); 5228 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 5229 int arg_num = *cum; 5230 int ret; 5231 5232 arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type); 5233 ret = GPR_REST_ARG_REGS (arg_num); 5234 5235 /* ICEd at function.c:2361, and ret is copied to data->partial */ 5236 ret = (ret >= words ? 0 : ret * UNITS_PER_WORD); 5237 5238 return ret; 5239 } 5240 5241 /* This function is used to decide whether a function argument is passed 5242 in a register, and if so, which register. 5243 5244 The arguments are CUM, of type CUMULATIVE_ARGS, which summarizes 5245 (in a way defined by INIT_CUMULATIVE_ARGS and FUNCTION_ARG_ADVANCE) 5246 all of the previous arguments so far passed in registers; MODE, the 5247 machine mode of the argument; TYPE, the data type of the argument 5248 as a tree node or 0 if that is not known (which happens for C 5249 support library functions); and NAMED, which is 1 for an ordinary 5250 argument and 0 for nameless arguments that correspond to `...' in 5251 the called function's prototype. 5252 5253 The returned value should either be a `reg' RTX for the hard 5254 register in which to pass the argument, or zero to pass the 5255 argument on the stack. 5256 5257 For machines like the Vax and 68000, where normally all arguments 5258 are pushed, zero suffices as a definition. 5259 5260 The usual way to make the ANSI library `stdarg.h' work on a machine 5261 where some arguments are usually passed in registers, is to cause 5262 nameless arguments to be passed on the stack instead. This is done 5263 by making the function return 0 whenever NAMED is 0. 5264 5265 You may use the macro `MUST_PASS_IN_STACK (MODE, TYPE)' in the 5266 definition of this function to determine if this argument is of a 5267 type that must be passed in the stack. If `REG_PARM_STACK_SPACE' 5268 is not defined and the function returns non-zero for such an 5269 argument, the compiler will abort. If `REG_PARM_STACK_SPACE' is 5270 defined, the argument will be computed in the stack and then loaded 5271 into a register. 5272 5273 The function is used to implement macro FUNCTION_ARG. */ 5274 /* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers 5275 and the rest are pushed.
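A worked example of the straddling case (assuming, purely for illustration, eight argument registers r0-r7 and 4-byte words): if r0-r5 are already taken and the next argument is a 12-byte structure of ints, only two registers remain, so arc_arg_partial_bytes reports 8 bytes passed in r6/r7 and the remaining 4 bytes go on the stack.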
*/ 5276 5277 static rtx 5278 arc_function_arg (cumulative_args_t cum_v, 5279 machine_mode mode, 5280 const_tree type ATTRIBUTE_UNUSED, 5281 bool named ATTRIBUTE_UNUSED) 5282 { 5283 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 5284 int arg_num = *cum; 5285 rtx ret; 5286 const char *debstr ATTRIBUTE_UNUSED; 5287 5288 arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type); 5289 /* Return a marker for use in the call instruction. */ 5290 if (mode == VOIDmode) 5291 { 5292 ret = const0_rtx; 5293 debstr = "<0>"; 5294 } 5295 else if (GPR_REST_ARG_REGS (arg_num) > 0) 5296 { 5297 ret = gen_rtx_REG (mode, arg_num); 5298 debstr = reg_names [arg_num]; 5299 } 5300 else 5301 { 5302 ret = NULL_RTX; 5303 debstr = "memory"; 5304 } 5305 return ret; 5306 } 5307 5308 /* The function to update the summarizer variable *CUM to advance past 5309 an argument in the argument list. The values MODE, TYPE and NAMED 5310 describe that argument. Once this is done, the variable *CUM is 5311 suitable for analyzing the *following* argument with 5312 `FUNCTION_ARG', etc. 5313 5314 This function need not do anything if the argument in question was 5315 passed on the stack. The compiler knows how to track the amount of 5316 stack space used for arguments without any special help. 5317 5318 The function is used to implement macro FUNCTION_ARG_ADVANCE. */ 5319 /* For the ARC: the cum set here is passed on to function_arg where we 5320 look at its value and say which reg to use. Strategy: advance the 5321 regnumber here till we run out of arg regs, then set *cum to last 5322 reg. In function_arg, since *cum > last arg reg we would return 0 5323 and thus the arg will end up on the stack. For straddling args of 5324 course function_arg_partial_nregs will come into play. */ 5325 5326 static void 5327 arc_function_arg_advance (cumulative_args_t cum_v, 5328 machine_mode mode, 5329 const_tree type, 5330 bool named ATTRIBUTE_UNUSED) 5331 { 5332 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 5333 int bytes = (mode == BLKmode 5334 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode)); 5335 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 5336 int i; 5337 5338 if (words) 5339 *cum = ROUND_ADVANCE_CUM (*cum, mode, type); 5340 for (i = 0; i < words; i++) 5341 *cum = ARC_NEXT_ARG_REG (*cum); 5342 5343 } 5344 5345 /* Define how to find the value returned by a function. 5346 VALTYPE is the data type of the value (as a tree). 5347 If the precise function being called is known, FN_DECL_OR_TYPE is its 5348 FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type. */ 5349 5350 static rtx 5351 arc_function_value (const_tree valtype, 5352 const_tree fn_decl_or_type ATTRIBUTE_UNUSED, 5353 bool outgoing ATTRIBUTE_UNUSED) 5354 { 5355 machine_mode mode = TYPE_MODE (valtype); 5356 int unsignedp ATTRIBUTE_UNUSED; 5357 5358 unsignedp = TYPE_UNSIGNED (valtype); 5359 if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE) 5360 PROMOTE_MODE (mode, unsignedp, valtype); 5361 return gen_rtx_REG (mode, 0); 5362 } 5363 5364 /* Returns the return address that is used by builtin_return_address. */ 5365 5366 rtx 5367 arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame) 5368 { 5369 if (count != 0) 5370 return const0_rtx; 5371 5372 return get_hard_reg_initial_val (Pmode , RETURN_ADDR_REGNUM); 5373 } 5374 5375 /* Nonzero if the constant value X is a legitimate general operand 5376 when generating PIC code. It is given that flag_pic is on and 5377 that X satisfies CONSTANT_P or is a CONST_DOUBLE. 
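In practice this means: a constant that still contains a bare SYMBOL_REF that is TLS or does not bind locally, e.g. (const (plus (symbol_ref "foo") (const_int 4))) for an external "foo" (a hand-written example), is rejected and has to be legitimized first, while constants whose symbols are already wrapped in a GOT / GOTOFF / TLS unspec, symbols that bind locally, plain integers, and so on are accepted.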
*/ 5378 5379 bool 5380 arc_legitimate_pic_operand_p (rtx x) 5381 { 5382 return !arc_raw_symbolic_reference_mentioned_p (x, true); 5383 } 5384 5385 /* Determine if a given RTX is a valid constant. We already know this 5386 satisfies CONSTANT_P. */ 5387 5388 bool 5389 arc_legitimate_constant_p (machine_mode mode, rtx x) 5390 { 5391 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x)) 5392 return false; 5393 5394 if (!flag_pic && mode != Pmode) 5395 return true; 5396 5397 switch (GET_CODE (x)) 5398 { 5399 case CONST: 5400 x = XEXP (x, 0); 5401 5402 if (GET_CODE (x) == PLUS) 5403 { 5404 if (flag_pic 5405 ? GET_CODE (XEXP (x, 1)) != CONST_INT 5406 : !arc_legitimate_constant_p (mode, XEXP (x, 1))) 5407 return false; 5408 x = XEXP (x, 0); 5409 } 5410 5411 /* Only some unspecs are valid as "constants". */ 5412 if (GET_CODE (x) == UNSPEC) 5413 switch (XINT (x, 1)) 5414 { 5415 case ARC_UNSPEC_PLT: 5416 case ARC_UNSPEC_GOTOFF: 5417 case ARC_UNSPEC_GOTOFFPC: 5418 case ARC_UNSPEC_GOT: 5419 case UNSPEC_TLS_GD: 5420 case UNSPEC_TLS_IE: 5421 case UNSPEC_TLS_OFF: 5422 return true; 5423 5424 default: 5425 gcc_unreachable (); 5426 } 5427 5428 /* We must have drilled down to a symbol. */ 5429 if (arc_raw_symbolic_reference_mentioned_p (x, false)) 5430 return false; 5431 5432 /* Return true. */ 5433 break; 5434 5435 case SYMBOL_REF: 5436 if (SYMBOL_REF_TLS_MODEL (x)) 5437 return false; 5438 /* Fall through. */ 5439 case LABEL_REF: 5440 if (flag_pic) 5441 return false; 5442 /* Fall through. */ 5443 5444 default: 5445 break; 5446 } 5447 5448 /* Otherwise we handle everything else in the move patterns. */ 5449 return true; 5450 } 5451 5452 static bool 5453 arc_legitimate_address_p (machine_mode mode, rtx x, bool strict) 5454 { 5455 if (RTX_OK_FOR_BASE_P (x, strict)) 5456 return true; 5457 if (LEGITIMATE_OFFSET_ADDRESS_P (mode, x, TARGET_INDEXED_LOADS, strict)) 5458 return true; 5459 if (LEGITIMATE_SCALED_ADDRESS_P (mode, x, strict)) 5460 return true; 5461 if (LEGITIMATE_SMALL_DATA_ADDRESS_P (x)) 5462 return true; 5463 if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x))) 5464 return true; 5465 5466 /* When we compile for size avoid const (@sym + offset) 5467 addresses. */ 5468 if (!flag_pic && optimize_size && !reload_completed 5469 && (GET_CODE (x) == CONST) 5470 && (GET_CODE (XEXP (x, 0)) == PLUS) 5471 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) 5472 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) == 0 5473 && !SYMBOL_REF_FUNCTION_P (XEXP (XEXP (x, 0), 0))) 5474 { 5475 rtx addend = XEXP (XEXP (x, 0), 1); 5476 gcc_assert (CONST_INT_P (addend)); 5477 HOST_WIDE_INT offset = INTVAL (addend); 5478 5479 /* Allow addresses having a large offset to pass. Anyhow they 5480 will end in a limm. */ 5481 return !(offset > -1024 && offset < 1020); 5482 } 5483 5484 if ((GET_MODE_SIZE (mode) != 16) && CONSTANT_P (x)) 5485 { 5486 if (flag_pic ? arc_legitimate_pic_addr_p (x) 5487 : arc_legitimate_constant_p (Pmode, x)) 5488 return true; 5489 } 5490 if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC 5491 || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC) 5492 && RTX_OK_FOR_BASE_P (XEXP (x, 0), strict)) 5493 return true; 5494 /* We're restricted here by the `st' insn. 
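That is, the base-plus-increment inside a pre_modify / post_modify must itself be something a store could use as an address: e.g. (hand-written RTL) (pre_modify (reg r1) (plus (reg r1) (const_int 8))) is accepted because 8 fits the short offset range st takes, whereas an increment such as 65536 would need a LIMM and fails the LEGITIMATE_OFFSET_ADDRESS_P (QImode, ...) test below.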
*/ 5495 if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY) 5496 && GET_CODE (XEXP ((x), 1)) == PLUS 5497 && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP (x, 1), 0)) 5498 && LEGITIMATE_OFFSET_ADDRESS_P (QImode, XEXP (x, 1), 5499 TARGET_AUTO_MODIFY_REG, strict)) 5500 return true; 5501 return false; 5502 } 5503 5504 /* Return true iff ADDR (a legitimate address expression) 5505 has an effect that depends on the machine mode it is used for. */ 5506 5507 static bool 5508 arc_mode_dependent_address_p (const_rtx addr, addr_space_t) 5509 { 5510 /* SYMBOL_REF is not mode dependent: it is either a small data reference, 5511 which is valid for loads and stores, or a limm offset, which is valid for 5512 loads. Scaled indices are scaled by the access mode. */ 5513 if (GET_CODE (addr) == PLUS 5514 && GET_CODE (XEXP ((addr), 0)) == MULT) 5515 return true; 5516 return false; 5517 } 5518 5519 /* Determine if it's legal to put X into the constant pool. */ 5520 5521 static bool 5522 arc_cannot_force_const_mem (machine_mode mode, rtx x) 5523 { 5524 return !arc_legitimate_constant_p (mode, x); 5525 } 5526 5527 /* IDs for all the ARC builtins. */ 5528 5529 enum arc_builtin_id 5530 { 5531 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \ 5532 ARC_BUILTIN_ ## NAME, 5533 #include "builtins.def" 5534 #undef DEF_BUILTIN 5535 5536 ARC_BUILTIN_COUNT 5537 }; 5538 5539 struct GTY(()) arc_builtin_description 5540 { 5541 enum insn_code icode; 5542 int n_args; 5543 tree fndecl; 5544 }; 5545 5546 static GTY(()) struct arc_builtin_description 5547 arc_bdesc[ARC_BUILTIN_COUNT] = 5548 { 5549 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \ 5550 { (enum insn_code) CODE_FOR_ ## ICODE, N_ARGS, NULL_TREE }, 5551 #include "builtins.def" 5552 #undef DEF_BUILTIN 5553 }; 5554 5555 /* Transform UP into lowercase and write the result to LO. 5556 You must provide enough space for LO. Return LO. */ 5557 5558 static char* 5559 arc_tolower (char *lo, const char *up) 5560 { 5561 char *lo0 = lo; 5562 5563 for (; *up; up++, lo++) 5564 *lo = TOLOWER (*up); 5565 5566 *lo = '\0'; 5567 5568 return lo0; 5569 } 5570 5571 /* Implement `TARGET_BUILTIN_DECL'. */ 5572 5573 static tree 5574 arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED) 5575 { 5576 if (id < ARC_BUILTIN_COUNT) 5577 return arc_bdesc[id].fndecl; 5578 5579 return error_mark_node; 5580 } 5581 5582 static void 5583 arc_init_builtins (void) 5584 { 5585 tree V4HI_type_node; 5586 tree V2SI_type_node; 5587 tree V2HI_type_node; 5588 5589 /* Vector types based on HS SIMD elements. 
*/ 5590 V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); 5591 V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); 5592 V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode); 5593 5594 tree pcvoid_type_node 5595 = build_pointer_type (build_qualified_type (void_type_node, 5596 TYPE_QUAL_CONST)); 5597 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, 5598 V8HImode); 5599 5600 tree void_ftype_void 5601 = build_function_type_list (void_type_node, NULL_TREE); 5602 tree int_ftype_int 5603 = build_function_type_list (integer_type_node, integer_type_node, 5604 NULL_TREE); 5605 tree int_ftype_pcvoid_int 5606 = build_function_type_list (integer_type_node, pcvoid_type_node, 5607 integer_type_node, NULL_TREE); 5608 tree void_ftype_usint_usint 5609 = build_function_type_list (void_type_node, long_unsigned_type_node, 5610 long_unsigned_type_node, NULL_TREE); 5611 tree int_ftype_int_int 5612 = build_function_type_list (integer_type_node, integer_type_node, 5613 integer_type_node, NULL_TREE); 5614 tree usint_ftype_usint 5615 = build_function_type_list (long_unsigned_type_node, 5616 long_unsigned_type_node, NULL_TREE); 5617 tree void_ftype_usint 5618 = build_function_type_list (void_type_node, long_unsigned_type_node, 5619 NULL_TREE); 5620 tree int_ftype_void 5621 = build_function_type_list (integer_type_node, void_type_node, 5622 NULL_TREE); 5623 tree void_ftype_int 5624 = build_function_type_list (void_type_node, integer_type_node, 5625 NULL_TREE); 5626 tree int_ftype_short 5627 = build_function_type_list (integer_type_node, short_integer_type_node, 5628 NULL_TREE); 5629 5630 /* Old ARC SIMD types. */ 5631 tree v8hi_ftype_v8hi_v8hi 5632 = build_function_type_list (V8HI_type_node, V8HI_type_node, 5633 V8HI_type_node, NULL_TREE); 5634 tree v8hi_ftype_v8hi_int 5635 = build_function_type_list (V8HI_type_node, V8HI_type_node, 5636 integer_type_node, NULL_TREE); 5637 tree v8hi_ftype_v8hi_int_int 5638 = build_function_type_list (V8HI_type_node, V8HI_type_node, 5639 integer_type_node, integer_type_node, 5640 NULL_TREE); 5641 tree void_ftype_v8hi_int_int 5642 = build_function_type_list (void_type_node, V8HI_type_node, 5643 integer_type_node, integer_type_node, 5644 NULL_TREE); 5645 tree void_ftype_v8hi_int_int_int 5646 = build_function_type_list (void_type_node, V8HI_type_node, 5647 integer_type_node, integer_type_node, 5648 integer_type_node, NULL_TREE); 5649 tree v8hi_ftype_int_int 5650 = build_function_type_list (V8HI_type_node, integer_type_node, 5651 integer_type_node, NULL_TREE); 5652 tree void_ftype_int_int 5653 = build_function_type_list (void_type_node, integer_type_node, 5654 integer_type_node, NULL_TREE); 5655 tree v8hi_ftype_v8hi 5656 = build_function_type_list (V8HI_type_node, V8HI_type_node, 5657 NULL_TREE); 5658 /* ARCv2 SIMD types. 
*/ 5659 tree long_ftype_v4hi_v4hi 5660 = build_function_type_list (long_long_integer_type_node, 5661 V4HI_type_node, V4HI_type_node, NULL_TREE); 5662 tree int_ftype_v2hi_v2hi 5663 = build_function_type_list (integer_type_node, 5664 V2HI_type_node, V2HI_type_node, NULL_TREE); 5665 tree v2si_ftype_v2hi_v2hi 5666 = build_function_type_list (V2SI_type_node, 5667 V2HI_type_node, V2HI_type_node, NULL_TREE); 5668 tree v2hi_ftype_v2hi_v2hi 5669 = build_function_type_list (V2HI_type_node, 5670 V2HI_type_node, V2HI_type_node, NULL_TREE); 5671 tree v2si_ftype_v2si_v2si 5672 = build_function_type_list (V2SI_type_node, 5673 V2SI_type_node, V2SI_type_node, NULL_TREE); 5674 tree v4hi_ftype_v4hi_v4hi 5675 = build_function_type_list (V4HI_type_node, 5676 V4HI_type_node, V4HI_type_node, NULL_TREE); 5677 tree long_ftype_v2si_v2hi 5678 = build_function_type_list (long_long_integer_type_node, 5679 V2SI_type_node, V2HI_type_node, NULL_TREE); 5680 5681 /* Add the builtins. */ 5682 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \ 5683 { \ 5684 int id = ARC_BUILTIN_ ## NAME; \ 5685 const char *Name = "__builtin_arc_" #NAME; \ 5686 char *name = (char*) alloca (1 + strlen (Name)); \ 5687 \ 5688 gcc_assert (id < ARC_BUILTIN_COUNT); \ 5689 if (MASK) \ 5690 arc_bdesc[id].fndecl \ 5691 = add_builtin_function (arc_tolower(name, Name), TYPE, id, \ 5692 BUILT_IN_MD, NULL, NULL_TREE); \ 5693 } 5694 #include "builtins.def" 5695 #undef DEF_BUILTIN 5696 } 5697 5698 /* Helper to expand __builtin_arc_aligned (void* val, int 5699 alignval). */ 5700 5701 static rtx 5702 arc_expand_builtin_aligned (tree exp) 5703 { 5704 tree arg0 = CALL_EXPR_ARG (exp, 0); 5705 tree arg1 = CALL_EXPR_ARG (exp, 1); 5706 fold (arg1); 5707 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); 5708 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); 5709 5710 if (!CONST_INT_P (op1)) 5711 { 5712 /* If we can't fold the alignment to a constant integer 5713 whilst optimizing, this is probably a user error. */ 5714 if (optimize) 5715 warning (0, "__builtin_arc_aligned with non-constant alignment"); 5716 } 5717 else 5718 { 5719 HOST_WIDE_INT alignTest = INTVAL (op1); 5720 /* Check alignTest is positive, and a power of two. */ 5721 if (alignTest <= 0 || alignTest != (alignTest & -alignTest)) 5722 { 5723 error ("invalid alignment value for __builtin_arc_aligned"); 5724 return NULL_RTX; 5725 } 5726 5727 if (CONST_INT_P (op0)) 5728 { 5729 HOST_WIDE_INT pnt = INTVAL (op0); 5730 5731 if ((pnt & (alignTest - 1)) == 0) 5732 return const1_rtx; 5733 } 5734 else 5735 { 5736 unsigned align = get_pointer_alignment (arg0); 5737 unsigned numBits = alignTest * BITS_PER_UNIT; 5738 5739 if (align && align >= numBits) 5740 return const1_rtx; 5741 /* Another attempt to ascertain alignment. Check the type 5742 we are pointing to. */ 5743 if (POINTER_TYPE_P (TREE_TYPE (arg0)) 5744 && TYPE_ALIGN (TREE_TYPE (TREE_TYPE (arg0))) >= numBits) 5745 return const1_rtx; 5746 } 5747 } 5748 5749 /* Default to false. */ 5750 return const0_rtx; 5751 } 5752 5753 /* Helper arc_expand_builtin, generates a pattern for the given icode 5754 and arguments. 
*/ 5755 5756 static rtx_insn * 5757 apply_GEN_FCN (enum insn_code icode, rtx *arg) 5758 { 5759 switch (insn_data[icode].n_generator_args) 5760 { 5761 case 0: 5762 return GEN_FCN (icode) (); 5763 case 1: 5764 return GEN_FCN (icode) (arg[0]); 5765 case 2: 5766 return GEN_FCN (icode) (arg[0], arg[1]); 5767 case 3: 5768 return GEN_FCN (icode) (arg[0], arg[1], arg[2]); 5769 case 4: 5770 return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3]); 5771 case 5: 5772 return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3], arg[4]); 5773 default: 5774 gcc_unreachable (); 5775 } 5776 } 5777 5778 /* Expand an expression EXP that calls a built-in function, 5779 with result going to TARGET if that's convenient 5780 (and in mode MODE if that's convenient). 5781 SUBTARGET may be used as the target for computing one of EXP's operands. 5782 IGNORE is nonzero if the value is to be ignored. */ 5783 5784 static rtx 5785 arc_expand_builtin (tree exp, 5786 rtx target, 5787 rtx subtarget ATTRIBUTE_UNUSED, 5788 machine_mode mode ATTRIBUTE_UNUSED, 5789 int ignore ATTRIBUTE_UNUSED) 5790 { 5791 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 5792 unsigned int id = DECL_FUNCTION_CODE (fndecl); 5793 const struct arc_builtin_description *d = &arc_bdesc[id]; 5794 int i, j, n_args = call_expr_nargs (exp); 5795 rtx pat = NULL_RTX; 5796 rtx xop[5]; 5797 enum insn_code icode = d->icode; 5798 machine_mode tmode = insn_data[icode].operand[0].mode; 5799 int nonvoid; 5800 tree arg0; 5801 tree arg1; 5802 tree arg2; 5803 tree arg3; 5804 rtx op0; 5805 rtx op1; 5806 rtx op2; 5807 rtx op3; 5808 rtx op4; 5809 machine_mode mode0; 5810 machine_mode mode1; 5811 machine_mode mode2; 5812 machine_mode mode3; 5813 machine_mode mode4; 5814 5815 if (id >= ARC_BUILTIN_COUNT) 5816 internal_error ("bad builtin fcode"); 5817 5818 /* 1st part: Expand special builtins. */ 5819 switch (id) 5820 { 5821 case ARC_BUILTIN_NOP: 5822 emit_insn (gen_nopv ()); 5823 return NULL_RTX; 5824 5825 case ARC_BUILTIN_RTIE: 5826 case ARC_BUILTIN_SYNC: 5827 case ARC_BUILTIN_BRK: 5828 case ARC_BUILTIN_SWI: 5829 case ARC_BUILTIN_UNIMP_S: 5830 gcc_assert (icode != 0); 5831 emit_insn (GEN_FCN (icode) (const1_rtx)); 5832 return NULL_RTX; 5833 5834 case ARC_BUILTIN_ALIGNED: 5835 return arc_expand_builtin_aligned (exp); 5836 5837 case ARC_BUILTIN_CLRI: 5838 target = gen_reg_rtx (SImode); 5839 emit_insn (gen_clri (target, const1_rtx)); 5840 return target; 5841 5842 case ARC_BUILTIN_TRAP_S: 5843 case ARC_BUILTIN_SLEEP: 5844 arg0 = CALL_EXPR_ARG (exp, 0); 5845 fold (arg0); 5846 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); 5847 5848 if (!CONST_INT_P (op0) || !satisfies_constraint_L (op0)) 5849 { 5850 error ("builtin operand should be an unsigned 6-bit value"); 5851 return NULL_RTX; 5852 } 5853 gcc_assert (icode != 0); 5854 emit_insn (GEN_FCN (icode) (op0)); 5855 return NULL_RTX; 5856 5857 case ARC_BUILTIN_VDORUN: 5858 case ARC_BUILTIN_VDIRUN: 5859 arg0 = CALL_EXPR_ARG (exp, 0); 5860 arg1 = CALL_EXPR_ARG (exp, 1); 5861 op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); 5862 op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); 5863 5864 target = gen_rtx_REG (SImode, (id == ARC_BUILTIN_VDIRUN) ? 
131 : 139); 5865 5866 mode0 = insn_data[icode].operand[1].mode; 5867 mode1 = insn_data[icode].operand[2].mode; 5868 5869 if (!insn_data[icode].operand[1].predicate (op0, mode0)) 5870 op0 = copy_to_mode_reg (mode0, op0); 5871 5872 if (!insn_data[icode].operand[2].predicate (op1, mode1)) 5873 op1 = copy_to_mode_reg (mode1, op1); 5874 5875 pat = GEN_FCN (icode) (target, op0, op1); 5876 if (!pat) 5877 return NULL_RTX; 5878 5879 emit_insn (pat); 5880 return NULL_RTX; 5881 5882 case ARC_BUILTIN_VDIWR: 5883 case ARC_BUILTIN_VDOWR: 5884 arg0 = CALL_EXPR_ARG (exp, 0); 5885 arg1 = CALL_EXPR_ARG (exp, 1); 5886 op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); 5887 op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); 5888 5889 if (!CONST_INT_P (op0) 5890 || !(UNSIGNED_INT3 (INTVAL (op0)))) 5891 error ("operand 1 should be an unsigned 3-bit immediate"); 5892 5893 mode1 = insn_data[icode].operand[1].mode; 5894 5895 if (icode == CODE_FOR_vdiwr_insn) 5896 target = gen_rtx_REG (SImode, 5897 ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0)); 5898 else if (icode == CODE_FOR_vdowr_insn) 5899 target = gen_rtx_REG (SImode, 5900 ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0)); 5901 else 5902 gcc_unreachable (); 5903 5904 if (!insn_data[icode].operand[2].predicate (op1, mode1)) 5905 op1 = copy_to_mode_reg (mode1, op1); 5906 5907 pat = GEN_FCN (icode) (target, op1); 5908 if (!pat) 5909 return NULL_RTX; 5910 5911 emit_insn (pat); 5912 return NULL_RTX; 5913 5914 case ARC_BUILTIN_VASRW: 5915 case ARC_BUILTIN_VSR8: 5916 case ARC_BUILTIN_VSR8AW: 5917 arg0 = CALL_EXPR_ARG (exp, 0); 5918 arg1 = CALL_EXPR_ARG (exp, 1); 5919 op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); 5920 op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); 5921 op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); 5922 5923 target = gen_reg_rtx (V8HImode); 5924 mode0 = insn_data[icode].operand[1].mode; 5925 mode1 = insn_data[icode].operand[2].mode; 5926 5927 if (!insn_data[icode].operand[1].predicate (op0, mode0)) 5928 op0 = copy_to_mode_reg (mode0, op0); 5929 5930 if ((!insn_data[icode].operand[2].predicate (op1, mode1)) 5931 || !(UNSIGNED_INT3 (INTVAL (op1)))) 5932 error ("operand 2 should be an unsigned 3-bit value (I0-I7)"); 5933 5934 pat = GEN_FCN (icode) (target, op0, op1, op2); 5935 if (!pat) 5936 return NULL_RTX; 5937 5938 emit_insn (pat); 5939 return target; 5940 5941 case ARC_BUILTIN_VLD32WH: 5942 case ARC_BUILTIN_VLD32WL: 5943 case ARC_BUILTIN_VLD64: 5944 case ARC_BUILTIN_VLD32: 5945 rtx src_vreg; 5946 icode = d->icode; 5947 arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg. */ 5948 arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7. */ 5949 arg2 = CALL_EXPR_ARG (exp, 2); /* u8. */ 5950 5951 src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); 5952 op0 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); 5953 op1 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); 5954 op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); 5955 5956 /* target <- src vreg. */ 5957 emit_insn (gen_move_insn (target, src_vreg)); 5958 5959 /* target <- vec_concat: target, mem (Ib, u8). 
*/ 5960 mode0 = insn_data[icode].operand[3].mode; 5961 mode1 = insn_data[icode].operand[1].mode; 5962 5963 if ((!insn_data[icode].operand[3].predicate (op0, mode0)) 5964 || !(UNSIGNED_INT3 (INTVAL (op0)))) 5965 error ("operand 1 should be an unsigned 3-bit value (I0-I7)"); 5966 5967 if ((!insn_data[icode].operand[1].predicate (op1, mode1)) 5968 || !(UNSIGNED_INT8 (INTVAL (op1)))) 5969 error ("operand 2 should be an unsigned 8-bit value"); 5970 5971 pat = GEN_FCN (icode) (target, op1, op2, op0); 5972 if (!pat) 5973 return NULL_RTX; 5974 5975 emit_insn (pat); 5976 return target; 5977 5978 case ARC_BUILTIN_VLD64W: 5979 case ARC_BUILTIN_VLD128: 5980 arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg. */ 5981 arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7. */ 5982 5983 op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); 5984 op1 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); 5985 op2 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); 5986 5987 /* target <- src vreg. */ 5988 target = gen_reg_rtx (V8HImode); 5989 5990 /* target <- vec_concat: target, mem (Ib, u8). */ 5991 mode0 = insn_data[icode].operand[1].mode; 5992 mode1 = insn_data[icode].operand[2].mode; 5993 mode2 = insn_data[icode].operand[3].mode; 5994 5995 if ((!insn_data[icode].operand[2].predicate (op1, mode1)) 5996 || !(UNSIGNED_INT3 (INTVAL (op1)))) 5997 error ("operand 1 should be an unsigned 3-bit value (I0-I7)"); 5998 5999 if ((!insn_data[icode].operand[3].predicate (op2, mode2)) 6000 || !(UNSIGNED_INT8 (INTVAL (op2)))) 6001 error ("operand 2 should be an unsigned 8-bit value"); 6002 6003 pat = GEN_FCN (icode) (target, op0, op1, op2); 6004 6005 if (!pat) 6006 return NULL_RTX; 6007 6008 emit_insn (pat); 6009 return target; 6010 6011 case ARC_BUILTIN_VST128: 6012 case ARC_BUILTIN_VST64: 6013 arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg. */ 6014 arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7. */ 6015 arg2 = CALL_EXPR_ARG (exp, 2); /* u8. */ 6016 6017 op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); 6018 op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); 6019 op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); 6020 op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); 6021 6022 mode0 = insn_data[icode].operand[0].mode; 6023 mode1 = insn_data[icode].operand[1].mode; 6024 mode2 = insn_data[icode].operand[2].mode; 6025 mode3 = insn_data[icode].operand[3].mode; 6026 6027 if ((!insn_data[icode].operand[1].predicate (op1, mode1)) 6028 || !(UNSIGNED_INT3 (INTVAL (op1)))) 6029 error ("operand 2 should be an unsigned 3-bit value (I0-I7)"); 6030 6031 if ((!insn_data[icode].operand[2].predicate (op2, mode2)) 6032 || !(UNSIGNED_INT8 (INTVAL (op2)))) 6033 error ("operand 3 should be an unsigned 8-bit value"); 6034 6035 if (!insn_data[icode].operand[3].predicate (op3, mode3)) 6036 op3 = copy_to_mode_reg (mode3, op3); 6037 6038 pat = GEN_FCN (icode) (op0, op1, op2, op3); 6039 if (!pat) 6040 return NULL_RTX; 6041 6042 emit_insn (pat); 6043 return NULL_RTX; 6044 6045 case ARC_BUILTIN_VST16_N: 6046 case ARC_BUILTIN_VST32_N: 6047 arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg. */ 6048 arg1 = CALL_EXPR_ARG (exp, 1); /* u3. */ 6049 arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7. */ 6050 arg3 = CALL_EXPR_ARG (exp, 3); /* u8. 
*/ 6051 6052 op0 = expand_expr (arg3, NULL_RTX, SImode, EXPAND_NORMAL); 6053 op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); 6054 op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); 6055 op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); 6056 op4 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); 6057 6058 mode0 = insn_data[icode].operand[0].mode; 6059 mode2 = insn_data[icode].operand[2].mode; 6060 mode3 = insn_data[icode].operand[3].mode; 6061 mode4 = insn_data[icode].operand[4].mode; 6062 6063 /* Do some correctness checks for the operands. */ 6064 if ((!insn_data[icode].operand[0].predicate (op0, mode0)) 6065 || !(UNSIGNED_INT8 (INTVAL (op0)))) 6066 error ("operand 4 should be an unsigned 8-bit value (0-255)"); 6067 6068 if ((!insn_data[icode].operand[2].predicate (op2, mode2)) 6069 || !(UNSIGNED_INT3 (INTVAL (op2)))) 6070 error ("operand 3 should be an unsigned 3-bit value (I0-I7)"); 6071 6072 if (!insn_data[icode].operand[3].predicate (op3, mode3)) 6073 op3 = copy_to_mode_reg (mode3, op3); 6074 6075 if ((!insn_data[icode].operand[4].predicate (op4, mode4)) 6076 || !(UNSIGNED_INT3 (INTVAL (op4)))) 6077 error ("operand 2 should be an unsigned 3-bit value (subreg 0-7)"); 6078 else if (icode == CODE_FOR_vst32_n_insn 6079 && ((INTVAL (op4) % 2) != 0)) 6080 error ("operand 2 should be an even 3-bit value (subreg 0,2,4,6)"); 6081 6082 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); 6083 if (!pat) 6084 return NULL_RTX; 6085 6086 emit_insn (pat); 6087 return NULL_RTX; 6088 6089 default: 6090 break; 6091 } 6092 6093 /* 2nd part: Expand regular builtins. */ 6094 if (icode == 0) 6095 internal_error ("bad builtin fcode"); 6096 6097 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; 6098 j = 0; 6099 6100 if (nonvoid) 6101 { 6102 if (target == NULL_RTX 6103 || GET_MODE (target) != tmode 6104 || !insn_data[icode].operand[0].predicate (target, tmode)) 6105 { 6106 target = gen_reg_rtx (tmode); 6107 } 6108 xop[j++] = target; 6109 } 6110 6111 gcc_assert (n_args <= 4); 6112 for (i = 0; i < n_args; i++, j++) 6113 { 6114 tree arg = CALL_EXPR_ARG (exp, i); 6115 machine_mode mode = insn_data[icode].operand[j].mode; 6116 rtx op = expand_expr (arg, NULL_RTX, mode, EXPAND_NORMAL); 6117 machine_mode opmode = GET_MODE (op); 6118 char c = insn_data[icode].operand[j].constraint[0]; 6119 6120 /* SIMD extension requires exact immediate operand match. */ 6121 if ((id > ARC_BUILTIN_SIMD_BEGIN) 6122 && (id < ARC_BUILTIN_SIMD_END) 6123 && (c != 'v') 6124 && (c != 'r')) 6125 { 6126 if (!CONST_INT_P (op)) 6127 error ("builtin requires an immediate for operand %d", j); 6128 switch (c) 6129 { 6130 case 'L': 6131 if (!satisfies_constraint_L (op)) 6132 error ("operand %d should be a 6 bit unsigned immediate", j); 6133 break; 6134 case 'P': 6135 if (!satisfies_constraint_P (op)) 6136 error ("operand %d should be a 8 bit unsigned immediate", j); 6137 break; 6138 case 'K': 6139 if (!satisfies_constraint_K (op)) 6140 error ("operand %d should be a 3 bit unsigned immediate", j); 6141 break; 6142 default: 6143 error ("unknown builtin immediate operand type for operand %d", 6144 j); 6145 } 6146 } 6147 6148 if (CONST_INT_P (op)) 6149 opmode = mode; 6150 6151 if ((opmode == SImode) && (mode == HImode)) 6152 { 6153 opmode = HImode; 6154 op = gen_lowpart (HImode, op); 6155 } 6156 6157 /* In case the insn wants input operands in modes different from 6158 the result, abort. 
*/ 6159 gcc_assert (opmode == mode || opmode == VOIDmode); 6160 6161 if (!insn_data[icode].operand[i + nonvoid].predicate (op, mode)) 6162 op = copy_to_mode_reg (mode, op); 6163 6164 xop[j] = op; 6165 } 6166 6167 pat = apply_GEN_FCN (icode, xop); 6168 if (pat == NULL_RTX) 6169 return NULL_RTX; 6170 6171 emit_insn (pat); 6172 6173 if (nonvoid) 6174 return target; 6175 else 6176 return const0_rtx; 6177 } 6178 6179 /* Returns true if the operands[opno] is a valid compile-time constant to be 6180 used as register number in the code for builtins. Else it flags an error 6181 and returns false. */ 6182 6183 bool 6184 check_if_valid_regno_const (rtx *operands, int opno) 6185 { 6186 6187 switch (GET_CODE (operands[opno])) 6188 { 6189 case SYMBOL_REF : 6190 case CONST : 6191 case CONST_INT : 6192 return true; 6193 default: 6194 error ("register number must be a compile-time constant. Try giving higher optimization levels"); 6195 break; 6196 } 6197 return false; 6198 } 6199 6200 /* Check that after all the constant folding, whether the operand to 6201 __builtin_arc_sleep is an unsigned int of 6 bits. If not, flag an error. */ 6202 6203 bool 6204 check_if_valid_sleep_operand (rtx *operands, int opno) 6205 { 6206 switch (GET_CODE (operands[opno])) 6207 { 6208 case CONST : 6209 case CONST_INT : 6210 if( UNSIGNED_INT6 (INTVAL (operands[opno]))) 6211 return true; 6212 /* FALLTHRU */ 6213 default: 6214 fatal_error (input_location, 6215 "operand for sleep instruction must be an unsigned 6 bit compile-time constant"); 6216 break; 6217 } 6218 return false; 6219 } 6220 6221 /* Return true if it is ok to make a tail-call to DECL. */ 6222 6223 static bool 6224 arc_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, 6225 tree exp ATTRIBUTE_UNUSED) 6226 { 6227 /* Never tailcall from an ISR routine - it needs a special exit sequence. */ 6228 if (ARC_INTERRUPT_P (arc_compute_function_type (cfun))) 6229 return false; 6230 6231 /* Everything else is ok. */ 6232 return true; 6233 } 6234 6235 /* Output code to add DELTA to the first argument, and then jump 6236 to FUNCTION. Used for C++ multiple inheritance. */ 6237 6238 static void 6239 arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, 6240 HOST_WIDE_INT delta, 6241 HOST_WIDE_INT vcall_offset, 6242 tree function) 6243 { 6244 int mi_delta = delta; 6245 const char *const mi_op = mi_delta < 0 ? "sub" : "add"; 6246 int shift = 0; 6247 int this_regno 6248 = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0; 6249 rtx fnaddr; 6250 6251 if (mi_delta < 0) 6252 mi_delta = - mi_delta; 6253 6254 /* Add DELTA. When possible use a plain add, otherwise load it into 6255 a register first. */ 6256 6257 while (mi_delta != 0) 6258 { 6259 if ((mi_delta & (3 << shift)) == 0) 6260 shift += 2; 6261 else 6262 { 6263 asm_fprintf (file, "\t%s\t%s, %s, %d\n", 6264 mi_op, reg_names[this_regno], reg_names[this_regno], 6265 mi_delta & (0xff << shift)); 6266 mi_delta &= ~(0xff << shift); 6267 shift += 8; 6268 } 6269 } 6270 6271 /* If needed, add *(*THIS + VCALL_OFFSET) to THIS. 
*/ 6272 if (vcall_offset != 0) 6273 { 6274 /* ld r12,[this] --> temp = *this 6275 add r12,r12,vcall_offset --> temp = *(*this + vcall_offset) 6276 ld r12,[r12] 6277 add this,this,r12 --> this+ = *(*this + vcall_offset) */ 6278 asm_fprintf (file, "\tld\t%s, [%s]\n", 6279 ARC_TEMP_SCRATCH_REG, reg_names[this_regno]); 6280 asm_fprintf (file, "\tadd\t%s, %s, " HOST_WIDE_INT_PRINT_DEC "\n", 6281 ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset); 6282 asm_fprintf (file, "\tld\t%s, [%s]\n", 6283 ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG); 6284 asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno], 6285 reg_names[this_regno], ARC_TEMP_SCRATCH_REG); 6286 } 6287 6288 fnaddr = XEXP (DECL_RTL (function), 0); 6289 6290 if (arc_is_longcall_p (fnaddr)) 6291 fputs ("\tj\t", file); 6292 else 6293 fputs ("\tb\t", file); 6294 assemble_name (file, XSTR (fnaddr, 0)); 6295 fputc ('\n', file); 6296 } 6297 6298 /* Return true if a 32 bit "long_call" should be generated for 6299 this calling SYM_REF. We generate a long_call if the function: 6300 6301 a. has an __attribute__((long call)) 6302 or b. the -mlong-calls command line switch has been specified 6303 6304 However we do not generate a long call if the function has an 6305 __attribute__ ((short_call)) or __attribute__ ((medium_call)) 6306 6307 This function will be called by C fragments contained in the machine 6308 description file. */ 6309 6310 bool 6311 arc_is_longcall_p (rtx sym_ref) 6312 { 6313 if (GET_CODE (sym_ref) != SYMBOL_REF) 6314 return false; 6315 6316 return (SYMBOL_REF_LONG_CALL_P (sym_ref) 6317 || (TARGET_LONG_CALLS_SET 6318 && !SYMBOL_REF_SHORT_CALL_P (sym_ref) 6319 && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref))); 6320 6321 } 6322 6323 /* Likewise for short calls. */ 6324 6325 bool 6326 arc_is_shortcall_p (rtx sym_ref) 6327 { 6328 if (GET_CODE (sym_ref) != SYMBOL_REF) 6329 return false; 6330 6331 return (SYMBOL_REF_SHORT_CALL_P (sym_ref) 6332 || (!TARGET_LONG_CALLS_SET && !TARGET_MEDIUM_CALLS 6333 && !SYMBOL_REF_LONG_CALL_P (sym_ref) 6334 && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref))); 6335 6336 } 6337 6338 /* Worker function for TARGET_RETURN_IN_MEMORY. */ 6339 6340 static bool 6341 arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 6342 { 6343 if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type)) 6344 return true; 6345 else 6346 { 6347 HOST_WIDE_INT size = int_size_in_bytes (type); 6348 return (size == -1 || size > (TARGET_V2 ? 16 : 8)); 6349 } 6350 } 6351 6352 6353 /* This was in rtlanal.c, and can go in there when we decide we want 6354 to submit the change for inclusion in the GCC tree. */ 6355 /* Like note_stores, but allow the callback to have side effects on the rtl 6356 (like the note_stores of yore): 6357 Call FUN on each register or MEM that is stored into or clobbered by X. 6358 (X would be the pattern of an insn). DATA is an arbitrary pointer, 6359 ignored by note_stores, but passed to FUN. 6360 FUN may alter parts of the RTL. 6361 6362 FUN receives three arguments: 6363 1. the REG, MEM, CC0 or PC being stored in or clobbered, 6364 2. the SET or CLOBBER rtx that does the store, 6365 3. the pointer DATA provided to note_stores. 6366 6367 If the item being stored in or clobbered is a SUBREG of a hard register, 6368 the SUBREG will be passed. */ 6369 6370 /* For now. 
*/ static 6371 void 6372 walk_stores (rtx x, void (*fun) (rtx, rtx, void *), void *data) 6373 { 6374 int i; 6375 6376 if (GET_CODE (x) == COND_EXEC) 6377 x = COND_EXEC_CODE (x); 6378 6379 if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER) 6380 { 6381 rtx dest = SET_DEST (x); 6382 6383 while ((GET_CODE (dest) == SUBREG 6384 && (!REG_P (SUBREG_REG (dest)) 6385 || REGNO (SUBREG_REG (dest)) >= FIRST_PSEUDO_REGISTER)) 6386 || GET_CODE (dest) == ZERO_EXTRACT 6387 || GET_CODE (dest) == STRICT_LOW_PART) 6388 dest = XEXP (dest, 0); 6389 6390 /* If we have a PARALLEL, SET_DEST is a list of EXPR_LIST expressions, 6391 each of whose first operand is a register. */ 6392 if (GET_CODE (dest) == PARALLEL) 6393 { 6394 for (i = XVECLEN (dest, 0) - 1; i >= 0; i--) 6395 if (XEXP (XVECEXP (dest, 0, i), 0) != 0) 6396 (*fun) (XEXP (XVECEXP (dest, 0, i), 0), x, data); 6397 } 6398 else 6399 (*fun) (dest, x, data); 6400 } 6401 6402 else if (GET_CODE (x) == PARALLEL) 6403 for (i = XVECLEN (x, 0) - 1; i >= 0; i--) 6404 walk_stores (XVECEXP (x, 0, i), fun, data); 6405 } 6406 6407 static bool 6408 arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED, 6409 machine_mode mode ATTRIBUTE_UNUSED, 6410 const_tree type, 6411 bool named ATTRIBUTE_UNUSED) 6412 { 6413 return (type != 0 6414 && (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST 6415 || TREE_ADDRESSABLE (type))); 6416 } 6417 6418 /* Implement TARGET_CAN_USE_DOLOOP_P. */ 6419 6420 static bool 6421 arc_can_use_doloop_p (const widest_int &iterations, const widest_int &, 6422 unsigned int loop_depth, bool entered_at_top) 6423 { 6424 if (loop_depth > 1) 6425 return false; 6426 /* Setting up the loop with two sr instructions costs 6 cycles. */ 6427 if (TARGET_ARC700 6428 && !entered_at_top 6429 && wi::gtu_p (iterations, 0) 6430 && wi::leu_p (iterations, flag_pic ? 6 : 3)) 6431 return false; 6432 return true; 6433 } 6434 6435 /* NULL if INSN insn is valid within a low-overhead loop. 6436 Otherwise return why doloop cannot be applied. */ 6437 6438 static const char * 6439 arc_invalid_within_doloop (const rtx_insn *insn) 6440 { 6441 if (CALL_P (insn)) 6442 return "Function call in the loop."; 6443 return NULL; 6444 } 6445 6446 /* Return true if a load instruction (CONSUMER) uses the same address as a 6447 store instruction (PRODUCER). This function is used to avoid st/ld 6448 address hazard in ARC700 cores. */ 6449 bool 6450 arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer) 6451 { 6452 rtx in_set, out_set; 6453 rtx out_addr, in_addr; 6454 6455 if (!producer) 6456 return false; 6457 6458 if (!consumer) 6459 return false; 6460 6461 /* Peel the producer and the consumer for the address. */ 6462 out_set = single_set (producer); 6463 if (out_set) 6464 { 6465 out_addr = SET_DEST (out_set); 6466 if (!out_addr) 6467 return false; 6468 if (GET_CODE (out_addr) == ZERO_EXTEND 6469 || GET_CODE (out_addr) == SIGN_EXTEND) 6470 out_addr = XEXP (out_addr, 0); 6471 6472 if (!MEM_P (out_addr)) 6473 return false; 6474 6475 in_set = single_set (consumer); 6476 if (in_set) 6477 { 6478 in_addr = SET_SRC (in_set); 6479 if (!in_addr) 6480 return false; 6481 if (GET_CODE (in_addr) == ZERO_EXTEND 6482 || GET_CODE (in_addr) == SIGN_EXTEND) 6483 in_addr = XEXP (in_addr, 0); 6484 6485 if (!MEM_P (in_addr)) 6486 return false; 6487 /* Get rid of the MEM and check if the addresses are 6488 equivalent. 
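   For example (illustrative), a store to [r2,8] followed by a load from
   [r2,8] ends up comparing the two (plus (reg r2) (const_int 8))
   addresses and reports a hazard; any equivalent address expressions
   match.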
*/ 6489 in_addr = XEXP (in_addr, 0); 6490 out_addr = XEXP (out_addr, 0); 6491 6492 return exp_equiv_p (in_addr, out_addr, 0, true); 6493 } 6494 } 6495 return false; 6496 } 6497 6498 /* The same functionality as arc_hazard. It is called in machine 6499 reorg before any other optimization. Hence, the NOP size is taken 6500 into account when doing branch shortening. */ 6501 6502 static void 6503 workaround_arc_anomaly (void) 6504 { 6505 rtx_insn *insn, *succ0; 6506 6507 /* For any architecture: call arc_hazard here. */ 6508 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 6509 { 6510 succ0 = next_real_insn (insn); 6511 if (arc_hazard (insn, succ0)) 6512 { 6513 emit_insn_before (gen_nopv (), succ0); 6514 } 6515 } 6516 6517 if (TARGET_ARC700) 6518 { 6519 rtx_insn *succ1; 6520 6521 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 6522 { 6523 succ0 = next_real_insn (insn); 6524 if (arc_store_addr_hazard_p (insn, succ0)) 6525 { 6526 emit_insn_after (gen_nopv (), insn); 6527 emit_insn_after (gen_nopv (), insn); 6528 continue; 6529 } 6530 6531 /* Avoid adding nops if the instruction between the ST and LD is 6532 a call or jump. */ 6533 succ1 = next_real_insn (succ0); 6534 if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0) 6535 && arc_store_addr_hazard_p (insn, succ1)) 6536 emit_insn_after (gen_nopv (), insn); 6537 } 6538 } 6539 } 6540 6541 static int arc_reorg_in_progress = 0; 6542 6543 /* ARC's machince specific reorg function. */ 6544 6545 static void 6546 arc_reorg (void) 6547 { 6548 rtx_insn *insn; 6549 rtx pattern; 6550 rtx pc_target; 6551 long offset; 6552 int changed; 6553 6554 workaround_arc_anomaly (); 6555 6556 cfun->machine->arc_reorg_started = 1; 6557 arc_reorg_in_progress = 1; 6558 6559 /* Link up loop ends with their loop start. */ 6560 { 6561 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 6562 if (GET_CODE (insn) == JUMP_INSN 6563 && recog_memoized (insn) == CODE_FOR_doloop_end_i) 6564 { 6565 rtx_insn *top_label 6566 = as_a <rtx_insn *> (XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0)); 6567 rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label)); 6568 rtx_insn *lp, *prev = prev_nonnote_insn (top_label); 6569 rtx_insn *lp_simple = NULL; 6570 rtx_insn *next = NULL; 6571 rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0); 6572 int seen_label = 0; 6573 6574 for (lp = prev; 6575 (lp && NONJUMP_INSN_P (lp) 6576 && recog_memoized (lp) != CODE_FOR_doloop_begin_i); 6577 lp = prev_nonnote_insn (lp)) 6578 ; 6579 if (!lp || !NONJUMP_INSN_P (lp) 6580 || dead_or_set_regno_p (lp, LP_COUNT)) 6581 { 6582 HOST_WIDE_INT loop_end_id 6583 = INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0)); 6584 6585 for (prev = next = insn, lp = NULL ; prev || next;) 6586 { 6587 if (prev) 6588 { 6589 if (NONJUMP_INSN_P (prev) 6590 && recog_memoized (prev) == CODE_FOR_doloop_begin_i 6591 && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0)) 6592 == loop_end_id)) 6593 { 6594 lp = prev; 6595 break; 6596 } 6597 else if (LABEL_P (prev)) 6598 seen_label = 1; 6599 prev = prev_nonnote_insn (prev); 6600 } 6601 if (next) 6602 { 6603 if (NONJUMP_INSN_P (next) 6604 && recog_memoized (next) == CODE_FOR_doloop_begin_i 6605 && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0)) 6606 == loop_end_id)) 6607 { 6608 lp = next; 6609 break; 6610 } 6611 next = next_nonnote_insn (next); 6612 } 6613 } 6614 prev = NULL; 6615 } 6616 else 6617 lp_simple = lp; 6618 if (lp && !dead_or_set_regno_p (lp, LP_COUNT)) 6619 { 6620 rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0); 6621 if (INTVAL (XEXP (XVECEXP (PATTERN 
(lp), 0, 4), 0)))
6622 /* The loop end insn has been duplicated. That can happen
6623 when there is a conditional block at the very end of
6624 the loop. */
6625 goto failure;
6626 /* If register allocation failed to allocate to the right
6627 register, there is no point in teaching reload to
6628 fix this up with reloads, as that would cost more
6629 than using an ordinary core register with the
6630 doloop_fallback pattern. */
6631 if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt))
6632 /* Likewise, if the loop setup is evidently inside the loop,
6633 we lose. */
6634 || (!lp_simple && lp != next && !seen_label))
6635 {
6636 remove_insn (lp);
6637 goto failure;
6638 }
6639 /* It is common that the optimizers copy the loop count from
6640 another register, and doloop_begin_i is stuck with the
6641 source of the move. Making doloop_begin_i only accept "l"
6642 is nonsensical, as this then makes reload evict the pseudo
6643 used for the loop end. The underlying cause is that the
6644 optimizers don't understand that the register allocation for
6645 doloop_begin_i should be treated as part of the loop.
6646 Try to work around this problem by verifying the previous
6647 move exists. */
6648 if (true_regnum (begin_cnt) != LP_COUNT)
6649 {
6650 rtx_insn *mov;
6651 rtx set, note;
6652
6653 for (mov = prev_nonnote_insn (lp); mov;
6654 mov = prev_nonnote_insn (mov))
6655 {
6656 if (!NONJUMP_INSN_P (mov))
6657 mov = 0;
6658 else if ((set = single_set (mov))
6659 && rtx_equal_p (SET_SRC (set), begin_cnt)
6660 && rtx_equal_p (SET_DEST (set), op0))
6661 break;
6662 }
6663 if (mov)
6664 {
6665 XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0) = op0;
6666 note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt));
6667 if (note)
6668 remove_note (lp, note);
6669 }
6670 else
6671 {
6672 remove_insn (lp);
6673 goto failure;
6674 }
6675 }
6676 XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num;
6677 XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num;
6678 if (next == lp)
6679 XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx;
6680 else if (!lp_simple)
6681 XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx;
6682 else if (prev != lp)
6683 {
6684 remove_insn (lp);
6685 add_insn_after (lp, prev, NULL);
6686 }
6687 if (!lp_simple)
6688 {
6689 XEXP (XVECEXP (PATTERN (lp), 0, 7), 0)
6690 = gen_rtx_LABEL_REF (Pmode, top_label);
6691 add_reg_note (lp, REG_LABEL_OPERAND, top_label);
6692 LABEL_NUSES (top_label)++;
6693 }
6694 /* We can avoid tedious loop start / end setting for empty loops
6695 by merely setting the loop count to its final value. */
6696 if (next_active_insn (top_label) == insn)
6697 {
6698 rtx lc_set
6699 = gen_rtx_SET (XEXP (XVECEXP (PATTERN (lp), 0, 3), 0),
6700 const0_rtx);
6701
6702 rtx_insn *lc_set_insn = emit_insn_before (lc_set, insn);
6703 delete_insn (lp);
6704 delete_insn (insn);
6705 insn = lc_set_insn;
6706 }
6707 /* If the loop is non-empty with zero length, we can't make it
6708 a zero-overhead loop. That can happen for empty asms. */
6709 else
6710 {
6711 rtx_insn *scan;
6712
6713 for (scan = top_label;
6714 (scan && scan != insn
6715 && (!NONJUMP_INSN_P (scan) || !get_attr_length (scan)));
6716 scan = NEXT_INSN (scan));
6717 if (scan == insn)
6718 {
6719 remove_insn (lp);
6720 goto failure;
6721 }
6722 }
6723 }
6724 else
6725 {
6726 /* Sometimes the loop optimizer makes a complete hash of the
6727 loop. If it were only that the loop is not entered at the
6728 top, we could fix this up by setting LP_START with SR.
6729 However, if we can't find the loop begin where it should be,
6730 chances are that it does not even dominate the loop, but is
6731 inside the loop instead. Using SR there would kill
6732 performance.
6733 We use the doloop_fallback pattern here, which executes
6734 in two cycles on the ARC700 when predicted correctly. */
6735 failure:
6736 if (!REG_P (op0))
6737 {
6738 rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0);
6739
6740 emit_insn_before (gen_move_insn (op3, op0), insn);
6741 PATTERN (insn)
6742 = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0);
6743 }
6744 else
6745 XVEC (PATTERN (insn), 0)
6746 = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0),
6747 XVECEXP (PATTERN (insn), 0, 1));
6748 INSN_CODE (insn) = -1;
6749 }
6750 }
6751 }
6752
6753 /* FIXME: should anticipate ccfsm action, generate special patterns for
6754 to-be-deleted branches that have no delay slot and have at least the
6755 length of the size increase forced on other insns that are conditionalized.
6756 This can also have an insn_list inside that enumerates insns which are
6757 not actually conditionalized because the destinations are dead in the
6758 not-execute case.
6759 Could also tag branches that we want to be unaligned if they get no delay
6760 slot, or even ones that we don't want to do delay slot scheduling for
6761 because we can unalign them.
6762
6763 However, there are cases when conditional execution is only possible after
6764 delay slot scheduling:
6765
6766 - If a delay slot is filled with a nocond/set insn from above, the previous
6767 basic block can become eligible for conditional execution.
6768 - If a delay slot is filled with a nocond insn from the fall-through path,
6769 the branch with that delay slot can become eligible for conditional
6770 execution (however, with the same sort of data flow analysis that dbr
6771 does, we could have figured out before that we don't need to
6772 conditionalize this insn.)
6773 - If a delay slot insn is filled with an insn from the target, the
6774 target label gets its uses decremented (even deleted if falling to zero),
6775 thus possibly creating more condexec opportunities there.
6776 Therefore, we should still be prepared to apply condexec optimization on
6777 non-prepared branches if the size increase of conditionalized insns is no
6778 more than the size saved from eliminating the branch. An invocation option
6779 could also be used to reserve a bit of extra size for condbranches so that
6780 this'll work more often (could also test in arc_reorg if the block is
6781 'close enough' to be eligible for condexec to make this likely, and
6782 estimate required size increase). */
6783 /* Generate BRcc insns, by combining cmp and Bcc insns wherever possible. */
6784 if (TARGET_NO_BRCC_SET)
6785 return;
6786
6787 do
6788 {
6789 init_insn_lengths();
6790 changed = 0;
6791
6792 if (optimize > 1 && !TARGET_NO_COND_EXEC)
6793 {
6794 arc_ifcvt ();
6795 unsigned int flags = pass_data_arc_ifcvt.todo_flags_finish;
6796 df_finish_pass ((flags & TODO_df_verify) != 0);
6797 }
6798
6799 /* Call shorten_branches to calculate the insn lengths. */
6800 shorten_branches (get_insns());
6801 cfun->machine->ccfsm_current_insn = NULL_RTX;
6802
6803 if (!INSN_ADDRESSES_SET_P())
6804 fatal_error (input_location, "insn addresses not set after shorten_branches");
6805
6806 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6807 {
6808 rtx label;
6809 enum attr_type insn_type;
6810
6811 /* If a non-jump insn (or a casesi jump table), continue.
*/ 6812 if (GET_CODE (insn) != JUMP_INSN || 6813 GET_CODE (PATTERN (insn)) == ADDR_VEC 6814 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) 6815 continue; 6816 6817 /* If we already have a brcc, note if it is suitable for brcc_s. 6818 Be a bit generous with the brcc_s range so that we can take 6819 advantage of any code shortening from delay slot scheduling. */ 6820 if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch) 6821 { 6822 rtx pat = PATTERN (insn); 6823 rtx op = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0); 6824 rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0); 6825 6826 offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); 6827 if ((offset >= -140 && offset < 140) 6828 && rtx_equal_p (XEXP (op, 1), const0_rtx) 6829 && compact_register_operand (XEXP (op, 0), VOIDmode) 6830 && equality_comparison_operator (op, VOIDmode)) 6831 PUT_MODE (*ccp, CC_Zmode); 6832 else if (GET_MODE (*ccp) == CC_Zmode) 6833 PUT_MODE (*ccp, CC_ZNmode); 6834 continue; 6835 } 6836 if ((insn_type = get_attr_type (insn)) == TYPE_BRCC 6837 || insn_type == TYPE_BRCC_NO_DELAY_SLOT) 6838 continue; 6839 6840 /* OK. so we have a jump insn. */ 6841 /* We need to check that it is a bcc. */ 6842 /* Bcc => set (pc) (if_then_else ) */ 6843 pattern = PATTERN (insn); 6844 if (GET_CODE (pattern) != SET 6845 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE 6846 || ANY_RETURN_P (XEXP (SET_SRC (pattern), 1))) 6847 continue; 6848 6849 /* Now check if the jump is beyond the s9 range. */ 6850 if (CROSSING_JUMP_P (insn)) 6851 continue; 6852 offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); 6853 6854 if(offset > 253 || offset < -254) 6855 continue; 6856 6857 pc_target = SET_SRC (pattern); 6858 6859 /* Avoid FPU instructions. */ 6860 if ((GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPUmode) 6861 || (GET_MODE (XEXP (XEXP (pc_target, 0), 0)) == CC_FPU_UNEQmode)) 6862 continue; 6863 6864 /* Now go back and search for the set cc insn. */ 6865 6866 label = XEXP (pc_target, 1); 6867 6868 { 6869 rtx pat; 6870 rtx_insn *scan, *link_insn = NULL; 6871 6872 for (scan = PREV_INSN (insn); 6873 scan && GET_CODE (scan) != CODE_LABEL; 6874 scan = PREV_INSN (scan)) 6875 { 6876 if (! INSN_P (scan)) 6877 continue; 6878 pat = PATTERN (scan); 6879 if (GET_CODE (pat) == SET 6880 && cc_register (SET_DEST (pat), VOIDmode)) 6881 { 6882 link_insn = scan; 6883 break; 6884 } 6885 } 6886 if (!link_insn) 6887 continue; 6888 else 6889 /* Check if this is a data dependency. */ 6890 { 6891 rtx op, cc_clob_rtx, op0, op1, brcc_insn, note; 6892 rtx cmp0, cmp1; 6893 6894 /* Ok this is the set cc. copy args here. */ 6895 op = XEXP (pc_target, 0); 6896 6897 op0 = cmp0 = XEXP (SET_SRC (pat), 0); 6898 op1 = cmp1 = XEXP (SET_SRC (pat), 1); 6899 if (GET_CODE (op0) == ZERO_EXTRACT 6900 && XEXP (op0, 1) == const1_rtx 6901 && (GET_CODE (op) == EQ 6902 || GET_CODE (op) == NE)) 6903 { 6904 /* btst / b{eq,ne} -> bbit{0,1} */ 6905 op0 = XEXP (cmp0, 0); 6906 op1 = XEXP (cmp0, 2); 6907 } 6908 else if (!register_operand (op0, VOIDmode) 6909 || !general_operand (op1, VOIDmode)) 6910 continue; 6911 /* Be careful not to break what cmpsfpx_raw is 6912 trying to create for checking equality of 6913 single-precision floats. */ 6914 else if (TARGET_SPFP 6915 && GET_MODE (op0) == SFmode 6916 && GET_MODE (op1) == SFmode) 6917 continue; 6918 6919 /* None of the two cmp operands should be set between the 6920 cmp and the branch. 
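   Otherwise, fusing the compare into the branch would test a value
   different from the one the original cmp / bcc pair used.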
*/ 6921 if (reg_set_between_p (op0, link_insn, insn)) 6922 continue; 6923 6924 if (reg_set_between_p (op1, link_insn, insn)) 6925 continue; 6926 6927 /* Since the MODE check does not work, check that this is 6928 CC reg's last set location before insn, and also no 6929 instruction between the cmp and branch uses the 6930 condition codes. */ 6931 if ((reg_set_between_p (SET_DEST (pat), link_insn, insn)) 6932 || (reg_used_between_p (SET_DEST (pat), link_insn, insn))) 6933 continue; 6934 6935 /* CC reg should be dead after insn. */ 6936 if (!find_regno_note (insn, REG_DEAD, CC_REG)) 6937 continue; 6938 6939 op = gen_rtx_fmt_ee (GET_CODE (op), 6940 GET_MODE (op), cmp0, cmp1); 6941 /* If we create a LIMM where there was none before, 6942 we only benefit if we can avoid a scheduling bubble 6943 for the ARC600. Otherwise, we'd only forgo chances 6944 at short insn generation, and risk out-of-range 6945 branches. */ 6946 if (!brcc_nolimm_operator (op, VOIDmode) 6947 && !long_immediate_operand (op1, VOIDmode) 6948 && (TARGET_ARC700 6949 || next_active_insn (link_insn) != insn)) 6950 continue; 6951 6952 /* Emit bbit / brcc (or brcc_s if possible). 6953 CC_Zmode indicates that brcc_s is possible. */ 6954 6955 if (op0 != cmp0) 6956 cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG); 6957 else if ((offset >= -140 && offset < 140) 6958 && rtx_equal_p (op1, const0_rtx) 6959 && compact_register_operand (op0, VOIDmode) 6960 && (GET_CODE (op) == EQ 6961 || GET_CODE (op) == NE)) 6962 cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG); 6963 else 6964 cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG); 6965 6966 brcc_insn 6967 = gen_rtx_IF_THEN_ELSE (VOIDmode, op, label, pc_rtx); 6968 brcc_insn = gen_rtx_SET (pc_rtx, brcc_insn); 6969 cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx); 6970 brcc_insn 6971 = gen_rtx_PARALLEL 6972 (VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx)); 6973 brcc_insn = emit_jump_insn_before (brcc_insn, insn); 6974 6975 JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn); 6976 note = find_reg_note (insn, REG_BR_PROB, 0); 6977 if (note) 6978 { 6979 XEXP (note, 1) = REG_NOTES (brcc_insn); 6980 REG_NOTES (brcc_insn) = note; 6981 } 6982 note = find_reg_note (link_insn, REG_DEAD, op0); 6983 if (note) 6984 { 6985 remove_note (link_insn, note); 6986 XEXP (note, 1) = REG_NOTES (brcc_insn); 6987 REG_NOTES (brcc_insn) = note; 6988 } 6989 note = find_reg_note (link_insn, REG_DEAD, op1); 6990 if (note) 6991 { 6992 XEXP (note, 1) = REG_NOTES (brcc_insn); 6993 REG_NOTES (brcc_insn) = note; 6994 } 6995 6996 changed = 1; 6997 6998 /* Delete the bcc insn. */ 6999 set_insn_deleted (insn); 7000 7001 /* Delete the cmp insn. */ 7002 set_insn_deleted (link_insn); 7003 7004 } 7005 } 7006 } 7007 /* Clear out insn_addresses. */ 7008 INSN_ADDRESSES_FREE (); 7009 7010 } while (changed); 7011 7012 if (INSN_ADDRESSES_SET_P()) 7013 fatal_error (input_location, "insn addresses not freed"); 7014 7015 arc_reorg_in_progress = 0; 7016 } 7017 7018 /* Check if the operands are valid for BRcc.d generation 7019 Valid Brcc.d patterns are 7020 Brcc.d b, c, s9 7021 Brcc.d b, u6, s9 7022 7023 For cc={GT, LE, GTU, LEU}, u6=63 can not be allowed, 7024 since they are encoded by the assembler as {GE, LT, HS, LS} 64, which 7025 does not have a delay slot 7026 7027 Assumed precondition: Second operand is either a register or a u6 value. 
*/ 7028 7029 bool 7030 valid_brcc_with_delay_p (rtx *operands) 7031 { 7032 if (optimize_size && GET_MODE (operands[4]) == CC_Zmode) 7033 return false; 7034 return brcc_nolimm_operator (operands[0], VOIDmode); 7035 } 7036 7037 /* ??? Hack. This should no really be here. See PR32143. */ 7038 static bool 7039 arc_decl_anon_ns_mem_p (const_tree decl) 7040 { 7041 while (1) 7042 { 7043 if (decl == NULL_TREE || decl == error_mark_node) 7044 return false; 7045 if (TREE_CODE (decl) == NAMESPACE_DECL 7046 && DECL_NAME (decl) == NULL_TREE) 7047 return true; 7048 /* Classes and namespaces inside anonymous namespaces have 7049 TREE_PUBLIC == 0, so we can shortcut the search. */ 7050 else if (TYPE_P (decl)) 7051 return (TREE_PUBLIC (TYPE_NAME (decl)) == 0); 7052 else if (TREE_CODE (decl) == NAMESPACE_DECL) 7053 return (TREE_PUBLIC (decl) == 0); 7054 else 7055 decl = DECL_CONTEXT (decl); 7056 } 7057 } 7058 7059 /* Implement TARGET_IN_SMALL_DATA_P. Return true if it would be safe to 7060 access DECL using %gp_rel(...)($gp). */ 7061 7062 static bool 7063 arc_in_small_data_p (const_tree decl) 7064 { 7065 HOST_WIDE_INT size; 7066 7067 if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL) 7068 return false; 7069 7070 7071 /* We don't yet generate small-data references for -mabicalls. See related 7072 -G handling in override_options. */ 7073 if (TARGET_NO_SDATA_SET) 7074 return false; 7075 7076 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0) 7077 { 7078 const char *name; 7079 7080 /* Reject anything that isn't in a known small-data section. */ 7081 name = DECL_SECTION_NAME (decl); 7082 if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0) 7083 return false; 7084 7085 /* If a symbol is defined externally, the assembler will use the 7086 usual -G rules when deciding how to implement macros. */ 7087 if (!DECL_EXTERNAL (decl)) 7088 return true; 7089 } 7090 /* Only global variables go into sdata section for now. */ 7091 else if (1) 7092 { 7093 /* Don't put constants into the small data section: we want them 7094 to be in ROM rather than RAM. */ 7095 if (TREE_CODE (decl) != VAR_DECL) 7096 return false; 7097 7098 if (TREE_READONLY (decl) 7099 && !TREE_SIDE_EFFECTS (decl) 7100 && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl)))) 7101 return false; 7102 7103 /* TREE_PUBLIC might change after the first call, because of the patch 7104 for PR19238. */ 7105 if (default_binds_local_p_1 (decl, 1) 7106 || arc_decl_anon_ns_mem_p (decl)) 7107 return false; 7108 7109 /* To ensure -mvolatile-cache works 7110 ld.di does not have a gp-relative variant. */ 7111 if (TREE_THIS_VOLATILE (decl)) 7112 return false; 7113 } 7114 7115 /* Disable sdata references to weak variables. */ 7116 if (DECL_WEAK (decl)) 7117 return false; 7118 7119 size = int_size_in_bytes (TREE_TYPE (decl)); 7120 7121 /* if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) */ 7122 /* return false; */ 7123 7124 /* Allow only <=4B long data types into sdata. */ 7125 return (size > 0 && size <= 4); 7126 } 7127 7128 /* Return true if X is a small data address that can be rewritten 7129 as a gp+symref. 
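   For example, a SYMBOL_REF with SYMBOL_REF_SMALL_P set, either on its
   own or wrapped as (const (plus (symbol_ref ...) (const_int ...))).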
*/ 7130 7131 static bool 7132 arc_rewrite_small_data_p (const_rtx x) 7133 { 7134 if (GET_CODE (x) == CONST) 7135 x = XEXP (x, 0); 7136 7137 if (GET_CODE (x) == PLUS) 7138 { 7139 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 7140 x = XEXP (x, 0); 7141 } 7142 7143 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x)) 7144 { 7145 gcc_assert (SYMBOL_REF_TLS_MODEL (x) == 0); 7146 return true; 7147 } 7148 return false; 7149 } 7150 7151 /* If possible, rewrite OP so that it refers to small data using 7152 explicit relocations. */ 7153 7154 rtx 7155 arc_rewrite_small_data (rtx op) 7156 { 7157 op = copy_insn (op); 7158 subrtx_ptr_iterator::array_type array; 7159 FOR_EACH_SUBRTX_PTR (iter, array, &op, ALL) 7160 { 7161 rtx *loc = *iter; 7162 if (arc_rewrite_small_data_p (*loc)) 7163 { 7164 gcc_assert (SDATA_BASE_REGNUM == PIC_OFFSET_TABLE_REGNUM); 7165 *loc = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, *loc); 7166 if (loc != &op) 7167 { 7168 if (GET_CODE (op) == MEM && &XEXP (op, 0) == loc) 7169 ; /* OK. */ 7170 else if (GET_CODE (op) == MEM 7171 && GET_CODE (XEXP (op, 0)) == PLUS 7172 && GET_CODE (XEXP (XEXP (op, 0), 0)) == MULT) 7173 *loc = force_reg (Pmode, *loc); 7174 else 7175 gcc_unreachable (); 7176 } 7177 iter.skip_subrtxes (); 7178 } 7179 else if (GET_CODE (*loc) == PLUS 7180 && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx)) 7181 iter.skip_subrtxes (); 7182 } 7183 return op; 7184 } 7185 7186 /* Return true if OP refers to small data symbols directly, not through 7187 a PLUS. */ 7188 7189 bool 7190 small_data_pattern (rtx op, machine_mode) 7191 { 7192 if (GET_CODE (op) == SEQUENCE) 7193 return false; 7194 subrtx_iterator::array_type array; 7195 FOR_EACH_SUBRTX (iter, array, op, ALL) 7196 { 7197 const_rtx x = *iter; 7198 if (GET_CODE (x) == PLUS 7199 && rtx_equal_p (XEXP (x, 0), pic_offset_table_rtx)) 7200 iter.skip_subrtxes (); 7201 else if (arc_rewrite_small_data_p (x)) 7202 return true; 7203 } 7204 return false; 7205 } 7206 7207 /* Return true if OP is an acceptable memory operand for ARCompact 7208 16-bit gp-relative load instructions. 7209 op shd look like : [r26, symref@sda] 7210 i.e. (mem (plus (reg 26) (symref with smalldata flag set)) 7211 */ 7212 /* volatile cache option still to be handled. */ 7213 7214 bool 7215 compact_sda_memory_operand (rtx op, machine_mode mode) 7216 { 7217 rtx addr; 7218 int size; 7219 7220 /* Eliminate non-memory operations. */ 7221 if (GET_CODE (op) != MEM) 7222 return false; 7223 7224 if (mode == VOIDmode) 7225 mode = GET_MODE (op); 7226 7227 size = GET_MODE_SIZE (mode); 7228 7229 /* dword operations really put out 2 instructions, so eliminate them. */ 7230 if (size > UNITS_PER_WORD) 7231 return false; 7232 7233 /* Decode the address now. */ 7234 addr = XEXP (op, 0); 7235 7236 return LEGITIMATE_SMALL_DATA_ADDRESS_P (addr); 7237 } 7238 7239 /* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL. 
*/ 7240 7241 void 7242 arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name, 7243 unsigned HOST_WIDE_INT size, 7244 unsigned HOST_WIDE_INT align, 7245 unsigned HOST_WIDE_INT globalize_p) 7246 { 7247 int in_small_data = arc_in_small_data_p (decl); 7248 7249 if (in_small_data) 7250 switch_to_section (get_named_section (NULL, ".sbss", 0)); 7251 /* named_section (0,".sbss",0); */ 7252 else 7253 switch_to_section (bss_section); 7254 7255 if (globalize_p) 7256 (*targetm.asm_out.globalize_label) (stream, name); 7257 7258 ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT)); 7259 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); 7260 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); 7261 ASM_OUTPUT_LABEL (stream, name); 7262 7263 if (size != 0) 7264 ASM_OUTPUT_SKIP (stream, size); 7265 } 7266 7267 static bool 7268 arc_preserve_reload_p (rtx in) 7269 { 7270 return (GET_CODE (in) == PLUS 7271 && RTX_OK_FOR_BASE_P (XEXP (in, 0), true) 7272 && CONST_INT_P (XEXP (in, 1)) 7273 && !((INTVAL (XEXP (in, 1)) & 511))); 7274 } 7275 7276 int 7277 arc_register_move_cost (machine_mode, 7278 enum reg_class from_class, enum reg_class to_class) 7279 { 7280 /* The ARC600 has no bypass for extension registers, hence a nop might be 7281 needed to be inserted after a write so that reads are safe. */ 7282 if (TARGET_ARC600) 7283 { 7284 if (to_class == MPY_WRITABLE_CORE_REGS) 7285 return 3; 7286 /* Instructions modifying LP_COUNT need 4 additional cycles before 7287 the register will actually contain the value. */ 7288 else if (to_class == LPCOUNT_REG) 7289 return 6; 7290 else if (to_class == WRITABLE_CORE_REGS) 7291 return 6; 7292 } 7293 7294 /* The ARC700 stalls for 3 cycles when *reading* from lp_count. */ 7295 if (TARGET_ARC700 7296 && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS 7297 || from_class == WRITABLE_CORE_REGS)) 7298 return 8; 7299 7300 /* Force an attempt to 'mov Dy,Dx' to spill. */ 7301 if ((TARGET_ARC700 || TARGET_EM) && TARGET_DPFP 7302 && from_class == DOUBLE_REGS && to_class == DOUBLE_REGS) 7303 return 100; 7304 7305 return 2; 7306 } 7307 7308 /* Emit code for an addsi3 instruction with OPERANDS. 7309 COND_P indicates if this will use conditional execution. 7310 Return the length of the instruction. 7311 If OUTPUT_P is false, don't actually output the instruction, just return 7312 its length. */ 7313 int 7314 arc_output_addsi (rtx *operands, bool cond_p, bool output_p) 7315 { 7316 char format[35]; 7317 7318 int match = operands_match_p (operands[0], operands[1]); 7319 int match2 = operands_match_p (operands[0], operands[2]); 7320 int intval = (REG_P (operands[2]) ? 1 7321 : CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057); 7322 int neg_intval = -intval; 7323 int short_0 = satisfies_constraint_Rcq (operands[0]); 7324 int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1])); 7325 int ret = 0; 7326 7327 #define ADDSI_OUTPUT1(FORMAT) do {\ 7328 if (output_p) \ 7329 output_asm_insn (FORMAT, operands);\ 7330 return ret; \ 7331 } while (0) 7332 #define ADDSI_OUTPUT(LIST) do {\ 7333 if (output_p) \ 7334 sprintf LIST;\ 7335 ADDSI_OUTPUT1 (format);\ 7336 return ret; \ 7337 } while (0) 7338 7339 /* First try to emit a 16 bit insn. */ 7340 ret = 2; 7341 if (!cond_p 7342 /* If we are actually about to output this insn, don't try a 16 bit 7343 variant if we already decided that we don't want that 7344 (I.e. we upsized this insn to align some following insn.) 7345 E.g. 
add_s r0,sp,70 is 16 bit, but add r0,sp,70 requires a LIMM - 7346 but add1 r0,sp,35 doesn't. */ 7347 && (!output_p || (get_attr_length (current_output_insn) & 2))) 7348 { 7349 if (short_p 7350 && (REG_P (operands[2]) 7351 ? (match || satisfies_constraint_Rcq (operands[2])) 7352 : (unsigned) intval <= (match ? 127 : 7))) 7353 ADDSI_OUTPUT1 ("add%? %0,%1,%2"); 7354 if (short_0 && REG_P (operands[1]) && match2) 7355 ADDSI_OUTPUT1 ("add%? %0,%2,%1"); 7356 if ((short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM) 7357 && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124)) 7358 ADDSI_OUTPUT1 ("add%? %0,%1,%2"); 7359 7360 if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7)) 7361 || (REGNO (operands[0]) == STACK_POINTER_REGNUM 7362 && match && !(neg_intval & ~124))) 7363 ADDSI_OUTPUT1 ("sub%? %0,%1,%n2"); 7364 7365 if (REG_P(operands[0]) && REG_P(operands[1]) 7366 && (REGNO(operands[0]) <= 31) && (REGNO(operands[0]) == REGNO(operands[1])) 7367 && CONST_INT_P (operands[2]) && ( (intval>= -1) && (intval <= 6))) 7368 ADDSI_OUTPUT1 ("add%? %0,%1,%2"); 7369 7370 if (TARGET_CODE_DENSITY && REG_P(operands[0]) && REG_P(operands[1]) 7371 && ((REGNO(operands[0]) == 0) || (REGNO(operands[0]) == 1)) 7372 && satisfies_constraint_Rcq (operands[1]) 7373 && satisfies_constraint_L (operands[2])) 7374 ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;3"); 7375 } 7376 7377 /* Now try to emit a 32 bit insn without long immediate. */ 7378 ret = 4; 7379 if (!match && match2 && REG_P (operands[1])) 7380 ADDSI_OUTPUT1 ("add%? %0,%2,%1"); 7381 if (match || !cond_p) 7382 { 7383 int limit = (match && !cond_p) ? 0x7ff : 0x3f; 7384 int range_factor = neg_intval & intval; 7385 int shift; 7386 7387 if (intval == (HOST_WIDE_INT) (HOST_WIDE_INT_M1U << 31)) 7388 ADDSI_OUTPUT1 ("bxor%? %0,%1,31"); 7389 7390 /* If we can use a straight add / sub instead of a {add,sub}[123] of 7391 same size, do, so - the insn latency is lower. */ 7392 /* -0x800 is a 12-bit constant for add /add3 / sub / sub3, but 7393 0x800 is not. */ 7394 if ((intval >= 0 && intval <= limit) 7395 || (intval == -0x800 && limit == 0x7ff)) 7396 ADDSI_OUTPUT1 ("add%? %0,%1,%2"); 7397 else if ((intval < 0 && neg_intval <= limit) 7398 || (intval == 0x800 && limit == 0x7ff)) 7399 ADDSI_OUTPUT1 ("sub%? %0,%1,%n2"); 7400 shift = range_factor >= 8 ? 3 : (range_factor >> 1); 7401 gcc_assert (shift == 0 || shift == 1 || shift == 2 || shift == 3); 7402 gcc_assert ((((1 << shift) - 1) & intval) == 0); 7403 if (((intval < 0 && intval != -0x4000) 7404 /* sub[123] is slower than add_s / sub, only use it if it 7405 avoids a long immediate. */ 7406 && neg_intval <= limit << shift) 7407 || (intval == 0x4000 && limit == 0x7ff)) 7408 ADDSI_OUTPUT ((format, "sub%d%%? %%0,%%1,%d", 7409 shift, neg_intval >> shift)); 7410 else if ((intval >= 0 && intval <= limit << shift) 7411 || (intval == -0x4000 && limit == 0x7ff)) 7412 ADDSI_OUTPUT ((format, "add%d%%? %%0,%%1,%d", shift, intval >> shift)); 7413 } 7414 /* Try to emit a 16 bit opcode with long immediate. */ 7415 ret = 6; 7416 if (short_p && match) 7417 ADDSI_OUTPUT1 ("add%? %0,%1,%S2"); 7418 7419 /* We have to use a 32 bit opcode, and with a long immediate. */ 7420 ret = 8; 7421 ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%S2"); 7422 } 7423 7424 /* Emit code for an commutative_cond_exec instruction with OPERANDS. 7425 Return the length of the instruction. 7426 If OUTPUT_P is false, don't actually output the instruction, just return 7427 its length. 
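   For illustration (the precise constant classes are given by the
   constraints C1p, C2p, Ccp and C0p below, so this is only a sketch of
   the intent): an AND with a mask of consecutive low bits can be output
   as a bit-mask insn, e.g. "and r0,r1,0x1f" becomes "bmsk r0,r1,4"; an
   AND that merely clears one bit becomes bclr, e.g. mask ~0x80 gives
   "bclr r0,r1,7"; an IOR or XOR with a single-bit constant becomes bset
   or bxor, e.g. "or r0,r1,0x100" gives "bset r0,r1,8".  A PLUS is simply
   delegated to arc_output_addsi with COND_P set.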
*/ 7428 int 7429 arc_output_commutative_cond_exec (rtx *operands, bool output_p) 7430 { 7431 enum rtx_code commutative_op = GET_CODE (operands[3]); 7432 const char *pat = NULL; 7433 7434 /* Canonical rtl should not have a constant in the first operand position. */ 7435 gcc_assert (!CONSTANT_P (operands[1])); 7436 7437 switch (commutative_op) 7438 { 7439 case AND: 7440 if (satisfies_constraint_C1p (operands[2])) 7441 pat = "bmsk%? %0,%1,%Z2"; 7442 else if (satisfies_constraint_C2p (operands[2])) 7443 { 7444 operands[2] = GEN_INT ((~INTVAL (operands[2]))); 7445 pat = "bmskn%? %0,%1,%Z2"; 7446 } 7447 else if (satisfies_constraint_Ccp (operands[2])) 7448 pat = "bclr%? %0,%1,%M2"; 7449 else if (satisfies_constraint_CnL (operands[2])) 7450 pat = "bic%? %0,%1,%n2-1"; 7451 break; 7452 case IOR: 7453 if (satisfies_constraint_C0p (operands[2])) 7454 pat = "bset%? %0,%1,%z2"; 7455 break; 7456 case XOR: 7457 if (satisfies_constraint_C0p (operands[2])) 7458 pat = "bxor%? %0,%1,%z2"; 7459 break; 7460 case PLUS: 7461 return arc_output_addsi (operands, true, output_p); 7462 default: break; 7463 } 7464 if (output_p) 7465 output_asm_insn (pat ? pat : "%O3.%d5 %0,%1,%2", operands); 7466 if (pat || REG_P (operands[2]) || satisfies_constraint_L (operands[2])) 7467 return 4; 7468 return 8; 7469 } 7470 7471 /* Helper function of arc_expand_movmem. ADDR points to a chunk of memory. 7472 Emit code and return an potentially modified address such that offsets 7473 up to SIZE are can be added to yield a legitimate address. 7474 if REUSE is set, ADDR is a register that may be modified. */ 7475 7476 static rtx 7477 force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse) 7478 { 7479 rtx base = addr; 7480 rtx offs = const0_rtx; 7481 7482 if (GET_CODE (base) == PLUS) 7483 { 7484 offs = XEXP (base, 1); 7485 base = XEXP (base, 0); 7486 } 7487 if (!REG_P (base) 7488 || (REGNO (base) != STACK_POINTER_REGNUM 7489 && REGNO_PTR_FRAME_P (REGNO (base))) 7490 || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs)) 7491 || !SMALL_INT (INTVAL (offs) + size)) 7492 { 7493 if (reuse) 7494 emit_insn (gen_add2_insn (addr, offs)); 7495 else 7496 addr = copy_to_mode_reg (Pmode, addr); 7497 } 7498 return addr; 7499 } 7500 7501 /* Like move_by_pieces, but take account of load latency, and actual 7502 offset ranges. Return true on success. */ 7503 7504 bool 7505 arc_expand_movmem (rtx *operands) 7506 { 7507 rtx dst = operands[0]; 7508 rtx src = operands[1]; 7509 rtx dst_addr, src_addr; 7510 HOST_WIDE_INT size; 7511 int align = INTVAL (operands[3]); 7512 unsigned n_pieces; 7513 int piece = align; 7514 rtx store[2]; 7515 rtx tmpx[2]; 7516 int i; 7517 7518 if (!CONST_INT_P (operands[2])) 7519 return false; 7520 size = INTVAL (operands[2]); 7521 /* move_by_pieces_ninsns is static, so we can't use it. */ 7522 if (align >= 4) 7523 { 7524 if (TARGET_LL64) 7525 n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1); 7526 else 7527 n_pieces = (size + 2) / 4U + (size & 1); 7528 } 7529 else if (align == 2) 7530 n_pieces = (size + 1) / 2U; 7531 else 7532 n_pieces = size; 7533 if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15)) 7534 return false; 7535 /* Force 32 bit aligned and larger datum to use 64 bit transfers, if 7536 possible. 
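   A small worked example (illustrative only): copying 14 bytes with
   4-byte alignment uses pieces of 8, 4 and 2 bytes when 64-bit loads and
   stores are available (TARGET_LL64), and 4, 4, 4 and 2 bytes otherwise;
   the "while (piece > size)" loop below shrinks the piece size as the
   remaining byte count drops.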
*/ 7537 if (TARGET_LL64 && (piece >= 4) && (size >= 8)) 7538 piece = 8; 7539 else if (piece > 4) 7540 piece = 4; 7541 dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0); 7542 src_addr = force_offsettable (XEXP (operands[1], 0), size, 0); 7543 store[0] = store[1] = NULL_RTX; 7544 tmpx[0] = tmpx[1] = NULL_RTX; 7545 for (i = 0; size > 0; i ^= 1, size -= piece) 7546 { 7547 rtx tmp; 7548 machine_mode mode; 7549 7550 while (piece > size) 7551 piece >>= 1; 7552 mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT); 7553 /* If we don't re-use temporaries, the scheduler gets carried away, 7554 and the register pressure gets unnecessarily high. */ 7555 if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode) 7556 tmp = tmpx[i]; 7557 else 7558 tmpx[i] = tmp = gen_reg_rtx (mode); 7559 dst_addr = force_offsettable (dst_addr, piece, 1); 7560 src_addr = force_offsettable (src_addr, piece, 1); 7561 if (store[i]) 7562 emit_insn (store[i]); 7563 emit_move_insn (tmp, change_address (src, mode, src_addr)); 7564 store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp); 7565 dst_addr = plus_constant (Pmode, dst_addr, piece); 7566 src_addr = plus_constant (Pmode, src_addr, piece); 7567 } 7568 if (store[i]) 7569 emit_insn (store[i]); 7570 if (store[i^1]) 7571 emit_insn (store[i^1]); 7572 return true; 7573 } 7574 7575 /* Prepare operands for move in MODE. Return true iff the move has 7576 been emitted. */ 7577 7578 bool 7579 prepare_move_operands (rtx *operands, machine_mode mode) 7580 { 7581 /* We used to do this only for MODE_INT Modes, but addresses to floating 7582 point variables may well be in the small data section. */ 7583 if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[0], Pmode)) 7584 operands[0] = arc_rewrite_small_data (operands[0]); 7585 7586 if (mode == SImode && SYMBOLIC_CONST (operands[1])) 7587 { 7588 prepare_pic_move (operands, SImode); 7589 7590 /* Disable any REG_EQUALs associated with the symref 7591 otherwise the optimization pass undoes the work done 7592 here and references the variable directly. */ 7593 } 7594 7595 if (GET_CODE (operands[0]) != MEM 7596 && !TARGET_NO_SDATA_SET 7597 && small_data_pattern (operands[1], Pmode)) 7598 { 7599 /* This is to take care of address calculations involving sdata 7600 variables. */ 7601 operands[1] = arc_rewrite_small_data (operands[1]); 7602 7603 emit_insn (gen_rtx_SET (operands[0],operands[1])); 7604 /* ??? This note is useless, since it only restates the set itself. 7605 We should rather use the original SYMBOL_REF. However, there is 7606 the problem that we are lying to the compiler about these 7607 SYMBOL_REFs to start with. symbol@sda should be encoded specially 7608 so that we can tell it apart from an actual symbol. */ 7609 set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]); 7610 7611 /* Take care of the REG_EQUAL note that will be attached to mark the 7612 output reg equal to the initial symbol_ref after this code is 7613 executed. */ 7614 emit_move_insn (operands[0], operands[0]); 7615 return true; 7616 } 7617 7618 if (MEM_P (operands[0]) 7619 && !(reload_in_progress || reload_completed)) 7620 { 7621 operands[1] = force_reg (mode, operands[1]); 7622 if (!move_dest_operand (operands[0], mode)) 7623 { 7624 rtx addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); 7625 /* This is like change_address_1 (operands[0], mode, 0, 1) , 7626 except that we can't use that function because it is static. 
*/ 7627 rtx pat = change_address (operands[0], mode, addr); 7628 MEM_COPY_ATTRIBUTES (pat, operands[0]); 7629 operands[0] = pat; 7630 } 7631 if (!cse_not_expected) 7632 { 7633 rtx pat = XEXP (operands[0], 0); 7634 7635 pat = arc_legitimize_address_0 (pat, pat, mode); 7636 if (pat) 7637 { 7638 pat = change_address (operands[0], mode, pat); 7639 MEM_COPY_ATTRIBUTES (pat, operands[0]); 7640 operands[0] = pat; 7641 } 7642 } 7643 } 7644 7645 if (MEM_P (operands[1]) && !cse_not_expected) 7646 { 7647 rtx pat = XEXP (operands[1], 0); 7648 7649 pat = arc_legitimize_address_0 (pat, pat, mode); 7650 if (pat) 7651 { 7652 pat = change_address (operands[1], mode, pat); 7653 MEM_COPY_ATTRIBUTES (pat, operands[1]); 7654 operands[1] = pat; 7655 } 7656 } 7657 7658 return false; 7659 } 7660 7661 /* Prepare OPERANDS for an extension using CODE to OMODE. 7662 Return true iff the move has been emitted. */ 7663 7664 bool 7665 prepare_extend_operands (rtx *operands, enum rtx_code code, 7666 machine_mode omode) 7667 { 7668 if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode)) 7669 { 7670 /* This is to take care of address calculations involving sdata 7671 variables. */ 7672 operands[1] 7673 = gen_rtx_fmt_e (code, omode, arc_rewrite_small_data (operands[1])); 7674 emit_insn (gen_rtx_SET (operands[0], operands[1])); 7675 set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]); 7676 7677 /* Take care of the REG_EQUAL note that will be attached to mark the 7678 output reg equal to the initial extension after this code is 7679 executed. */ 7680 emit_move_insn (operands[0], operands[0]); 7681 return true; 7682 } 7683 return false; 7684 } 7685 7686 /* Output a library call to a function called FNAME that has been arranged 7687 to be local to any dso. */ 7688 7689 const char * 7690 arc_output_libcall (const char *fname) 7691 { 7692 unsigned len = strlen (fname); 7693 static char buf[64]; 7694 7695 gcc_assert (len < sizeof buf - 35); 7696 if (TARGET_LONG_CALLS_SET 7697 || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ())) 7698 { 7699 if (flag_pic) 7700 sprintf (buf, "add r12,pcl,@%s@pcl\n\tjl%%!%%* [r12]", fname); 7701 else 7702 sprintf (buf, "jl%%! @%s", fname); 7703 } 7704 else 7705 sprintf (buf, "bl%%!%%* @%s", fname); 7706 return buf; 7707 } 7708 7709 /* Return the SImode highpart of the DImode value IN. */ 7710 7711 rtx 7712 disi_highpart (rtx in) 7713 { 7714 return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4); 7715 } 7716 7717 /* Return length adjustment for INSN. 7718 For ARC600: 7719 A write to a core reg greater or equal to 32 must not be immediately 7720 followed by a use. Anticipate the length requirement to insert a nop 7721 between PRED and SUCC to prevent a hazard. */ 7722 7723 static int 7724 arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ) 7725 { 7726 if (!TARGET_ARC600) 7727 return 0; 7728 /* If SUCC is a doloop_end_i with a preceding label, we must output a nop 7729 in front of SUCC anyway, so there will be separation between PRED and 7730 SUCC. 
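   The underlying hazard being avoided looks roughly like this
   (illustrative register numbers; any extension core register >= r32
   qualifies):

	add	r32,r0,r1	; write to an extension register
	nop			; separation accounted for here
	add	r2,r32,r3	; read of the same register

   This function only anticipates the extra 4 bytes of length needed for
   the nop; it does not emit the nop itself.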
*/ 7731 if (recog_memoized (succ) == CODE_FOR_doloop_end_i 7732 && LABEL_P (prev_nonnote_insn (succ))) 7733 return 0; 7734 if (recog_memoized (succ) == CODE_FOR_doloop_begin_i) 7735 return 0; 7736 if (GET_CODE (PATTERN (pred)) == SEQUENCE) 7737 pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1); 7738 if (GET_CODE (PATTERN (succ)) == SEQUENCE) 7739 succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0); 7740 if (recog_memoized (pred) == CODE_FOR_mulsi_600 7741 || recog_memoized (pred) == CODE_FOR_umul_600 7742 || recog_memoized (pred) == CODE_FOR_mac_600 7743 || recog_memoized (pred) == CODE_FOR_mul64_600 7744 || recog_memoized (pred) == CODE_FOR_mac64_600 7745 || recog_memoized (pred) == CODE_FOR_umul64_600 7746 || recog_memoized (pred) == CODE_FOR_umac64_600) 7747 return 0; 7748 subrtx_iterator::array_type array; 7749 FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST) 7750 { 7751 const_rtx x = *iter; 7752 switch (GET_CODE (x)) 7753 { 7754 case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: 7755 break; 7756 default: 7757 /* This is also fine for PRE/POST_MODIFY, because they 7758 contain a SET. */ 7759 continue; 7760 } 7761 rtx dest = XEXP (x, 0); 7762 /* Check if this sets a an extension register. N.B. we use 61 for the 7763 condition codes, which is definitely not an extension register. */ 7764 if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61 7765 /* Check if the same register is used by the PAT. */ 7766 && (refers_to_regno_p 7767 (REGNO (dest), 7768 REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U, 7769 PATTERN (succ), 0))) 7770 return 4; 7771 } 7772 return 0; 7773 } 7774 7775 /* Given a rtx, check if it is an assembly instruction or not. */ 7776 7777 static int 7778 arc_asm_insn_p (rtx x) 7779 { 7780 int i, j; 7781 7782 if (x == 0) 7783 return 0; 7784 7785 switch (GET_CODE (x)) 7786 { 7787 case ASM_OPERANDS: 7788 case ASM_INPUT: 7789 return 1; 7790 7791 case SET: 7792 return arc_asm_insn_p (SET_SRC (x)); 7793 7794 case PARALLEL: 7795 j = 0; 7796 for (i = XVECLEN (x, 0) - 1; i >= 0; i--) 7797 j += arc_asm_insn_p (XVECEXP (x, 0, i)); 7798 if ( j > 0) 7799 return 1; 7800 break; 7801 7802 default: 7803 break; 7804 } 7805 7806 return 0; 7807 } 7808 7809 /* We might have a CALL to a non-returning function before a loop end. 7810 ??? Although the manual says that's OK (the target is outside the 7811 loop, and the loop counter unused there), the assembler barfs on 7812 this for ARC600, so we must insert a nop before such a call too. 7813 For ARC700, and ARCv2 is not allowed to have the last ZOL 7814 instruction a jump to a location where lp_count is modified. */ 7815 7816 static bool 7817 arc_loop_hazard (rtx_insn *pred, rtx_insn *succ) 7818 { 7819 rtx_insn *jump = NULL; 7820 rtx label_rtx = NULL_RTX; 7821 rtx_insn *label = NULL; 7822 basic_block succ_bb; 7823 7824 if (recog_memoized (succ) != CODE_FOR_doloop_end_i) 7825 return false; 7826 7827 /* Phase 1: ARC600 and ARCv2HS doesn't allow any control instruction 7828 (i.e., jump/call) as the last instruction of a ZOL. */ 7829 if (TARGET_ARC600 || TARGET_HS) 7830 if (JUMP_P (pred) || CALL_P (pred) 7831 || arc_asm_insn_p (PATTERN (pred)) 7832 || GET_CODE (PATTERN (pred)) == SEQUENCE) 7833 return true; 7834 7835 /* Phase 2: Any architecture, it is not allowed to have the last ZOL 7836 instruction a jump to a location where lp_count is modified. */ 7837 7838 /* Phase 2a: Dig for the jump instruction. 
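   PRED is either the jump itself, or a filled delay-slot SEQUENCE of
   the form (sequence [(jump_insn ...) (insn ...)]) whose element 0 is
   the jump; anything else cannot end the loop with a branch, so there
   is no hazard to report.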
*/ 7839 if (JUMP_P (pred)) 7840 jump = pred; 7841 else if (GET_CODE (PATTERN (pred)) == SEQUENCE 7842 && JUMP_P (XVECEXP (PATTERN (pred), 0, 0))) 7843 jump = as_a <rtx_insn *> (XVECEXP (PATTERN (pred), 0, 0)); 7844 else 7845 return false; 7846 7847 /* Phase 2b: Make sure is not a millicode jump. */ 7848 if ((GET_CODE (PATTERN (jump)) == PARALLEL) 7849 && (XVECEXP (PATTERN (jump), 0, 0) == ret_rtx)) 7850 return false; 7851 7852 label_rtx = JUMP_LABEL (jump); 7853 if (!label_rtx) 7854 return false; 7855 7856 /* Phase 2c: Make sure is not a return. */ 7857 if (ANY_RETURN_P (label_rtx)) 7858 return false; 7859 7860 /* Pahse 2d: Go to the target of the jump and check for aliveness of 7861 LP_COUNT register. */ 7862 label = safe_as_a <rtx_insn *> (label_rtx); 7863 succ_bb = BLOCK_FOR_INSN (label); 7864 if (!succ_bb) 7865 { 7866 gcc_assert (NEXT_INSN (label)); 7867 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (label))) 7868 succ_bb = NOTE_BASIC_BLOCK (NEXT_INSN (label)); 7869 else 7870 succ_bb = BLOCK_FOR_INSN (NEXT_INSN (label)); 7871 } 7872 7873 if (succ_bb && REGNO_REG_SET_P (df_get_live_out (succ_bb), LP_COUNT)) 7874 return true; 7875 7876 return false; 7877 } 7878 7879 /* For ARC600: 7880 A write to a core reg greater or equal to 32 must not be immediately 7881 followed by a use. Anticipate the length requirement to insert a nop 7882 between PRED and SUCC to prevent a hazard. */ 7883 7884 int 7885 arc_hazard (rtx_insn *pred, rtx_insn *succ) 7886 { 7887 if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) 7888 return 0; 7889 7890 if (arc_loop_hazard (pred, succ)) 7891 return 4; 7892 7893 if (TARGET_ARC600) 7894 return arc600_corereg_hazard (pred, succ); 7895 7896 return 0; 7897 } 7898 7899 /* Return length adjustment for INSN. */ 7900 7901 int 7902 arc_adjust_insn_length (rtx_insn *insn, int len, bool) 7903 { 7904 if (!INSN_P (insn)) 7905 return len; 7906 /* We already handle sequences by ignoring the delay sequence flag. */ 7907 if (GET_CODE (PATTERN (insn)) == SEQUENCE) 7908 return len; 7909 7910 /* It is impossible to jump to the very end of a Zero-Overhead Loop, as 7911 the ZOL mechanism only triggers when advancing to the end address, 7912 so if there's a label at the end of a ZOL, we need to insert a nop. 7913 The ARC600 ZOL also has extra restrictions on jumps at the end of a 7914 loop. */ 7915 if (recog_memoized (insn) == CODE_FOR_doloop_end_i) 7916 { 7917 rtx_insn *prev = prev_nonnote_insn (insn); 7918 7919 return ((LABEL_P (prev) 7920 || (TARGET_ARC600 7921 && (JUMP_P (prev) 7922 || CALL_P (prev) /* Could be a noreturn call. */ 7923 || (NONJUMP_INSN_P (prev) 7924 && GET_CODE (PATTERN (prev)) == SEQUENCE)))) 7925 ? len + 4 : len); 7926 } 7927 7928 /* Check for return with but one preceding insn since function 7929 start / call. */ 7930 if (TARGET_PAD_RETURN 7931 && JUMP_P (insn) 7932 && GET_CODE (PATTERN (insn)) != ADDR_VEC 7933 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC 7934 && get_attr_type (insn) == TYPE_RETURN) 7935 { 7936 rtx_insn *prev = prev_active_insn (insn); 7937 7938 if (!prev || !(prev = prev_active_insn (prev)) 7939 || ((NONJUMP_INSN_P (prev) 7940 && GET_CODE (PATTERN (prev)) == SEQUENCE) 7941 ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0), 7942 NON_SIBCALL) 7943 : CALL_ATTR (prev, NON_SIBCALL))) 7944 return len + 4; 7945 } 7946 if (TARGET_ARC600) 7947 { 7948 rtx_insn *succ = next_real_insn (insn); 7949 7950 /* One the ARC600, a write to an extension register must be separated 7951 from a read. 
*/ 7952 if (succ && INSN_P (succ)) 7953 len += arc600_corereg_hazard (insn, succ); 7954 } 7955 7956 /* Restore extracted operands - otherwise splitters like the addsi3_mixed one 7957 can go awry. */ 7958 extract_constrain_insn_cached (insn); 7959 7960 return len; 7961 } 7962 7963 /* Values for length_sensitive. */ 7964 enum 7965 { 7966 ARC_LS_NONE,// Jcc 7967 ARC_LS_25, // 25 bit offset, B 7968 ARC_LS_21, // 21 bit offset, Bcc 7969 ARC_LS_U13,// 13 bit unsigned offset, LP 7970 ARC_LS_10, // 10 bit offset, B_s, Beq_s, Bne_s 7971 ARC_LS_9, // 9 bit offset, BRcc 7972 ARC_LS_8, // 8 bit offset, BRcc_s 7973 ARC_LS_U7, // 7 bit unsigned offset, LPcc 7974 ARC_LS_7 // 7 bit offset, Bcc_s 7975 }; 7976 7977 /* While the infrastructure patch is waiting for review, duplicate the 7978 struct definitions, to allow this file to compile. */ 7979 #if 1 7980 typedef struct 7981 { 7982 unsigned align_set; 7983 /* Cost as a branch / call target or call return address. */ 7984 int target_cost; 7985 int fallthrough_cost; 7986 int branch_cost; 7987 int length; 7988 /* 0 for not length sensitive, 1 for largest offset range, 7989 * 2 for next smaller etc. */ 7990 unsigned length_sensitive : 8; 7991 bool enabled; 7992 } insn_length_variant_t; 7993 7994 typedef struct insn_length_parameters_s 7995 { 7996 int align_unit_log; 7997 int align_base_log; 7998 int max_variants; 7999 int (*get_variants) (rtx_insn *, int, bool, bool, insn_length_variant_t *); 8000 } insn_length_parameters_t; 8001 8002 static void 8003 arc_insn_length_parameters (insn_length_parameters_t *ilp) ATTRIBUTE_UNUSED; 8004 #endif 8005 8006 static int 8007 arc_get_insn_variants (rtx_insn *insn, int len, bool, bool target_p, 8008 insn_length_variant_t *ilv) 8009 { 8010 if (!NONDEBUG_INSN_P (insn)) 8011 return 0; 8012 enum attr_type type; 8013 /* shorten_branches doesn't take optimize_size into account yet for the 8014 get_variants mechanism, so turn this off for now. */ 8015 if (optimize_size) 8016 return 0; 8017 if (rtx_sequence *pat = dyn_cast <rtx_sequence *> (PATTERN (insn))) 8018 { 8019 /* The interaction of a short delay slot insn with a short branch is 8020 too weird for shorten_branches to piece together, so describe the 8021 entire SEQUENCE. */ 8022 rtx_insn *inner; 8023 if (TARGET_UPSIZE_DBR 8024 && get_attr_length (pat->insn (1)) <= 2 8025 && (((type = get_attr_type (inner = pat->insn (0))) 8026 == TYPE_UNCOND_BRANCH) 8027 || type == TYPE_BRANCH) 8028 && get_attr_delay_slot_filled (inner) == DELAY_SLOT_FILLED_YES) 8029 { 8030 int n_variants 8031 = arc_get_insn_variants (inner, get_attr_length (inner), true, 8032 target_p, ilv+1); 8033 /* The short variant gets split into a higher-cost aligned 8034 and a lower cost unaligned variant. */ 8035 gcc_assert (n_variants); 8036 gcc_assert (ilv[1].length_sensitive == ARC_LS_7 8037 || ilv[1].length_sensitive == ARC_LS_10); 8038 gcc_assert (ilv[1].align_set == 3); 8039 ilv[0] = ilv[1]; 8040 ilv[0].align_set = 1; 8041 ilv[0].branch_cost += 1; 8042 ilv[1].align_set = 2; 8043 n_variants++; 8044 for (int i = 0; i < n_variants; i++) 8045 ilv[i].length += 2; 8046 /* In case an instruction with aligned size is wanted, and 8047 the short variants are unavailable / too expensive, add 8048 versions of long branch + long delay slot. 
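   For instance (rough sketch): if the inner branch contributed a short
   and a long variant, the final list describes short branch + short
   delay insn (split into an aligned and an unaligned version), long
   branch + short delay insn, and long branch + long delay insn.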
*/ 8049 for (int i = 2, end = n_variants; i < end; i++, n_variants++) 8050 { 8051 ilv[n_variants] = ilv[i]; 8052 ilv[n_variants].length += 2; 8053 } 8054 return n_variants; 8055 } 8056 return 0; 8057 } 8058 insn_length_variant_t *first_ilv = ilv; 8059 type = get_attr_type (insn); 8060 bool delay_filled 8061 = (get_attr_delay_slot_filled (insn) == DELAY_SLOT_FILLED_YES); 8062 int branch_align_cost = delay_filled ? 0 : 1; 8063 int branch_unalign_cost = delay_filled ? 0 : TARGET_UNALIGN_BRANCH ? 0 : 1; 8064 /* If the previous instruction is an sfunc call, this insn is always 8065 a target, even though the middle-end is unaware of this. */ 8066 bool force_target = false; 8067 rtx_insn *prev = prev_active_insn (insn); 8068 if (prev && arc_next_active_insn (prev, 0) == insn 8069 && ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE) 8070 ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0), 8071 NON_SIBCALL) 8072 : (CALL_ATTR (prev, NON_SIBCALL) 8073 && NEXT_INSN (PREV_INSN (prev)) == prev))) 8074 force_target = true; 8075 8076 switch (type) 8077 { 8078 case TYPE_BRCC: 8079 /* Short BRCC only comes in no-delay-slot version, and without limm */ 8080 if (!delay_filled) 8081 { 8082 ilv->align_set = 3; 8083 ilv->length = 2; 8084 ilv->branch_cost = 1; 8085 ilv->enabled = (len == 2); 8086 ilv->length_sensitive = ARC_LS_8; 8087 ilv++; 8088 } 8089 /* Fall through. */ 8090 case TYPE_BRCC_NO_DELAY_SLOT: 8091 /* doloop_fallback* patterns are TYPE_BRCC_NO_DELAY_SLOT for 8092 (delay slot) scheduling purposes, but they are longer. */ 8093 if (GET_CODE (PATTERN (insn)) == PARALLEL 8094 && GET_CODE (XVECEXP (PATTERN (insn), 0, 1)) == SET) 8095 return 0; 8096 /* Standard BRCC: 4 bytes, or 8 bytes with limm. */ 8097 ilv->length = ((type == TYPE_BRCC) ? 4 : 8); 8098 ilv->align_set = 3; 8099 ilv->branch_cost = branch_align_cost; 8100 ilv->enabled = (len <= ilv->length); 8101 ilv->length_sensitive = ARC_LS_9; 8102 if ((target_p || force_target) 8103 || (!delay_filled && TARGET_UNALIGN_BRANCH)) 8104 { 8105 ilv[1] = *ilv; 8106 ilv->align_set = 1; 8107 ilv++; 8108 ilv->align_set = 2; 8109 ilv->target_cost = 1; 8110 ilv->branch_cost = branch_unalign_cost; 8111 } 8112 ilv++; 8113 8114 rtx op, op0; 8115 op = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0); 8116 op0 = XEXP (op, 0); 8117 8118 if (GET_CODE (op0) == ZERO_EXTRACT 8119 && satisfies_constraint_L (XEXP (op0, 2))) 8120 op0 = XEXP (op0, 0); 8121 if (satisfies_constraint_Rcq (op0)) 8122 { 8123 ilv->length = ((type == TYPE_BRCC) ? 6 : 10); 8124 ilv->align_set = 3; 8125 ilv->branch_cost = 1 + branch_align_cost; 8126 ilv->fallthrough_cost = 1; 8127 ilv->enabled = true; 8128 ilv->length_sensitive = ARC_LS_21; 8129 if (!delay_filled && TARGET_UNALIGN_BRANCH) 8130 { 8131 ilv[1] = *ilv; 8132 ilv->align_set = 1; 8133 ilv++; 8134 ilv->align_set = 2; 8135 ilv->branch_cost = 1 + branch_unalign_cost; 8136 } 8137 ilv++; 8138 } 8139 ilv->length = ((type == TYPE_BRCC) ? 
8 : 12); 8140 ilv->align_set = 3; 8141 ilv->branch_cost = 1 + branch_align_cost; 8142 ilv->fallthrough_cost = 1; 8143 ilv->enabled = true; 8144 ilv->length_sensitive = ARC_LS_21; 8145 if ((target_p || force_target) 8146 || (!delay_filled && TARGET_UNALIGN_BRANCH)) 8147 { 8148 ilv[1] = *ilv; 8149 ilv->align_set = 1; 8150 ilv++; 8151 ilv->align_set = 2; 8152 ilv->target_cost = 1; 8153 ilv->branch_cost = 1 + branch_unalign_cost; 8154 } 8155 ilv++; 8156 break; 8157 8158 case TYPE_SFUNC: 8159 ilv->length = 12; 8160 goto do_call; 8161 case TYPE_CALL_NO_DELAY_SLOT: 8162 ilv->length = 8; 8163 goto do_call; 8164 case TYPE_CALL: 8165 ilv->length = 4; 8166 ilv->length_sensitive 8167 = GET_CODE (PATTERN (insn)) == COND_EXEC ? ARC_LS_21 : ARC_LS_25; 8168 do_call: 8169 ilv->align_set = 3; 8170 ilv->fallthrough_cost = branch_align_cost; 8171 ilv->enabled = true; 8172 if ((target_p || force_target) 8173 || (!delay_filled && TARGET_UNALIGN_BRANCH)) 8174 { 8175 ilv[1] = *ilv; 8176 ilv->align_set = 1; 8177 ilv++; 8178 ilv->align_set = 2; 8179 ilv->target_cost = 1; 8180 ilv->fallthrough_cost = branch_unalign_cost; 8181 } 8182 ilv++; 8183 break; 8184 case TYPE_UNCOND_BRANCH: 8185 /* Strictly speaking, this should be ARC_LS_10 for equality comparisons, 8186 but that makes no difference at the moment. */ 8187 ilv->length_sensitive = ARC_LS_7; 8188 ilv[1].length_sensitive = ARC_LS_25; 8189 goto do_branch; 8190 case TYPE_BRANCH: 8191 ilv->length_sensitive = ARC_LS_10; 8192 ilv[1].length_sensitive = ARC_LS_21; 8193 do_branch: 8194 ilv->align_set = 3; 8195 ilv->length = 2; 8196 ilv->branch_cost = branch_align_cost; 8197 ilv->enabled = (len == ilv->length); 8198 ilv++; 8199 ilv->length = 4; 8200 ilv->align_set = 3; 8201 ilv->branch_cost = branch_align_cost; 8202 ilv->enabled = true; 8203 if ((target_p || force_target) 8204 || (!delay_filled && TARGET_UNALIGN_BRANCH)) 8205 { 8206 ilv[1] = *ilv; 8207 ilv->align_set = 1; 8208 ilv++; 8209 ilv->align_set = 2; 8210 ilv->target_cost = 1; 8211 ilv->branch_cost = branch_unalign_cost; 8212 } 8213 ilv++; 8214 break; 8215 case TYPE_JUMP: 8216 return 0; 8217 default: 8218 /* For every short insn, there is generally also a long insn. 8219 trap_s is an exception. */ 8220 if ((len & 2) == 0 || recog_memoized (insn) == CODE_FOR_trap_s) 8221 return 0; 8222 ilv->align_set = 3; 8223 ilv->length = len; 8224 ilv->enabled = 1; 8225 ilv++; 8226 ilv->align_set = 3; 8227 ilv->length = len + 2; 8228 ilv->enabled = 1; 8229 if (target_p || force_target) 8230 { 8231 ilv[1] = *ilv; 8232 ilv->align_set = 1; 8233 ilv++; 8234 ilv->align_set = 2; 8235 ilv->target_cost = 1; 8236 } 8237 ilv++; 8238 } 8239 /* If the previous instruction is an sfunc call, this insn is always 8240 a target, even though the middle-end is unaware of this. 8241 Therefore, if we have a call predecessor, transfer the target cost 8242 to the fallthrough and branch costs. */ 8243 if (force_target) 8244 { 8245 for (insn_length_variant_t *p = first_ilv; p < ilv; p++) 8246 { 8247 p->fallthrough_cost += p->target_cost; 8248 p->branch_cost += p->target_cost; 8249 p->target_cost = 0; 8250 } 8251 } 8252 8253 return ilv - first_ilv; 8254 } 8255 8256 static void 8257 arc_insn_length_parameters (insn_length_parameters_t *ilp) 8258 { 8259 ilp->align_unit_log = 1; 8260 ilp->align_base_log = 1; 8261 ilp->max_variants = 7; 8262 ilp->get_variants = arc_get_insn_variants; 8263 } 8264 8265 /* Return a copy of COND from *STATEP, inverted if that is indicated by the 8266 CC field of *STATEP. 
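   E.g. (illustrative): if COND is (eq (reg CC) (const_int 0)) but the CC
   field records the inverse condition, the result is
   (ne (reg CC) (const_int 0)).  For floating point compares where a
   plain reversal would be wrong, reverse_condition_maybe_unordered is
   used instead.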
 */
8267
8268 static rtx
8269 arc_get_ccfsm_cond (struct arc_ccfsm *statep, bool reverse)
8270 {
8271 rtx cond = statep->cond;
8272 int raw_cc = get_arc_condition_code (cond);
8273 if (reverse)
8274 raw_cc = ARC_INVERSE_CONDITION_CODE (raw_cc);
8275
8276 if (statep->cc == raw_cc)
8277 return copy_rtx (cond);
8278
8279 gcc_assert (ARC_INVERSE_CONDITION_CODE (raw_cc) == statep->cc);
8280
8281 machine_mode ccm = GET_MODE (XEXP (cond, 0));
8282 enum rtx_code code = reverse_condition (GET_CODE (cond));
8283 if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
8284 code = reverse_condition_maybe_unordered (GET_CODE (cond));
8285
8286 return gen_rtx_fmt_ee (code, GET_MODE (cond),
8287 copy_rtx (XEXP (cond, 0)), copy_rtx (XEXP (cond, 1)));
8288 }
8289
8290 /* Return version of PAT conditionalized with COND, which is part of INSN.
8291 ANNULLED indicates if INSN is an annulled delay-slot insn.
8292 Register further changes if necessary. */
8293 static rtx
8294 conditionalize_nonjump (rtx pat, rtx cond, rtx insn, bool annulled)
8295 {
8296 /* For commutative operators, we generally prefer to have
8297 the first source match the destination. */
8298 if (GET_CODE (pat) == SET)
8299 {
8300 rtx src = SET_SRC (pat);
8301
8302 if (COMMUTATIVE_P (src))
8303 {
8304 rtx src0 = XEXP (src, 0);
8305 rtx src1 = XEXP (src, 1);
8306 rtx dst = SET_DEST (pat);
8307
8308 if (rtx_equal_p (src1, dst) && !rtx_equal_p (src0, dst)
8309 /* Leave add_n alone - the canonical form is to
8310 have the complex summand first. */
8311 && REG_P (src0))
8312 pat = gen_rtx_SET (dst,
8313 gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src),
8314 src1, src0));
8315 }
8316 }
8317
8318 /* dwarf2out.c:dwarf2out_frame_debug_expr doesn't know
8319 what to do with COND_EXEC. */
8320 if (RTX_FRAME_RELATED_P (insn))
8321 {
8322 /* If this is the delay slot insn of an annulled branch,
8323 dwarf2out.c:scan_trace understands the annulling semantics
8324 without the COND_EXEC. */
8325 gcc_assert (annulled);
8326 rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat,
8327 REG_NOTES (insn));
8328 validate_change (insn, &REG_NOTES (insn), note, 1);
8329 }
8330 pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
8331 return pat;
8332 }
8333
8334 /* Use the ccfsm machinery to do if conversion. */
8335
8336 static unsigned
8337 arc_ifcvt (void)
8338 {
8339 struct arc_ccfsm *statep = &cfun->machine->ccfsm_current;
8340 basic_block merge_bb = 0;
8341
8342 memset (statep, 0, sizeof *statep);
8343 for (rtx_insn *insn = get_insns (); insn; insn = next_insn (insn))
8344 {
8345 arc_ccfsm_advance (insn, statep);
8346
8347 switch (statep->state)
8348 {
8349 case 0:
8350 if (JUMP_P (insn))
8351 merge_bb = 0;
8352 break;
8353 case 1: case 2:
8354 {
8355 /* Deleted branch.
*/ 8356 gcc_assert (!merge_bb); 8357 merge_bb = BLOCK_FOR_INSN (insn); 8358 basic_block succ_bb 8359 = BLOCK_FOR_INSN (NEXT_INSN (NEXT_INSN (PREV_INSN (insn)))); 8360 arc_ccfsm_post_advance (insn, statep); 8361 gcc_assert (!IN_RANGE (statep->state, 1, 2)); 8362 rtx_insn *seq = NEXT_INSN (PREV_INSN (insn)); 8363 if (seq != insn) 8364 { 8365 rtx slot = XVECEXP (PATTERN (seq), 0, 1); 8366 rtx pat = PATTERN (slot); 8367 if (INSN_ANNULLED_BRANCH_P (insn)) 8368 { 8369 rtx cond 8370 = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (slot)); 8371 pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat); 8372 } 8373 if (!validate_change (seq, &PATTERN (seq), pat, 0)) 8374 gcc_unreachable (); 8375 PUT_CODE (slot, NOTE); 8376 NOTE_KIND (slot) = NOTE_INSN_DELETED; 8377 if (merge_bb && succ_bb) 8378 merge_blocks (merge_bb, succ_bb); 8379 } 8380 else if (merge_bb && succ_bb) 8381 { 8382 set_insn_deleted (insn); 8383 merge_blocks (merge_bb, succ_bb); 8384 } 8385 else 8386 { 8387 PUT_CODE (insn, NOTE); 8388 NOTE_KIND (insn) = NOTE_INSN_DELETED; 8389 } 8390 continue; 8391 } 8392 case 3: 8393 if (LABEL_P (insn) 8394 && statep->target_label == CODE_LABEL_NUMBER (insn)) 8395 { 8396 arc_ccfsm_post_advance (insn, statep); 8397 basic_block succ_bb = BLOCK_FOR_INSN (insn); 8398 if (merge_bb && succ_bb) 8399 merge_blocks (merge_bb, succ_bb); 8400 else if (--LABEL_NUSES (insn) == 0) 8401 { 8402 const char *name = LABEL_NAME (insn); 8403 PUT_CODE (insn, NOTE); 8404 NOTE_KIND (insn) = NOTE_INSN_DELETED_LABEL; 8405 NOTE_DELETED_LABEL_NAME (insn) = name; 8406 } 8407 merge_bb = 0; 8408 continue; 8409 } 8410 /* Fall through. */ 8411 case 4: case 5: 8412 if (!NONDEBUG_INSN_P (insn)) 8413 break; 8414 8415 /* Conditionalized insn. */ 8416 8417 rtx_insn *prev, *pprev; 8418 rtx *patp, pat, cond; 8419 bool annulled; annulled = false; 8420 8421 /* If this is a delay slot insn in a non-annulled branch, 8422 don't conditionalize it. N.B., this should be fine for 8423 conditional return too. However, don't do this for 8424 unconditional branches, as these would be encountered when 8425 processing an 'else' part. */ 8426 prev = PREV_INSN (insn); 8427 pprev = PREV_INSN (prev); 8428 if (pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn) 8429 && JUMP_P (prev) && get_attr_cond (prev) == COND_USE) 8430 { 8431 if (!INSN_ANNULLED_BRANCH_P (prev)) 8432 break; 8433 annulled = true; 8434 } 8435 8436 patp = &PATTERN (insn); 8437 pat = *patp; 8438 cond = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (insn)); 8439 if (NONJUMP_INSN_P (insn) || CALL_P (insn)) 8440 { 8441 /* ??? don't conditionalize if all side effects are dead 8442 in the not-execute case. 
*/ 8443 8444 pat = conditionalize_nonjump (pat, cond, insn, annulled); 8445 } 8446 else if (simplejump_p (insn)) 8447 { 8448 patp = &SET_SRC (pat); 8449 pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, *patp, pc_rtx); 8450 } 8451 else if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn))) 8452 { 8453 pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, pat, pc_rtx); 8454 pat = gen_rtx_SET (pc_rtx, pat); 8455 } 8456 else 8457 gcc_unreachable (); 8458 validate_change (insn, patp, pat, 1); 8459 if (!apply_change_group ()) 8460 gcc_unreachable (); 8461 if (JUMP_P (insn)) 8462 { 8463 rtx_insn *next = next_nonnote_insn (insn); 8464 if (GET_CODE (next) == BARRIER) 8465 delete_insn (next); 8466 if (statep->state == 3) 8467 continue; 8468 } 8469 break; 8470 default: 8471 gcc_unreachable (); 8472 } 8473 arc_ccfsm_post_advance (insn, statep); 8474 } 8475 return 0; 8476 } 8477 8478 /* Find annulled delay insns and convert them to use the appropriate predicate. 8479 This allows branch shortening to size up these insns properly. */ 8480 8481 static unsigned 8482 arc_predicate_delay_insns (void) 8483 { 8484 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) 8485 { 8486 rtx pat, jump, dlay, src, cond, *patp; 8487 int reverse; 8488 8489 if (!NONJUMP_INSN_P (insn) 8490 || GET_CODE (pat = PATTERN (insn)) != SEQUENCE) 8491 continue; 8492 jump = XVECEXP (pat, 0, 0); 8493 dlay = XVECEXP (pat, 0, 1); 8494 if (!JUMP_P (jump) || !INSN_ANNULLED_BRANCH_P (jump)) 8495 continue; 8496 /* If the branch insn does the annulling, leave the delay insn alone. */ 8497 if (!TARGET_AT_DBR_CONDEXEC && !INSN_FROM_TARGET_P (dlay)) 8498 continue; 8499 /* ??? Could also leave DLAY un-conditionalized if its target is dead 8500 on the other path. */ 8501 gcc_assert (GET_CODE (PATTERN (jump)) == SET); 8502 gcc_assert (SET_DEST (PATTERN (jump)) == pc_rtx); 8503 src = SET_SRC (PATTERN (jump)); 8504 gcc_assert (GET_CODE (src) == IF_THEN_ELSE); 8505 cond = XEXP (src, 0); 8506 if (XEXP (src, 2) == pc_rtx) 8507 reverse = 0; 8508 else if (XEXP (src, 1) == pc_rtx) 8509 reverse = 1; 8510 else 8511 gcc_unreachable (); 8512 if (reverse != !INSN_FROM_TARGET_P (dlay)) 8513 { 8514 machine_mode ccm = GET_MODE (XEXP (cond, 0)); 8515 enum rtx_code code = reverse_condition (GET_CODE (cond)); 8516 if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode) 8517 code = reverse_condition_maybe_unordered (GET_CODE (cond)); 8518 8519 cond = gen_rtx_fmt_ee (code, GET_MODE (cond), 8520 copy_rtx (XEXP (cond, 0)), 8521 copy_rtx (XEXP (cond, 1))); 8522 } 8523 else 8524 cond = copy_rtx (cond); 8525 patp = &PATTERN (dlay); 8526 pat = *patp; 8527 pat = conditionalize_nonjump (pat, cond, dlay, true); 8528 validate_change (dlay, patp, pat, 1); 8529 if (!apply_change_group ()) 8530 gcc_unreachable (); 8531 } 8532 return 0; 8533 } 8534 8535 /* For ARC600: If a write to a core reg >=32 appears in a delay slot 8536 (other than of a forward brcc), it creates a hazard when there is a read 8537 of the same register at the branch target. We can't know what is at the 8538 branch target of calls, and for branches, we don't really know before the 8539 end of delay slot scheduling, either. Not only can individual instruction 8540 be hoisted out into a delay slot, a basic block can also be emptied this 8541 way, and branch and/or fall through targets be redirected. Hence we don't 8542 want such writes in a delay slot. */ 8543 8544 /* Return nonzreo iff INSN writes to an extension core register. 
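   Extension core registers are r32..r60 here; e.g. a pattern containing
   (set (reg:SI 40) ...) makes this return 1, while writes to the normal
   core registers r0..r31, or to 61 (used for the condition codes), do
   not.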
*/ 8545 8546 int 8547 arc_write_ext_corereg (rtx insn) 8548 { 8549 subrtx_iterator::array_type array; 8550 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) 8551 { 8552 const_rtx x = *iter; 8553 switch (GET_CODE (x)) 8554 { 8555 case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: 8556 break; 8557 default: 8558 /* This is also fine for PRE/POST_MODIFY, because they 8559 contain a SET. */ 8560 continue; 8561 } 8562 const_rtx dest = XEXP (x, 0); 8563 if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61) 8564 return 1; 8565 } 8566 return 0; 8567 } 8568 8569 /* This is like the hook, but returns NULL when it can't / won't generate 8570 a legitimate address. */ 8571 8572 static rtx 8573 arc_legitimize_address_0 (rtx x, rtx oldx ATTRIBUTE_UNUSED, 8574 machine_mode mode) 8575 { 8576 rtx addr, inner; 8577 8578 if (flag_pic && SYMBOLIC_CONST (x)) 8579 (x) = arc_legitimize_pic_address (x, 0); 8580 addr = x; 8581 if (GET_CODE (addr) == CONST) 8582 addr = XEXP (addr, 0); 8583 if (GET_CODE (addr) == PLUS 8584 && CONST_INT_P (XEXP (addr, 1)) 8585 && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF 8586 && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0))) 8587 || (REG_P (XEXP (addr, 0)) 8588 && (INTVAL (XEXP (addr, 1)) & 252)))) 8589 { 8590 HOST_WIDE_INT offs, upper; 8591 int size = GET_MODE_SIZE (mode); 8592 8593 offs = INTVAL (XEXP (addr, 1)); 8594 upper = (offs + 256 * size) & ~511 * size; 8595 inner = plus_constant (Pmode, XEXP (addr, 0), upper); 8596 #if 0 /* ??? this produces worse code for EEMBC idctrn01 */ 8597 if (GET_CODE (x) == CONST) 8598 inner = gen_rtx_CONST (Pmode, inner); 8599 #endif 8600 addr = plus_constant (Pmode, force_reg (Pmode, inner), offs - upper); 8601 x = addr; 8602 } 8603 else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr)) 8604 x = force_reg (Pmode, x); 8605 if (memory_address_p ((machine_mode) mode, x)) 8606 return x; 8607 return NULL_RTX; 8608 } 8609 8610 static rtx 8611 arc_legitimize_address (rtx orig_x, rtx oldx, machine_mode mode) 8612 { 8613 if (GET_CODE (orig_x) == SYMBOL_REF) 8614 { 8615 enum tls_model model = SYMBOL_REF_TLS_MODEL (orig_x); 8616 if (model != 0) 8617 return arc_legitimize_tls_address (orig_x, model); 8618 } 8619 8620 rtx new_x = arc_legitimize_address_0 (orig_x, oldx, mode); 8621 8622 if (new_x) 8623 return new_x; 8624 return orig_x; 8625 } 8626 8627 static rtx 8628 arc_delegitimize_address_0 (rtx x) 8629 { 8630 rtx u, gp, p; 8631 8632 if (GET_CODE (x) == CONST && GET_CODE (u = XEXP (x, 0)) == UNSPEC) 8633 { 8634 if (XINT (u, 1) == ARC_UNSPEC_GOT 8635 || XINT (u, 1) == ARC_UNSPEC_GOTOFFPC) 8636 return XVECEXP (u, 0, 0); 8637 } 8638 else if (GET_CODE (x) == CONST && GET_CODE (p = XEXP (x, 0)) == PLUS 8639 && GET_CODE (u = XEXP (p, 0)) == UNSPEC 8640 && (XINT (u, 1) == ARC_UNSPEC_GOT 8641 || XINT (u, 1) == ARC_UNSPEC_GOTOFFPC)) 8642 return gen_rtx_CONST 8643 (GET_MODE (x), 8644 gen_rtx_PLUS (GET_MODE (p), XVECEXP (u, 0, 0), XEXP (p, 1))); 8645 else if (GET_CODE (x) == PLUS 8646 && ((REG_P (gp = XEXP (x, 0)) 8647 && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM) 8648 || (GET_CODE (gp) == CONST 8649 && GET_CODE (u = XEXP (gp, 0)) == UNSPEC 8650 && XINT (u, 1) == ARC_UNSPEC_GOT 8651 && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF 8652 && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC"))) 8653 && GET_CODE (XEXP (x, 1)) == CONST 8654 && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC 8655 && XINT (u, 1) == ARC_UNSPEC_GOTOFF) 8656 return XVECEXP (u, 0, 0); 8657 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS 8658 && 
((REG_P (gp = XEXP (XEXP (x, 0), 1)) 8659 && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM) 8660 || (GET_CODE (gp) == CONST 8661 && GET_CODE (u = XEXP (gp, 0)) == UNSPEC 8662 && XINT (u, 1) == ARC_UNSPEC_GOT 8663 && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF 8664 && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC"))) 8665 && GET_CODE (XEXP (x, 1)) == CONST 8666 && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC 8667 && XINT (u, 1) == ARC_UNSPEC_GOTOFF) 8668 return gen_rtx_PLUS (GET_MODE (x), XEXP (XEXP (x, 0), 0), 8669 XVECEXP (u, 0, 0)); 8670 else if (GET_CODE (x) == PLUS 8671 && (u = arc_delegitimize_address_0 (XEXP (x, 1)))) 8672 return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), u); 8673 return NULL_RTX; 8674 } 8675 8676 static rtx 8677 arc_delegitimize_address (rtx x) 8678 { 8679 rtx orig_x = x = delegitimize_mem_from_attrs (x); 8680 if (GET_CODE (x) == MEM) 8681 x = XEXP (x, 0); 8682 x = arc_delegitimize_address_0 (x); 8683 if (x) 8684 { 8685 if (MEM_P (orig_x)) 8686 x = replace_equiv_address_nv (orig_x, x); 8687 return x; 8688 } 8689 return orig_x; 8690 } 8691 8692 /* Return a REG rtx for acc1. N.B. the gcc-internal representation may 8693 differ from the hardware register number in order to allow the generic 8694 code to correctly split the concatenation of acc1 and acc2. */ 8695 8696 rtx 8697 gen_acc1 (void) 8698 { 8699 return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57); 8700 } 8701 8702 /* Return a REG rtx for acc2. N.B. the gcc-internal representation may 8703 differ from the hardware register number in order to allow the generic 8704 code to correctly split the concatenation of acc1 and acc2. */ 8705 8706 rtx 8707 gen_acc2 (void) 8708 { 8709 return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56); 8710 } 8711 8712 /* Return a REG rtx for mlo. N.B. the gcc-internal representation may 8713 differ from the hardware register number in order to allow the generic 8714 code to correctly split the concatenation of mhi and mlo. */ 8715 8716 rtx 8717 gen_mlo (void) 8718 { 8719 return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 59: 58); 8720 } 8721 8722 /* Return a REG rtx for mhi. N.B. the gcc-internal representation may 8723 differ from the hardware register number in order to allow the generic 8724 code to correctly split the concatenation of mhi and mlo. */ 8725 8726 rtx 8727 gen_mhi (void) 8728 { 8729 return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 58: 59); 8730 } 8731 8732 /* FIXME: a parameter should be added, and code added to final.c, 8733 to reproduce this functionality in shorten_branches. */ 8734 #if 0 8735 /* Return nonzero iff BRANCH should be unaligned if possible by upsizing 8736 a previous instruction. */ 8737 int 8738 arc_unalign_branch_p (rtx branch) 8739 { 8740 rtx note; 8741 8742 if (!TARGET_UNALIGN_BRANCH) 8743 return 0; 8744 /* Do not do this if we have a filled delay slot. */ 8745 if (get_attr_delay_slot_filled (branch) == DELAY_SLOT_FILLED_YES 8746 && !NEXT_INSN (branch)->deleted ()) 8747 return 0; 8748 note = find_reg_note (branch, REG_BR_PROB, 0); 8749 return (!note 8750 || (arc_unalign_prob_threshold && !br_prob_note_reliable_p (note)) 8751 || INTVAL (XEXP (note, 0)) < arc_unalign_prob_threshold); 8752 } 8753 #endif 8754 8755 /* When estimating sizes during arc_reorg, when optimizing for speed, there 8756 are three reasons why we need to consider branches to be length 6: 8757 - annull-false delay slot insns are implemented using conditional execution, 8758 thus preventing short insn formation where used. 
8759 - for ARC600: annul-true delay slot insns are implemented where possible 8760 using conditional execution, preventing short insn formation where used. 8761 - for ARC700: likely or somewhat likely taken branches are made long and 8762 unaligned if possible to avoid branch penalty. */ 8763 8764 bool 8765 arc_branch_size_unknown_p (void) 8766 { 8767 return !optimize_size && arc_reorg_in_progress; 8768 } 8769 8770 /* We are about to output a return insn. Add padding if necessary to avoid 8771 a mispredict. A return could happen immediately after the function 8772 start, but after a call we know that there will be at least a blink 8773 restore. */ 8774 8775 void 8776 arc_pad_return (void) 8777 { 8778 rtx_insn *insn = current_output_insn; 8779 rtx_insn *prev = prev_active_insn (insn); 8780 int want_long; 8781 8782 if (!prev) 8783 { 8784 fputs ("\tnop_s\n", asm_out_file); 8785 cfun->machine->unalign ^= 2; 8786 want_long = 1; 8787 } 8788 /* If PREV is a sequence, we know it must be a branch / jump or a tailcall, 8789 because after a call, we'd have to restore blink first. */ 8790 else if (GET_CODE (PATTERN (prev)) == SEQUENCE) 8791 return; 8792 else 8793 { 8794 want_long = (get_attr_length (prev) == 2); 8795 prev = prev_active_insn (prev); 8796 } 8797 if (!prev 8798 || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE) 8799 ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0), 8800 NON_SIBCALL) 8801 : CALL_ATTR (prev, NON_SIBCALL))) 8802 { 8803 if (want_long) 8804 cfun->machine->size_reason 8805 = "call/return and return/return must be 6 bytes apart to avoid mispredict"; 8806 else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign) 8807 { 8808 cfun->machine->size_reason 8809 = "Long unaligned jump avoids non-delay slot penalty"; 8810 want_long = 1; 8811 } 8812 /* Disgorge delay insn, if there is any, and it may be moved. */ 8813 if (final_sequence 8814 /* ??? Annulled would be OK if we can and do conditionalize 8815 the delay slot insn accordingly. */ 8816 && !INSN_ANNULLED_BRANCH_P (insn) 8817 && (get_attr_cond (insn) != COND_USE 8818 || !reg_set_p (gen_rtx_REG (CCmode, CC_REG), 8819 XVECEXP (final_sequence, 0, 1)))) 8820 { 8821 prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1)); 8822 gcc_assert (!prev_real_insn (insn) 8823 || !arc_hazard (prev_real_insn (insn), prev)); 8824 cfun->machine->force_short_suffix = !want_long; 8825 rtx save_pred = current_insn_predicate; 8826 final_scan_insn (prev, asm_out_file, optimize, 1, NULL); 8827 cfun->machine->force_short_suffix = -1; 8828 prev->set_deleted (); 8829 current_output_insn = insn; 8830 current_insn_predicate = save_pred; 8831 } 8832 else if (want_long) 8833 fputs ("\tnop\n", asm_out_file); 8834 else 8835 { 8836 fputs ("\tnop_s\n", asm_out_file); 8837 cfun->machine->unalign ^= 2; 8838 } 8839 } 8840 return; 8841 } 8842 8843 /* The usual; we set up our machine_function data. */ 8844 8845 static struct machine_function * 8846 arc_init_machine_status (void) 8847 { 8848 struct machine_function *machine; 8849 machine = ggc_cleared_alloc<machine_function> (); 8850 machine->fn_type = ARC_FUNCTION_UNKNOWN; 8851 machine->force_short_suffix = -1; 8852 8853 return machine; 8854 } 8855 8856 /* Implements INIT_EXPANDERS. We just set up to call the above 8857 function. */ 8858 8859 void 8860 arc_init_expanders (void) 8861 { 8862 init_machine_status = arc_init_machine_status; 8863 } 8864 8865 /* Check if OP is a proper parallel of a millicode call pattern. 
OFFSET 8866 indicates a number of elements to ignore - that allows to have a 8867 sibcall pattern that starts with (return). LOAD_P is zero for store 8868 multiple (for prologues), and one for load multiples (for epilogues), 8869 and two for load multiples where no final clobber of blink is required. 8870 We also skip the first load / store element since this is supposed to 8871 be checked in the instruction pattern. */ 8872 8873 int 8874 arc_check_millicode (rtx op, int offset, int load_p) 8875 { 8876 int len = XVECLEN (op, 0) - offset; 8877 int i; 8878 8879 if (load_p == 2) 8880 { 8881 if (len < 2 || len > 13) 8882 return 0; 8883 load_p = 1; 8884 } 8885 else 8886 { 8887 rtx elt = XVECEXP (op, 0, --len); 8888 8889 if (GET_CODE (elt) != CLOBBER 8890 || !REG_P (XEXP (elt, 0)) 8891 || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM 8892 || len < 3 || len > 13) 8893 return 0; 8894 } 8895 for (i = 1; i < len; i++) 8896 { 8897 rtx elt = XVECEXP (op, 0, i + offset); 8898 rtx reg, mem, addr; 8899 8900 if (GET_CODE (elt) != SET) 8901 return 0; 8902 mem = XEXP (elt, load_p); 8903 reg = XEXP (elt, 1-load_p); 8904 if (!REG_P (reg) || REGNO (reg) != 13U+i || !MEM_P (mem)) 8905 return 0; 8906 addr = XEXP (mem, 0); 8907 if (GET_CODE (addr) != PLUS 8908 || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0)) 8909 || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4) 8910 return 0; 8911 } 8912 return 1; 8913 } 8914 8915 /* Accessor functions for cfun->machine->unalign. */ 8916 8917 int 8918 arc_get_unalign (void) 8919 { 8920 return cfun->machine->unalign; 8921 } 8922 8923 void 8924 arc_clear_unalign (void) 8925 { 8926 if (cfun) 8927 cfun->machine->unalign = 0; 8928 } 8929 8930 void 8931 arc_toggle_unalign (void) 8932 { 8933 cfun->machine->unalign ^= 2; 8934 } 8935 8936 /* Operands 0..2 are the operands of a addsi which uses a 12 bit 8937 constant in operand 2, but which would require a LIMM because of 8938 operand mismatch. 8939 operands 3 and 4 are new SET_SRCs for operands 0. */ 8940 8941 void 8942 split_addsi (rtx *operands) 8943 { 8944 int val = INTVAL (operands[2]); 8945 8946 /* Try for two short insns first. Lengths being equal, we prefer 8947 expansions with shorter register lifetimes. */ 8948 if (val > 127 && val <= 255 8949 && satisfies_constraint_Rcq (operands[0])) 8950 { 8951 operands[3] = operands[2]; 8952 operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]); 8953 } 8954 else 8955 { 8956 operands[3] = operands[1]; 8957 operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]); 8958 } 8959 } 8960 8961 /* Operands 0..2 are the operands of a subsi which uses a 12 bit 8962 constant in operand 1, but which would require a LIMM because of 8963 operand mismatch. 8964 operands 3 and 4 are new SET_SRCs for operands 0. */ 8965 8966 void 8967 split_subsi (rtx *operands) 8968 { 8969 int val = INTVAL (operands[1]); 8970 8971 /* Try for two short insns first. Lengths being equal, we prefer 8972 expansions with shorter register lifetimes. 
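   Two illustrative splits (assuming the caller emits the two resulting
   sets in order): for r0 = 5 - r1 with both registers short-encodable,
   operands 3/4 describe "neg_s r0,r1" followed by "add_s r0,r0,5"; for
   r0 = 200 - r1 they describe "mov_s r0,200" followed by
   "sub_s r0,r0,r1".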
*/ 8973 if (satisfies_constraint_Rcq (operands[0]) 8974 && satisfies_constraint_Rcq (operands[2])) 8975 { 8976 if (val >= -31 && val <= 127) 8977 { 8978 operands[3] = gen_rtx_NEG (SImode, operands[2]); 8979 operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]); 8980 return; 8981 } 8982 else if (val >= 0 && val < 255) 8983 { 8984 operands[3] = operands[1]; 8985 operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]); 8986 return; 8987 } 8988 } 8989 /* If the destination is not an ARCompact16 register, we might 8990 still have a chance to make a short insn if the source is; 8991 we need to start with a reg-reg move for this. */ 8992 operands[3] = operands[2]; 8993 operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]); 8994 } 8995 8996 /* Handle DOUBLE_REGS uses. 8997 Operand 0: destination register 8998 Operand 1: source register */ 8999 9000 static bool 9001 arc_process_double_reg_moves (rtx *operands) 9002 { 9003 rtx dest = operands[0]; 9004 rtx src = operands[1]; 9005 9006 enum usesDxState { none, srcDx, destDx, maxDx }; 9007 enum usesDxState state = none; 9008 9009 if (refers_to_regno_p (40, 44, src, 0)) 9010 state = srcDx; 9011 if (refers_to_regno_p (40, 44, dest, 0)) 9012 { 9013 /* Via arc_register_move_cost, we should never see D,D moves. */ 9014 gcc_assert (state == none); 9015 state = destDx; 9016 } 9017 9018 if (state == none) 9019 return false; 9020 9021 if (state == srcDx) 9022 { 9023 /* Without the LR insn, we need to split this into a 9024 sequence of insns which will use the DEXCLx and DADDHxy 9025 insns to be able to read the Dx register in question. */ 9026 if (TARGET_DPFP_DISABLE_LRSR) 9027 { 9028 /* gen *movdf_insn_nolrsr */ 9029 rtx set = gen_rtx_SET (dest, src); 9030 rtx use1 = gen_rtx_USE (VOIDmode, const1_rtx); 9031 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, use1))); 9032 } 9033 else 9034 { 9035 /* When we have 'mov D, r' or 'mov D, D' then get the target 9036 register pair for use with LR insn. */ 9037 rtx destHigh = simplify_gen_subreg (SImode, dest, DFmode, 9038 TARGET_BIG_ENDIAN ? 0 : 4); 9039 rtx destLow = simplify_gen_subreg (SImode, dest, DFmode, 9040 TARGET_BIG_ENDIAN ? 4 : 0); 9041 9042 /* Produce the two LR insns to get the high and low parts. */ 9043 emit_insn (gen_rtx_SET (destHigh, 9044 gen_rtx_UNSPEC_VOLATILE (Pmode, 9045 gen_rtvec (1, src), 9046 VUNSPEC_ARC_LR_HIGH))); 9047 emit_insn (gen_rtx_SET (destLow, 9048 gen_rtx_UNSPEC_VOLATILE (Pmode, 9049 gen_rtvec (1, src), 9050 VUNSPEC_ARC_LR))); 9051 } 9052 } 9053 else if (state == destDx) 9054 { 9055 /* When we have 'mov r, D' or 'mov D, D' and we have access to the 9056 LR insn get the target register pair. */ 9057 rtx srcHigh = simplify_gen_subreg (SImode, src, DFmode, 9058 TARGET_BIG_ENDIAN ? 0 : 4); 9059 rtx srcLow = simplify_gen_subreg (SImode, src, DFmode, 9060 TARGET_BIG_ENDIAN ? 4 : 0); 9061 9062 emit_insn (gen_dexcl_2op (dest, srcHigh, srcLow)); 9063 } 9064 else 9065 gcc_unreachable (); 9066 9067 return true; 9068 } 9069 9070 /* operands 0..1 are the operands of a 64 bit move instruction. 9071 split it into two moves with operands 2/3 and 4/5. 
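   E.g. (sketch): a DImode register-to-register move becomes two SImode
   moves of the low and high words; if the first destination word
   overlaps the remaining source word, the order of the two moves is
   swapped so the overlapping input is consumed before it is clobbered.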
*/ 9072 9073 void 9074 arc_split_move (rtx *operands) 9075 { 9076 machine_mode mode = GET_MODE (operands[0]); 9077 int i; 9078 int swap = 0; 9079 rtx xop[4]; 9080 9081 if (TARGET_DPFP) 9082 { 9083 if (arc_process_double_reg_moves (operands)) 9084 return; 9085 } 9086 9087 if (TARGET_LL64 9088 && ((memory_operand (operands[0], mode) 9089 && even_register_operand (operands[1], mode)) 9090 || (memory_operand (operands[1], mode) 9091 && even_register_operand (operands[0], mode)))) 9092 { 9093 emit_move_insn (operands[0], operands[1]); 9094 return; 9095 } 9096 9097 if (TARGET_PLUS_QMACW 9098 && GET_CODE (operands[1]) == CONST_VECTOR) 9099 { 9100 HOST_WIDE_INT intval0, intval1; 9101 if (GET_MODE (operands[1]) == V2SImode) 9102 { 9103 intval0 = INTVAL (XVECEXP (operands[1], 0, 0)); 9104 intval1 = INTVAL (XVECEXP (operands[1], 0, 1)); 9105 } 9106 else 9107 { 9108 intval1 = INTVAL (XVECEXP (operands[1], 0, 3)) << 16; 9109 intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF; 9110 intval0 = INTVAL (XVECEXP (operands[1], 0, 1)) << 16; 9111 intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF; 9112 } 9113 xop[0] = gen_rtx_REG (SImode, REGNO (operands[0])); 9114 xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); 9115 xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode)); 9116 xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode)); 9117 emit_move_insn (xop[0], xop[2]); 9118 emit_move_insn (xop[3], xop[1]); 9119 return; 9120 } 9121 9122 for (i = 0; i < 2; i++) 9123 { 9124 if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0))) 9125 { 9126 rtx addr = XEXP (operands[i], 0); 9127 rtx r, o; 9128 enum rtx_code code; 9129 9130 gcc_assert (!reg_overlap_mentioned_p (operands[0], addr)); 9131 switch (GET_CODE (addr)) 9132 { 9133 case PRE_DEC: o = GEN_INT (-8); goto pre_modify; 9134 case PRE_INC: o = GEN_INT (8); goto pre_modify; 9135 case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1); 9136 pre_modify: 9137 code = PRE_MODIFY; 9138 break; 9139 case POST_DEC: o = GEN_INT (-8); goto post_modify; 9140 case POST_INC: o = GEN_INT (8); goto post_modify; 9141 case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1); 9142 post_modify: 9143 code = POST_MODIFY; 9144 swap = 2; 9145 break; 9146 default: 9147 gcc_unreachable (); 9148 } 9149 r = XEXP (addr, 0); 9150 xop[0+i] = adjust_automodify_address_nv 9151 (operands[i], SImode, 9152 gen_rtx_fmt_ee (code, Pmode, r, 9153 gen_rtx_PLUS (Pmode, r, o)), 9154 0); 9155 xop[2+i] = adjust_automodify_address_nv 9156 (operands[i], SImode, plus_constant (Pmode, r, 4), 4); 9157 } 9158 else 9159 { 9160 xop[0+i] = operand_subword (operands[i], 0, 0, mode); 9161 xop[2+i] = operand_subword (operands[i], 1, 0, mode); 9162 } 9163 } 9164 if (reg_overlap_mentioned_p (xop[0], xop[3])) 9165 { 9166 swap = 2; 9167 gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1])); 9168 } 9169 9170 emit_move_insn (xop[0 + swap], xop[1 + swap]); 9171 emit_move_insn (xop[2 - swap], xop[3 - swap]); 9172 9173 } 9174 9175 /* Select between the instruction output templates s_tmpl (for short INSNs) 9176 and l_tmpl (for long INSNs). */ 9177 9178 const char * 9179 arc_short_long (rtx_insn *insn, const char *s_tmpl, const char *l_tmpl) 9180 { 9181 int is_short = arc_verify_short (insn, cfun->machine->unalign, -1); 9182 9183 extract_constrain_insn_cached (insn); 9184 return is_short ? s_tmpl : l_tmpl; 9185 } 9186 9187 /* Searches X for any reference to REGNO, returning the rtx of the 9188 reference found if any. Otherwise, returns NULL_RTX. 
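   E.g. with REGNO == 0, searching (plus:SI (reg:SI 0) (const_int 4))
   returns the (reg:SI 0) rtx; if register 0 occurs nowhere in X, the
   result is NULL_RTX.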
/* Searches X for any reference to REGNO, returning the rtx of the
   reference found if any.  Otherwise, returns NULL_RTX.  */

rtx
arc_regno_use_in (unsigned int regno, rtx x)
{
  const char *fmt;
  int i, j;
  rtx tem;

  if (REG_P (x) && refers_to_regno_p (regno, x))
    return x;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  if ((tem = regno_use_in (regno, XEXP (x, i))))
	    return tem;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  if ((tem = regno_use_in (regno, XVECEXP (x, i, j))))
	    return tem;
    }

  return NULL_RTX;
}

/* Return the integer value of the "type" attribute for INSN, or -1 if
   INSN can't have attributes.  */

int
arc_attr_type (rtx_insn *insn)
{
  if (NONJUMP_INSN_P (insn)
      ? (GET_CODE (PATTERN (insn)) == USE
	 || GET_CODE (PATTERN (insn)) == CLOBBER)
      : JUMP_P (insn)
      ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
	 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
      : !CALL_P (insn))
    return -1;
  return get_attr_type (insn);
}

/* Return true if insn sets the condition codes.  */

bool
arc_sets_cc_p (rtx_insn *insn)
{
  if (NONJUMP_INSN_P (insn))
    if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
      insn = seq->insn (seq->len () - 1);
  return arc_attr_type (insn) == TYPE_COMPARE;
}

/* Return true if INSN is an instruction with a delay slot we may want
   to fill.  */

bool
arc_need_delay (rtx_insn *insn)
{
  rtx_insn *next;

  if (!flag_delayed_branch)
    return false;
  /* The return at the end of a function needs a delay slot.  */
  if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
      && (!(next = next_active_insn (insn))
	  || ((!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) != SEQUENCE)
	      && arc_attr_type (next) == TYPE_RETURN))
      && (!TARGET_PAD_RETURN
	  || (prev_active_insn (insn)
	      && prev_active_insn (prev_active_insn (insn))
	      && prev_active_insn (prev_active_insn (prev_active_insn (insn))))))
    return true;
  if (NONJUMP_INSN_P (insn)
      ? (GET_CODE (PATTERN (insn)) == USE
	 || GET_CODE (PATTERN (insn)) == CLOBBER
	 || GET_CODE (PATTERN (insn)) == SEQUENCE)
      : JUMP_P (insn)
      ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
	 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
      : !CALL_P (insn))
    return false;
  return num_delay_slots (insn) != 0;
}

/* Return true if the scheduling pass(es) has/have already run,
   i.e. where possible, we should try to mitigate high latencies
   by different instruction selection.  */

bool
arc_scheduling_not_expected (void)
{
  return cfun->machine->arc_reorg_started;
}

/* Oddly enough, sometimes we get a zero overhead loop that branch
   shortening doesn't think is a loop - observed with compile/pr24883.c
   -O3 -fomit-frame-pointer -funroll-loops.  Make sure to include the
   alignment visible for branch shortening (we actually align the loop
   insn before it, but that is equivalent since the loop insn is 4 bytes
   long).  */
int
arc_label_align (rtx_insn *label)
{
  int loop_align = LOOP_ALIGN (LABEL);

  if (loop_align > align_labels_log)
    {
      rtx_insn *prev = prev_nonnote_insn (label);

      if (prev && NONJUMP_INSN_P (prev)
	  && GET_CODE (PATTERN (prev)) == PARALLEL
	  && recog_memoized (prev) == CODE_FOR_doloop_begin_i)
	return loop_align;
    }
  /* Code has a minimum p2 alignment of 1, which we must restore after an
     ADDR_DIFF_VEC.  */
  if (align_labels_log < 1)
    {
      rtx_insn *next = next_nonnote_nondebug_insn (label);
      if (INSN_P (next) && recog_memoized (next) >= 0)
	return 1;
    }
  return align_labels_log;
}

/* Return true if LABEL is in executable code.  */

bool
arc_text_label (rtx_insn *label)
{
  rtx_insn *next;

  /* ??? We use deleted labels like they were still there, see
     gcc.c-torture/compile/20000326-2.c .  */
  gcc_assert (GET_CODE (label) == CODE_LABEL
	      || (GET_CODE (label) == NOTE
		  && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL));
  next = next_nonnote_insn (label);
  if (next)
    return (!JUMP_TABLE_DATA_P (next)
	    || GET_CODE (PATTERN (next)) != ADDR_VEC);
  else if (!PREV_INSN (label))
    /* ??? Sometimes text labels get inserted very late, see
       gcc.dg/torture/stackalign/comp-goto-1.c .  */
    return true;
  return false;
}

/* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble
   when compiling with -O2 -freorder-blocks-and-partition -fprofile-use
   -D_PROFILE_USE; delay branch scheduling then follows a crossing jump
   to redirect two breqs.  */

static bool
arc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
{
  /* ??? get_attr_type is declared to take an rtx.  */
  union { const rtx_insn *c; rtx_insn *r; } u;

  u.c = follower;
  if (CROSSING_JUMP_P (followee))
    switch (get_attr_type (u.r))
      {
      case TYPE_BRCC:
      case TYPE_BRCC_NO_DELAY_SLOT:
	return false;
      default:
	return true;
      }
  return true;
}

/* Implement EPILOGUE_USES.
   Return true if REGNO should be added to the deemed uses of the epilogue.

   We use the return address
   arc_return_address_regs[arc_compute_function_type (cfun)].  But
   also, we have to make sure all the register restore instructions
   are known to be live in interrupt functions, plus the blink
   register if it is clobbered by the isr.  */

bool
arc_epilogue_uses (int regno)
{
  if (regno == arc_tp_regno)
    return true;
  if (reload_completed)
    {
      if (ARC_INTERRUPT_P (cfun->machine->fn_type))
	{
	  if (!fixed_regs[regno])
	    return true;
	  return ((regno == arc_return_address_regs[cfun->machine->fn_type])
		  || (regno == RETURN_ADDR_REGNUM));
	}
      else
	return regno == RETURN_ADDR_REGNUM;
    }
  else
    return regno == arc_return_address_regs[arc_compute_function_type (cfun)];
}

/* Helper for the EH_USES macro.  */

bool
arc_eh_uses (int regno)
{
  if (regno == arc_tp_regno)
    return true;
  return false;
}

#ifndef TARGET_NO_LRA
#define TARGET_NO_LRA !TARGET_LRA
#endif

static bool
arc_lra_p (void)
{
  return !TARGET_NO_LRA;
}
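/* For reference (values worked out here purely as an illustration): the
   expression ((((r & 7) ^ 4) - 4) & 15) == r used by arc_register_priority
   below holds exactly for r0-r3 and r12-r15, i.e. the usual compact
   register set.  E.g. r = 13: (13 & 7) ^ 4 = 1, 1 - 4 = -3, -3 & 15 = 13,
   which equals r; whereas r = 5 gives (5 & 7) ^ 4 = 1, 1 - 4 = -3,
   -3 & 15 = 13, which does not.  */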
/* ??? Should we define TARGET_REGISTER_PRIORITY?  We might prefer to
   use Rcq registers, because some insns are shorter with them.  OTOH we
   already have separate alternatives for this purpose, and other insns
   don't mind, so maybe we should rather prefer the other registers?
   We need more data, and we can only get that if we allow people to
   try all options.  */

static int
arc_register_priority (int r)
{
  switch (arc_lra_priority_tag)
    {
    case ARC_LRA_PRIORITY_NONE:
      return 0;
    case ARC_LRA_PRIORITY_NONCOMPACT:
      return ((((r & 7) ^ 4) - 4) & 15) != r;
    case ARC_LRA_PRIORITY_COMPACT:
      return ((((r & 7) ^ 4) - 4) & 15) == r;
    default:
      gcc_unreachable ();
    }
}

static reg_class_t
arc_spill_class (reg_class_t /* orig_class */, machine_mode)
{
  return GENERAL_REGS;
}

bool
arc_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
			       int itype)
{
  rtx x = *p;
  enum reload_type type = (enum reload_type) itype;

  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (RTX_OK_FOR_BASE_P (XEXP (x, 0), true)
	  || (REG_P (XEXP (x, 0))
	      && reg_equiv_constant (REGNO (XEXP (x, 0))))))
    {
      int scale = GET_MODE_SIZE (mode);
      int shift;
      rtx index_rtx = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
      rtx reg, sum, sum2;

      if (scale > 4)
	scale = 4;
      if ((scale - 1) & offset)
	scale = 1;
      shift = scale >> 1;
      offset_base
	= ((offset + (256 << shift))
	   & ((HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) -512 << shift)));
      /* Sometimes the normal form does not suit DImode.  We
	 could avoid that by using smaller ranges, but that
	 would give less optimized code when SImode is
	 prevalent.  */
      if (GET_MODE_SIZE (mode) + offset - offset_base <= (256 << shift))
	{
	  int regno;

	  reg = XEXP (x, 0);
	  regno = REGNO (reg);
	  sum2 = sum = plus_constant (Pmode, reg, offset_base);

	  if (reg_equiv_constant (regno))
	    {
	      sum2 = plus_constant (Pmode, reg_equiv_constant (regno),
				    offset_base);
	      if (GET_CODE (sum2) == PLUS)
		sum2 = gen_rtx_CONST (Pmode, sum2);
	    }
	  *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
	  push_reload (sum2, NULL_RTX, &XEXP (*p, 0), NULL,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum,
		       type);
	  return true;
	}
    }
  /* We must re-recognize what we created before.  */
  else if (GET_CODE (x) == PLUS
	   && GET_CODE (XEXP (x, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && REG_P (XEXP (XEXP (x, 0), 0))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      /* Because this address is so complex, we know it must have
	 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
	 it is already unshared, and needs no further unsharing.  */
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
      return true;
    }
  return false;
}

/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */

static bool
arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
				    unsigned int align,
				    enum by_pieces_operation op,
				    bool speed_p)
{
  /* Let the movmem expander handle small block moves.  */
  if (op == MOVE_BY_PIECES)
    return false;

  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}
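/* A worked example for the offset splitting in arc_legitimize_reload_address
   above (the numbers are chosen purely for illustration): for an SImode
   access at (plus reg 3000), scale = 4 and shift = 2, so
   offset_base = (3000 + 1024) & -2048 = 2048.  The address is rewritten as
   (plus (plus reg 2048) 952) and the inner sum is pushed as a reload,
   leaving a residual offset small enough for the load/store offset field.  */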
/* Emit a (pre) memory barrier around an atomic sequence according to
   MODEL.  */

static void
arc_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

/* Emit a (post) memory barrier around an atomic sequence according to
   MODEL.  */

static void
arc_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}

/* Emit the jump instruction INSN and mark it as very unlikely to be
   taken.  */

static void
emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  rtx_insn *jump = emit_jump_insn (insn);
  add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
}

/* Expand code to perform an 8 or 16-bit compare and swap by doing a
   32-bit compare and swap on the word containing the byte or
   half-word.  The difference between a weak and a strong CAS is that
   the weak version may simply fail.  The strong version relies on two
   loops: one checks whether the SCOND op succeeded, the other checks
   whether the 32-bit accessed location which contains the 8 or 16-bit
   datum was changed by another thread.  The first loop is implemented
   by the atomic_compare_and_swapsi_1 pattern.  The second loop is
   implemented by this routine.  */

static void
arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem,
				rtx oldval, rtx newval, rtx weak,
				rtx mod_s, rtx mod_f)
{
  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
  rtx addr = gen_reg_rtx (Pmode);
  rtx off = gen_reg_rtx (SImode);
  rtx oldv = gen_reg_rtx (SImode);
  rtx newv = gen_reg_rtx (SImode);
  rtx oldvalue = gen_reg_rtx (SImode);
  rtx newvalue = gen_reg_rtx (SImode);
  rtx res = gen_reg_rtx (SImode);
  rtx resv = gen_reg_rtx (SImode);
  rtx memsi, val, mask, end_label, loop_label, cc, x;
  machine_mode mode;
  bool is_weak = (weak != const0_rtx);

  /* Truncate the address to a 32-bit boundary.  */
  emit_insn (gen_rtx_SET (addr,
			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));

  /* Compute the datum offset.  */
  emit_insn (gen_rtx_SET (off,
			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));
  if (TARGET_BIG_ENDIAN)
    emit_insn (gen_rtx_SET (off,
			    gen_rtx_MINUS (SImode,
					   (GET_MODE (mem) == QImode) ?
					   GEN_INT (3) : GEN_INT (2), off)));

  /* Normal read from the truncated address.  */
  memsi = gen_rtx_MEM (SImode, addr);
  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);

  val = copy_to_reg (memsi);

  /* Convert the offset to bits.  */
  emit_insn (gen_rtx_SET (off,
			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));

  /* Get the proper mask.  */
  if (GET_MODE (mem) == QImode)
    mask = force_reg (SImode, GEN_INT (0xff));
  else
    mask = force_reg (SImode, GEN_INT (0xffff));

  emit_insn (gen_rtx_SET (mask,
			  gen_rtx_ASHIFT (SImode, mask, off)));
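  /* Worked example (little-endian, values chosen for illustration only):
     for a QImode CAS at address 0x1003, addr becomes 0x1000, off becomes
     3 bytes = 24 bits, and mask becomes 0xff << 24; the old and new values
     below are shifted into that byte lane and merged with the unchanged
     bytes of the containing word.  */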
  /* Prepare the old and new values.  */
  emit_insn (gen_rtx_SET (val,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       val)));

  oldval = gen_lowpart (SImode, oldval);
  emit_insn (gen_rtx_SET (oldv,
			  gen_rtx_ASHIFT (SImode, oldval, off)));

  newval = gen_lowpart_common (SImode, newval);
  emit_insn (gen_rtx_SET (newv,
			  gen_rtx_ASHIFT (SImode, newval, off)));

  emit_insn (gen_rtx_SET (oldv,
			  gen_rtx_AND (SImode, oldv, mask)));

  emit_insn (gen_rtx_SET (newv,
			  gen_rtx_AND (SImode, newv, mask)));

  if (!is_weak)
    {
      end_label = gen_label_rtx ();
      loop_label = gen_label_rtx ();
      emit_label (loop_label);
    }

  /* Make the old and new values.  */
  emit_insn (gen_rtx_SET (oldvalue,
			  gen_rtx_IOR (SImode, oldv, val)));

  emit_insn (gen_rtx_SET (newvalue,
			  gen_rtx_IOR (SImode, newv, val)));

  /* Try a 32-bit atomic compare and swap.  It clobbers the CC
     register.  */
  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue,
					      weak, mod_s, mod_f));

  /* Regardless of the weakness of the operation, a proper boolean
     result needs to be provided.  */
  x = gen_rtx_REG (CC_Zmode, CC_REG);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (bool_result, x));

  if (!is_weak)
    {
      /* Check the result: if the atomic op succeeded, jump to the end
	 label.  */
      x = gen_rtx_REG (CC_Zmode, CC_REG);
      x = gen_rtx_EQ (VOIDmode, x, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx);
      emit_jump_insn (gen_rtx_SET (pc_rtx, x));

      /* Wait for the right moment when the accessed 32-bit location
	 is stable.  */
      emit_insn (gen_rtx_SET (resv,
			      gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
					   res)));
      mode = SELECT_CC_MODE (NE, resv, val);
      cc = gen_rtx_REG (mode, CC_REG);
      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val)));

      /* Set the new value of the 32-bit location, properly masked.  */
      emit_insn (gen_rtx_SET (val, resv));

      /* Try again if the location is unstable.  Fall through if only
	 the scond op failed.  */
      x = gen_rtx_NE (VOIDmode, cc, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

      emit_label (end_label);
    }

  /* Finally, return the result in the requested mode.  */
  emit_insn (gen_rtx_SET (res,
			  gen_rtx_AND (SImode, res, mask)));

  emit_insn (gen_rtx_SET (res,
			  gen_rtx_LSHIFTRT (SImode, res, off)));

  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}
/* Helper function used by the "atomic_compare_and_swap" expand
   pattern.  */

void
arc_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  if (reg_overlap_mentioned_p (rval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode == SImode)
    {
      emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval,
						  is_weak, mod_s, mod_f));
      x = gen_rtx_REG (CC_Zmode, CC_REG);
      x = gen_rtx_EQ (SImode, x, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
  else
    {
      arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval,
				      is_weak, mod_s, mod_f);
    }
}

/* Helper function used by the "atomic_compare_and_swapsi_1"
   pattern.  */

void
arc_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = (enum memmodel) INTVAL (operands[5]);
  mod_f = (enum memmodel) INTVAL (operands[6]);
  mode = GET_MODE (mem);

  /* ARC atomic ops work only with 32-bit aligned memories.  */
  gcc_assert (mode == SImode);

  arc_pre_atomic_barrier (mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  /* Load exclusive.  */
  emit_insn (gen_arc_load_exclusivesi (rval, mem));

  /* Check if it is oldval.  */
  mode = SELECT_CC_MODE (NE, rval, oldval);
  cond = gen_rtx_REG (mode, CC_REG);
  emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode, rval, oldval)));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  /* Exclusively store the new item.  The store clobbers the CC reg.  */
  emit_insn (gen_arc_store_exclusivesi (mem, newval));

  if (!is_weak)
    {
      /* Check the result of the store.  */
      cond = gen_rtx_REG (CC_Zmode, CC_REG);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }

  if (mod_f != MEMMODEL_RELAXED)
    emit_label (label2);

  arc_post_atomic_barrier (mod_s);

  if (mod_f == MEMMODEL_RELAXED)
    emit_label (label2);
}
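/* The sequence emitted above corresponds, roughly, to the following
   sketch using the ARC llock/scond instructions (illustrative only, not
   actual compiler output), bracketed by the barriers the memory model
   requires:

       label1:                       ; strong CAS only
	 llock  rval, [mem]
	 cmp    rval, oldval
	 bne    label2                ; marked very unlikely
	 scond  newval, [mem]
	 bne    label1                ; retry, strong CAS only
       label2:
*/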
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */

void
arc_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
		      rtx orig_before, rtx orig_after, rtx model_rtx)
{
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
  machine_mode mode = GET_MODE (mem);
  rtx label, x, cond;
  rtx before = orig_before, after = orig_after;

  /* ARC atomic ops work only with 32-bit aligned memories.  */
  gcc_assert (mode == SImode);

  arc_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (VOIDmode, label);

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  if (after == NULL_RTX)
    after = gen_reg_rtx (mode);

  /* Load exclusive.  */
  emit_insn (gen_arc_load_exclusivesi (before, mem));

  switch (code)
    {
    case NOT:
      x = gen_rtx_AND (mode, before, val);
      emit_insn (gen_rtx_SET (after, x));
      x = gen_rtx_NOT (mode, after);
      emit_insn (gen_rtx_SET (after, x));
      break;

    case MINUS:
      if (CONST_INT_P (val))
	{
	  val = GEN_INT (-INTVAL (val));
	  code = PLUS;
	}

      /* FALLTHRU.  */
    default:
      x = gen_rtx_fmt_ee (code, mode, before, val);
      emit_insn (gen_rtx_SET (after, x));
      break;
    }

  /* Exclusively store the new item.  The store clobbers the CC reg.  */
  emit_insn (gen_arc_store_exclusivesi (mem, after));

  /* Check the result of the store.  */
  cond = gen_rtx_REG (CC_Zmode, CC_REG);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    label, pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  arc_post_atomic_barrier (model);
}

/* Implement TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P.  */

static bool
arc_no_speculation_in_delay_slots_p ()
{
  return true;
}

/* Return a parallel of registers to represent where to find the
   register pieces if required, otherwise NULL_RTX.  */

static rtx
arc_dwarf_register_span (rtx rtl)
{
  machine_mode mode = GET_MODE (rtl);
  unsigned regno;
  rtx p;

  if (GET_MODE_SIZE (mode) != 8)
    return NULL_RTX;

  p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
  regno = REGNO (rtl);
  XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
  XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);

  return p;
}

/* Return true if OP is an acceptable memory operand for ARCompact
   16-bit load instructions of MODE.

   AV2SHORT: TRUE if the address needs to fit into the new ARCv2 short
   non-scaled instructions.

   SCALED: TRUE if the address can be scaled.  */

bool
compact_memory_operand_p (rtx op, machine_mode mode,
			  bool av2short, bool scaled)
{
  rtx addr, plus0, plus1;
  int size, off;

  /* Eliminate non-memory operations.  */
  if (GET_CODE (op) != MEM)
    return 0;

  /* .di instructions have no 16-bit form.  */
  if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET)
    return false;

  if (mode == VOIDmode)
    mode = GET_MODE (op);

  size = GET_MODE_SIZE (mode);

  /* dword operations really put out 2 instructions, so eliminate
     them.  */
  if (size > UNITS_PER_WORD)
    return false;
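  /* For reference, the offset ranges accepted below for reg+const
     addresses with a compact base register are: 0..31 for byte accesses,
     0..62 (even) for halfword accesses, and 0..124 (multiple of four) for
     word accesses; the sp-relative and ARCv2 code-density forms have
     their own, narrower checks.  */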
  /* Decode the address now.  */
  addr = XEXP (op, 0);
  switch (GET_CODE (addr))
    {
    case REG:
      return (REGNO (addr) >= FIRST_PSEUDO_REGISTER
	      || COMPACT_GP_REG_P (REGNO (addr))
	      || (SP_REG_P (REGNO (addr)) && (size != 2)));
    case PLUS:
      plus0 = XEXP (addr, 0);
      plus1 = XEXP (addr, 1);

      if ((GET_CODE (plus0) == REG)
	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
	      || COMPACT_GP_REG_P (REGNO (plus0)))
	  && ((GET_CODE (plus1) == REG)
	      && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER)
		  || COMPACT_GP_REG_P (REGNO (plus1)))))
	{
	  return !av2short;
	}

      if ((GET_CODE (plus0) == REG)
	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
	      || (COMPACT_GP_REG_P (REGNO (plus0)) && !av2short)
	      || (IN_RANGE (REGNO (plus0), 0, 31) && av2short))
	  && (GET_CODE (plus1) == CONST_INT))
	{
	  bool valid = false;

	  off = INTVAL (plus1);

	  /* Negative offsets are not supported in 16-bit load/store
	     insns.  */
	  if (off < 0)
	    return 0;

	  /* Only u5 immediates are allowed in code density instructions.  */
	  if (av2short)
	    {
	      switch (size)
		{
		case 1:
		  return false;
		case 2:
		  /* This is an ldh_s.x instruction, check the u6
		     immediate.  */
		  if (COMPACT_GP_REG_P (REGNO (plus0)))
		    valid = true;
		  break;
		case 4:
		  /* Only u5 immediates are allowed in 32-bit access
		     code density instructions.  */
		  if (REGNO (plus0) <= 31)
		    return ((off < 32) && (off % 4 == 0));
		  break;
		default:
		  return false;
		}
	    }
	  else
	    if (COMPACT_GP_REG_P (REGNO (plus0)))
	      valid = true;

	  if (valid)
	    {
	      switch (size)
		{
		case 1:
		  return (off < 32);
		case 2:
		  /* The 6-bit constant gets shifted to fit the real
		     5-bit field.  Check also for the alignment.  */
		  return ((off < 64) && (off % 2 == 0));
		case 4:
		  return ((off < 128) && (off % 4 == 0));
		default:
		  return false;
		}
	    }
	}

      if (REG_P (plus0) && CONST_INT_P (plus1)
	  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
	      || SP_REG_P (REGNO (plus0)))
	  && !av2short)
	{
	  off = INTVAL (plus1);
	  return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0));
	}

      if ((GET_CODE (plus0) == MULT)
	  && (GET_CODE (XEXP (plus0, 0)) == REG)
	  && ((REGNO (XEXP (plus0, 0)) >= FIRST_PSEUDO_REGISTER)
	      || COMPACT_GP_REG_P (REGNO (XEXP (plus0, 0))))
	  && (GET_CODE (plus1) == REG)
	  && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER)
	      || COMPACT_GP_REG_P (REGNO (plus1))))
	return scaled;
    default:
      break;
      /* TODO: 'gp' and 'pcl' are to be supported as base address
	 operands for 16-bit load instructions.  */
    }
  return false;
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arc.h"