;; Copyright (C) 2016-2021 Free Software Foundation, Inc.

;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.

(include "predicates.md")
(include "constraints.md")

;; {{{ Constants and enums

; Named registers
(define_constants
  [(FIRST_SGPR_REG	 0)
   (CC_SAVE_REG		 22)
   (LAST_SGPR_REG	 101)
   (FLAT_SCRATCH_REG	 102)
   (FLAT_SCRATCH_LO_REG	 102)
   (FLAT_SCRATCH_HI_REG	 103)
   (XNACK_MASK_REG	 104)
   (XNACK_MASK_LO_REG	 104)
   (XNACK_MASK_HI_REG	 105)
   (VCC_REG		 106)
   (VCC_LO_REG		 106)
   (VCC_HI_REG		 107)
   (VCCZ_REG		 108)
   (TBA_REG		 109)
   (TBA_LO_REG		 109)
   (TBA_HI_REG		 110)
   (TMA_REG		 111)
   (TMA_LO_REG		 111)
   (TMA_HI_REG		 112)
   (TTMP0_REG		 113)
   (TTMP11_REG		 124)
   (M0_REG		 125)
   (EXEC_REG		 126)
   (EXEC_LO_REG		 126)
   (EXEC_HI_REG		 127)
   (EXECZ_REG		 128)
   (SCC_REG		 129)
   (FIRST_VGPR_REG	 160)
   (LAST_VGPR_REG	 415)])

; ABI register numbers: stack pointer, link register, and the (virtual)
; argument and frame pointers.
(define_constants
  [(SP_REGNUM 16)
   (LR_REGNUM 18)
   (AP_REGNUM 416)
   (FP_REGNUM 418)])

(define_c_enum "unspecv" [
  UNSPECV_PROLOGUE_USE
  UNSPECV_KERNEL_RETURN
  UNSPECV_BARRIER
  UNSPECV_ATOMIC
  UNSPECV_ICACHE_INV])

(define_c_enum "unspec" [
  UNSPEC_ADDPTR
  UNSPEC_VECTOR
  UNSPEC_BPERMUTE
  UNSPEC_SGPRBASE
  UNSPEC_MEMORY_BARRIER
  UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
  UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
  UNSPEC_PLUS_DPP_SHR
  UNSPEC_PLUS_CARRY_DPP_SHR UNSPEC_PLUS_CARRY_IN_DPP_SHR
  UNSPEC_AND_DPP_SHR UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR
  UNSPEC_MOV_DPP_SHR
  UNSPEC_MOV_FROM_LANE63
  UNSPEC_GATHER
  UNSPEC_SCATTER
  UNSPEC_RCP])

;; }}}
;; {{{ Attributes

; Instruction type (encoding) as described in the ISA specification.
; The following table summarizes possible operands of individual instruction
; types and corresponding constraints.
;
; sop2 - scalar, two inputs, one output
;	 ssrc0/ssrc1: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
;		      vccz,execz,scc,inline immediate,fp inline immediate
;	 sdst: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
;
;	 Constraints "=SD, SD", "SSA,SSB","SSB,SSA"
;
; sopk - scalar, inline constant input, one output
;	 simm16: 16bit inline constant
;	 sdst: same as sop2/ssrc0
;
;	 Constraints "=SD", "J"
;
; sop1 - scalar, one input, one output
;	 ssrc0: same as sop2/ssrc0.  FIXME: manual omit VCCZ
;	 sdst: same as sop2/sdst
;
;	 Constraints "=SD", "SSA"
;
; sopc - scalar, two inputs, one comparison
;	 ssrc0: same as sop2/ssc0.
;
;	 Constraints "SSI,SSA","SSA,SSI"
;
; sopp - scalar, one constant input, one special
;	 simm16
;
; smem - scalar memory
;	 sbase: aligned pair of sgprs.  Specify {size[15:0], base[47:0]} in
;		dwords
;	 sdata: sgpr0-102, flat_scratch, xnack, vcc, tba, tma
;	 offset: sgpr or 20bit unsigned byte offset
;
; vop2 - vector, two inputs, one output
;	 vsrc0: sgpr0-102,flat_scratch,xnack,vcc,tba,ttmp0-11,m0,exec,
;		inline constant -16 to -64, fp inline immediate, vccz, execz,
;		scc, lds, literal constant, vgpr0-255
;	 vsrc1: vgpr0-255
;	 vdst: vgpr0-255
;	 Limitations: At most one SGPR, at most one constant
;		      if constant is used, SGPR must be M0
;		      Only SRC0 can be LDS_DIRECT
;
;	 constraints: "=v", "vBSv", "v"
;
; vop1 - vector, one input, one output
;	 vsrc0: same as vop2/src0
;	 vdst: vgpr0-255
;
;	 constraints: "=v", "vBSv"
;
; vopc - vector, two inputs, one comparison output;
;	 vsrc0: same as vop2/src0
;	 vsrc1: vgpr0-255
;	 vdst:
;
;	 constraints: "vASv", "v"
;
; vop3a - vector, three inputs, one output
;	 vdst: vgpr0-255, for v_cmp sgpr or vcc
;	 abs,clamp
;	 vsrc0: sgpr0-102,vcc,tba,ttmp0-11,m0,exec,
;		inline constant -16 to -64, fp inline immediate, vccz, execz,
;		scc, lds_direct
;	 FIXME: really missing 1/pi?  really 104 SGPRs
;
; vop3b - vector, three inputs, one vector output, one scalar output
;	 vsrc0,vsrc1,vsrc2: same as vop3a vsrc0
;	 vdst: vgpr0-255
;	 sdst: sgpr0-103/vcc/tba/tma/ttmp0-11
;
; vop_sdwa - second dword for vop1/vop2/vopc for specifying sub-dword address
;	 src0: vgpr0-255
;	 dst_sel: BYTE_0-3, WORD_0-1, DWORD
;	 dst_unused: UNUSED_PAD, UNUSED_SEXT, UNUSED_PRESERVE
;	 clamp: true/false
;	 src0_sel: BYTE_0-3, WORD_0-1, DWORD
;	 flags: src0_sext, src0_neg, src0_abs, src1_sel, src1_sext, src1_neg,
;		src1_abs
;
; vop_dpp - second dword for vop1/vop2/vopc for specifying data-parallel ops
;	 src0: vgpr0-255
;	 dpp_ctrl: quad_perm, row_sl0-15, row_sr0-15, row_rr0-15, wf_sl1,
;		   wf_rl1, wf_sr1, wf_rr1, row_mirror, row_half_mirror,
;		   bcast15, bcast31
;	 flags: src0_neg, src0_abs, src1_neg, src1_abs
;	 bank_mask: 4-bit mask
;	 row_mask: 4-bit mask
;
; ds - Local and global data share instructions.
;	 offset0: 8-bit constant
;	 offset1: 8-bit constant
;	 flag: gds
;	 addr: vgpr0-255
;	 data0: vgpr0-255
;	 data1: vgpr0-255
;	 vdst: vgpr0-255
;
; mubuf - Untyped memory buffer operation.  First word with LDS, second word
;	  non-LDS.
;	 offset: 12-bit constant
;	 vaddr: vgpr0-255
;	 vdata: vgpr0-255
;	 srsrc: sgpr0-102
;	 soffset: sgpr0-102
;	 flags: offen, idxen, glc, lds, slc, tfe
;
; mtbuf - Typed memory buffer operation.  Two words
;	 offset: 12-bit constant
;	 dfmt: 4-bit constant
;	 nfmt: 3-bit constant
;	 vaddr: vgpr0-255
;	 vdata: vgpr0-255
;	 srsrc: sgpr0-102
;	 soffset: sgpr0-102
;	 flags: offen, idxen, glc, lds, slc, tfe
;
; flat - flat or global memory operations
;	 flags: glc, slc
;	 addr: vgpr0-255
;	 data: vgpr0-255
;	 vdst: vgpr0-255
;
; mult - expands to multiple instructions (pseudo encoding)
;
; vmult - as mult, when a vector instruction is used.
; Encoding type of every insn, used below to derive the execution unit and
; a default length.

(define_attr "type"
	     "unknown,sop1,sop2,sopk,sopc,sopp,smem,ds,vop2,vop1,vopc,
	      vop3a,vop3b,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,vmult"
	     (const_string "unknown"))

; Set if instruction is executed in scalar or vector unit

(define_attr "unit" "unknown,scalar,vector"
  (cond [(eq_attr "type" "sop1,sop2,sopk,sopc,sopp,smem,mult")
	    (const_string "scalar")
	 (eq_attr "type" "vop2,vop1,vopc,vop3a,vop3b,ds,
			  vop_sdwa,vop_dpp,flat,vmult")
	    (const_string "vector")]
	(const_string "unknown")))

; All vector instructions run as 64 threads as predicated by the EXEC
; register.  Scalar operations in vector register require a single lane
; enabled, vector moves require a full set of lanes enabled, and most vector
; operations handle the lane masking themselves.
; The md_reorg pass is responsible for ensuring that EXEC is set appropriately
; according to the following settings:
;   auto   - md_reorg will inspect def/use to determine what to do.
;   none   - exec is not needed.
;   single - disable all but lane zero.
;   full   - enable all lanes.

(define_attr "exec" "auto,none,single,full"
  (const_string "auto"))

; Infer the (worst-case) length from the instruction type by default.  Many
; types can have an optional immediate word following, which we include here.
; "Multiple" types are counted as two 64-bit instructions.  This is just a
; default fallback: it can be overridden per-alternative in insn patterns for
; greater accuracy.
(define_attr "length" ""
  (cond [(eq_attr "type" "sop1") (const_int 8)
	 (eq_attr "type" "sop2") (const_int 8)
	 (eq_attr "type" "sopk") (const_int 8)
	 (eq_attr "type" "sopc") (const_int 8)
	 (eq_attr "type" "sopp") (const_int 4)
	 (eq_attr "type" "smem") (const_int 8)
	 (eq_attr "type" "ds") (const_int 8)
	 (eq_attr "type" "vop1") (const_int 8)
	 (eq_attr "type" "vop2") (const_int 8)
	 (eq_attr "type" "vopc") (const_int 8)
	 (eq_attr "type" "vop3a") (const_int 8)
	 (eq_attr "type" "vop3b") (const_int 8)
	 (eq_attr "type" "vop_sdwa") (const_int 8)
	 (eq_attr "type" "vop_dpp") (const_int 8)
	 (eq_attr "type" "flat") (const_int 8)
	 (eq_attr "type" "mult") (const_int 16)
	 (eq_attr "type" "vmult") (const_int 16)]
	(const_int 4)))

; Disable alternatives that only apply to specific ISA variants.

(define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))

(define_attr "enabled" ""
  (cond [(eq_attr "gcn_version" "gcn3") (const_int 1)
	 (and (eq_attr "gcn_version" "gcn5")
	      (ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
	   (const_int 1)]
	(const_int 0)))

; We need to be able to identify v_readlane and v_writelane with
; SGPR lane selection in order to handle "Manually Inserted Wait States".

(define_attr "laneselect" "yes,no" (const_string "no"))

; Identify instructions that require a "Manually Inserted Wait State" if
; their inputs are overwritten by subsequent instructions.

(define_attr "delayeduse" "yes,no" (const_string "no"))

;; }}}
;; {{{ Iterators useful across the whole machine description

(define_mode_iterator SIDI [SI DI])
(define_mode_iterator SFDF [SF DF])
(define_mode_iterator SISF [SI SF])
(define_mode_iterator QIHI [QI HI])
(define_mode_iterator DIDF [DI DF])
(define_mode_iterator FP [HF SF DF])
(define_mode_iterator FP_1REG [HF SF])

;; }}}
;; {{{ Attributes.
; Translate RTX code into GCN instruction mnemonics with and without
; suffixes such as _b32, etc.

(define_code_attr mnemonic
  [(minus "sub%i")
   (plus "add%i")
   (ashift "lshl%b")
   (lshiftrt "lshr%b")
   (ashiftrt "ashr%i")
   (and "and%B")
   (ior "or%B")
   (xor "xor%B")
   (mult "mul%i")
   (smin "min%i")
   (smax "max%i")
   (umin "min%u")
   (umax "max%u")
   (not "not%B")
   (popcount "bcnt_u32%b")])

(define_code_attr bare_mnemonic
  [(plus "add")
   (minus "sub")
   (and "and")
   (ior "or")
   (xor "xor")])

(define_code_attr s_mnemonic
  [(not "not%b")
   (popcount "bcnt1_i32%b")
   (clz "flbit_i32%b")
   (ctz "ff1_i32%b")])

(define_code_attr revmnemonic
  [(minus "subrev%i")
   (ashift "lshlrev%b")
   (lshiftrt "lshrrev%b")
   (ashiftrt "ashrrev%i")])

; Translate RTX code into corresponding expander name.

(define_code_attr expander
  [(and "and")
   (ior "ior")
   (xor "xor")
   (plus "add")
   (minus "sub")
   (ashift "ashl")
   (lshiftrt "lshr")
   (ashiftrt "ashr")
   (mult "mul")
   (smin "smin")
   (smax "smax")
   (umin "umin")
   (umax "umax")
   (not "one_cmpl")
   (popcount "popcount")
   (clz "clz")
   (ctz "ctz")
   (sign_extend "extend")
   (zero_extend "zero_extend")])

;; }}}
;; {{{ Miscellaneous instructions

(define_insn "nop"
  [(const_int 0)]
  ""
  "s_nop\t0x0"
  [(set_attr "type" "sopp")])

; FIXME: What should the value of the immediate be? Zero is disallowed, so
; pick 1 for now.
(define_insn "trap"
  [(trap_if (const_int 1) (const_int 0))]
  ""
  "s_trap\t1"
  [(set_attr "type" "sopp")])

;; }}}
;; {{{ Moves

;; All scalar modes we support moves in.
(define_mode_iterator MOV_MODE [BI QI HI SI DI TI SF DF])

; This is the entry point for creating all kinds of scalar moves,
; including reloads and symbols.
(define_expand "mov<mode>"
  [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
	(match_operand:MOV_MODE 1 "general_operand"))]
  ""
  {
    if (SUBREG_P (operands[1])
	&& GET_MODE (operands[1]) == SImode
	&& GET_MODE (SUBREG_REG (operands[1])) == BImode)
      {
	/* (reg:BI VCC) has nregs==2 to ensure it gets clobbered as a whole,
	   but (subreg:SI (reg:BI VCC)) doesn't, which causes the LRA liveness
	   checks to assert.  Transform this:
	     (set (reg:SI) (subreg:SI (reg:BI)))
	   to this:
	     (set (subreg:BI (reg:SI)) (reg:BI))  */
	operands[0] = gen_rtx_SUBREG (BImode, operands[0], 0);
	operands[1] = SUBREG_REG (operands[1]);
      }
    if (SUBREG_P (operands[0])
	&& GET_MODE (operands[0]) == SImode
	&& GET_MODE (SUBREG_REG (operands[0])) == BImode)
      {
	/* Likewise, transform this:
	     (set (subreg:SI (reg:BI)) (reg:SI))
	   to this:
	     (set (reg:BI) (subreg:BI (reg:SI)))  */
	operands[0] = SUBREG_REG (operands[0]);
	operands[1] = gen_rtx_SUBREG (BImode, operands[1], 0);
      }

    if (MEM_P (operands[0]))
      operands[1] = force_reg (<MODE>mode, operands[1]);

    if (!lra_in_progress && !reload_completed
	&& !gcn_valid_move_p (<MODE>mode, operands[0], operands[1]))
      {
	/* Something is probably trying to generate a move
	   which can only work indirectly.
	   E.g. Move from LDS memory to SGPR hardreg
	     or MEM:QI to SGPR.  */
	rtx tmpreg = gen_reg_rtx (<MODE>mode);
	emit_insn (gen_mov<mode> (tmpreg, operands[1]));
	emit_insn (gen_mov<mode> (operands[0], tmpreg));
	DONE;
      }

    if (<MODE>mode == DImode
	&& (GET_CODE (operands[1]) == SYMBOL_REF
	    || GET_CODE (operands[1]) == LABEL_REF))
      {
	if (lra_in_progress)
	  emit_insn (gen_movdi_symbol_save_scc (operands[0], operands[1]));
	else
	  emit_insn (gen_movdi_symbol (operands[0], operands[1]));
	DONE;
      }
  })

; Split invalid moves into two valid moves

(define_split
  [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
	(match_operand:MOV_MODE 1 "general_operand"))]
  "!reload_completed && !lra_in_progress
   && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))]
  {
    operands[2] = gen_reg_rtx(<MODE>mode);
  })

; We need BImode move so we can reload flags registers.

(define_insn "*movbi"
  [(set (match_operand:BI 0 "nonimmediate_operand"
			  "=Sg,   v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM")
	(match_operand:BI 1 "gcn_load_operand"
			  "SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))]
  ""
  {
    /* SCC as an operand is currently not accepted by the LLVM assembler, so
       we emit bytes directly as a workaround.  */
    switch (which_alternative) {
    case 0:
      if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG)
	return "; s_mov_b32\t%0,%1 is not supported by the assembler.\;"
	       ".byte\t0xfd\;"
	       ".byte\t0x0\;"
	       ".byte\t0x80|%R0\;"
	       ".byte\t0xbe";
      else
	return "s_mov_b32\t%0, %1";
    case 1:
      if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG)
	return "; v_mov_b32\t%0, %1\;"
	       ".byte\t0xfd\;"
	       ".byte\t0x2\;"
	       ".byte\t((%V0<<1)&0xff)\;"
	       ".byte\t0x7e|(%V0>>7)";
      else
	return "v_mov_b32\t%0, %1";
    case 2:
      return "v_readlane_b32\t%0, %1, 0";
    case 3:
      return "s_cmpk_lg_u32\t%1, 0";
    case 4:
      return "v_cmp_ne_u32\tvcc, 0, %1";
    case 5:
      if (REGNO (operands[1]) == SCC_REG)
	return "; s_mov_b32\t%0, %1 is not supported by the assembler.\;"
	       ".byte\t0xfd\;"
	       ".byte\t0x0\;"
	       ".byte\t0xea\;"
	       ".byte\t0xbe\;"
	       "s_mov_b32\tvcc_hi, 0";
      else
	return "s_mov_b32\tvcc_lo, %1\;"
	       "s_mov_b32\tvcc_hi, 0";
    case 6:
      return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
    case 7:
      return "s_store_dword\t%1, %A0";
    case 8:
      return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
    case 9:
      return "flat_store_dword\t%A0, %1%O0%g0";
    case 10:
      return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
    case 11:
      return "global_store_dword\t%A0, %1%O0%g0";
    default:
      gcc_unreachable ();
    }
  }
  [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat,
		     flat,flat")
   (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*")
   (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")])

; 32bit move pattern

(define_insn "*mov<mode>_insn"
  [(set (match_operand:SISF 0 "nonimmediate_operand"
	  "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG,   v,SD, v,RM")
	(match_operand:SISF 1 "gcn_load_operand"
	  "SSA, J, B,RB,Sm,RS,Sm,v,  v,Sv,RF, v,B,   v,RLRG, Y,RM, v"))]
  ""
  "@
  s_mov_b32\t%0, %1
  s_movk_i32\t%0, %1
  s_mov_b32\t%0, %1
  s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
  s_buffer_store%s1\t%1, s[0:3], %0
  s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
  s_store_dword\t%1, %A0
  v_mov_b32\t%0, %1
  v_readlane_b32\t%0, %1, 0
  v_writelane_b32\t%0, %1, 0
  flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
  flat_store_dword\t%A0, %1%O0%g0
  v_mov_b32\t%0, %1
  ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
  ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
  s_mov_b32\t%0, %1
  global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  global_store_dword\t%A0, %1%O0%g0"
  [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
		     flat,vop1,ds,ds,sop1,flat,flat")
   (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
   (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])

; 8/16bit move pattern

(define_insn "*mov<mode>_insn"
  [(set (match_operand:QIHI 0 "nonimmediate_operand"
	  "=SD,SD,SD,v,Sg, v, v,RF,v,RLRG, v, v,RM")
	(match_operand:QIHI 1 "gcn_load_operand"
	  "SSA, J, B,v, v,Sv,RF, v,B,   v,RLRG,RM, v"))]
  "gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
  "@
  s_mov_b32\t%0, %1
  s_movk_i32\t%0, %1
  s_mov_b32\t%0, %1
  v_mov_b32\t%0, %1
  v_readlane_b32\t%0, %1, 0
  v_writelane_b32\t%0, %1, 0
  flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
  flat_store%s0\t%A0, %1%O0%g0
  v_mov_b32\t%0, %1
  ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
  ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
  global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  global_store%s0\t%A0, %1%O0%g0"
  [(set_attr "type"
	     "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
   (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
   (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])

; 64bit move pattern

(define_insn_and_split "*mov<mode>_insn"
  [(set (match_operand:DIDF 0 "nonimmediate_operand"
	  "=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG, v, v,RM")
	(match_operand:DIDF 1 "general_operand"
	  "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v,   v,RLRG,RM, v"))]
  "GET_CODE(operands[1]) != SYMBOL_REF"
  "@
  s_mov_b64\t%0, %1
  s_mov_b64\t%0, %1
  #
  s_store_dwordx2\t%1, %A0
  s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
  #
  #
  #
  #
  flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
  flat_store_dwordx2\t%A0, %1%O0%g0
  ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
  ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
  global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  global_store_dwordx2\t%A0, %1%O0%g0"
  "reload_completed
   && ((!MEM_P (operands[0]) && !MEM_P (operands[1])
	&& !gcn_sgpr_move_p (operands[0], operands[1]))
       || (GET_CODE (operands[1]) == CONST_INT
	   && !gcn_constant64_p (operands[1])))"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
  {
    rtx inlo = gen_lowpart (SImode, operands[1]);
    rtx inhi = gen_highpart_mode (SImode, <MODE>mode, operands[1]);
    rtx outlo = gen_lowpart (SImode, operands[0]);
    rtx outhi = gen_highpart_mode (SImode, <MODE>mode, operands[0]);

    /* Ensure that overlapping registers aren't corrupted.  */
    if (reg_overlap_mentioned_p (outlo, inhi))
      {
	operands[0] = outhi;
	operands[1] = inhi;
	operands[2] = outlo;
	operands[3] = inlo;
      }
    else
      {
	operands[0] = outlo;
	operands[1] = inlo;
	operands[2] = outhi;
	operands[3] = inhi;
      }
  }
  [(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
		     flat,ds,ds,flat,flat")
   (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])

; 128-bit move.
(define_insn_and_split "*movti_insn"
  [(set (match_operand:TI 0 "nonimmediate_operand"
		      "=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
	(match_operand:TI 1 "general_operand"
		      "SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
  ""
  "@
  #
  s_store_dwordx4\t%1, %A0
  s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
  flat_store_dwordx4\t%A0, %1%O0%g0
  flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
  #
  #
  #
  global_store_dwordx4\t%A0, %1%O0%g0
  global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
  ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
  "reload_completed
   && REG_P (operands[0])
   && (REG_P (operands[1]) || GET_CODE (operands[1]) == CONST_INT)"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
  {
    gcc_assert (rtx_equal_p (operands[0], operands[1])
		|| !reg_overlap_mentioned_p (operands[0], operands[1]));
    operands[6] = gcn_operand_part (TImode, operands[0], 3);
    operands[7] = gcn_operand_part (TImode, operands[1], 3);
    operands[4] = gcn_operand_part (TImode, operands[0], 2);
    operands[5] = gcn_operand_part (TImode, operands[1], 2);
    operands[2] = gcn_operand_part (TImode, operands[0], 1);
    operands[3] = gcn_operand_part (TImode, operands[1], 1);
    operands[0] = gcn_operand_part (TImode, operands[0], 0);
    operands[1] = gcn_operand_part (TImode, operands[1], 0);
  }
  [(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
		     ds,ds")
   (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
   (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])

;; }}}
;; {{{ Prologue/Epilogue

(define_insn "prologue_use"
  [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
  ""
  ""
  [(set_attr "length" "0")])

(define_expand "prologue"
  [(const_int 0)]
  ""
  {
    gcn_expand_prologue ();
    DONE;
  })

(define_expand "epilogue"
  [(const_int 0)]
  ""
  {
    gcn_expand_epilogue ();
    DONE;
  })

;; }}}
;; {{{ Control flow

; This pattern must satisfy simplejump_p, which means it cannot be a parallel
; that clobbers SCC.  Thus, we must preserve SCC if we're generating a long
; branch sequence.

(define_insn "jump"
  [(set (pc)
	(label_ref (match_operand 0)))]
  ""
  {
    if (get_attr_length (insn) == 4)
      return "s_branch\t%0";
    else
      /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG.  */
      return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
	     ".long\t0xbe9600fd\;"
	     "s_getpc_b64\ts[20:21]\;"
	     "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
	     "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
	     "s_cmpk_lg_u32\ts22, 0\;"
	     "s_setpc_b64\ts[20:21]";
  }
  [(set_attr "type" "sopp")
   (set (attr "length")
	(if_then_else (and (ge (minus (match_dup 0) (pc))
			       (const_int -131072))
			   (lt (minus (match_dup 0) (pc))
			       (const_int 131072)))
		      (const_int 4)
		      (const_int 32)))])

(define_insn "indirect_jump"
  [(set (pc)
	(match_operand:DI 0 "register_operand" "Sg"))]
  ""
  "s_setpc_b64\t%0"
  [(set_attr "type" "sop1")
   (set_attr "length" "4")])

(define_insn "cjump"
  [(set (pc)
	(if_then_else
	  (match_operator:BI 1 "gcn_conditional_operator"
	    [(match_operand:BI 2 "gcn_conditional_register_operand" "ca,cV")
	     (const_int 0)])
	  (label_ref (match_operand 0))
	  (pc)))]
  ""
  {
    if (get_attr_length (insn) == 4)
      return "s_cbranch%C1\t%0";
    else
      {
	/* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG but
	   restores SCC.  */
	if (REGNO (operands[2]) == SCC_REG)
	  {
	    if (GET_CODE (operands[1]) == EQ)
	      return "s_cbranch%c1\t.Lskip%=\;"
		     "s_getpc_b64\ts[20:21]\;"
		     "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
		     "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
		     "s_cmp_lg_u32\t0, 0\;"
		     "s_setpc_b64\ts[20:21]\n"
		     ".Lskip%=:";
	    else
	      return "s_cbranch%c1\t.Lskip%=\;"
		     "s_getpc_b64\ts[20:21]\;"
		     "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
		     "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
		     "s_cmp_eq_u32\t0, 0\;"
		     "s_setpc_b64\ts[20:21]\n"
		     ".Lskip%=:";
	  }
	else
	  return "s_cbranch%c1\t.Lskip%=\;"
		 "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
		 ".byte\t0xfd\;"
		 ".byte\t0x0\;"
		 ".byte\t0x80|22\;"
		 ".byte\t0xbe\;"
		 "s_getpc_b64\ts[20:21]\;"
		 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
		 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
		 "s_cmpk_lg_u32\ts22, 0\;"
		 "s_setpc_b64\ts[20:21]\n"
		 ".Lskip%=:";
      }
  }
  [(set_attr "type" "sopp")
   (set (attr "length")
	(if_then_else (and (ge (minus (match_dup 0) (pc))
			       (const_int -131072))
			   (lt (minus (match_dup 0) (pc))
			       (const_int 131072)))
		      (const_int 4)
		      (const_int 36)))])

; Returning from a normal function is different to returning from a
; kernel function.
(define_insn "gcn_return"
  [(return)]
  ""
  {
    if (cfun && cfun->machine && cfun->machine->normal_function)
      return "s_setpc_b64\ts[18:19]";
    else
      return "s_waitcnt\tlgkmcnt(0)\;s_dcache_wb\;s_endpgm";
  }
  [(set_attr "type" "sop1")
   (set_attr "length" "12")])

(define_expand "call"
  [(parallel [(call (match_operand 0 "")
		    (match_operand 1 ""))
	      (clobber (reg:DI LR_REGNUM))
	      (clobber (match_scratch:DI 2))])]
  ""
  {})

(define_insn "gcn_simple_call"
  [(call (mem (match_operand 0 "immediate_operand" "Y,B"))
	 (match_operand 1 "const_int_operand"))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 2 "=&Sg,X"))]
  ""
  "@
  s_getpc_b64\t%2\;s_add_u32\t%L2, %L2, %0@rel32@lo+4\;s_addc_u32\t%H2, %H2, %0@rel32@hi+4\;s_swappc_b64\ts[18:19], %2
  s_swappc_b64\ts[18:19], %0"
  [(set_attr "type" "mult,sop1")
   (set_attr "length" "24,4")])

(define_insn "movdi_symbol"
 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
       (match_operand:DI 1 "general_operand" "Y"))
  (clobber (reg:BI SCC_REG))]
 "GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
  {
    if (SYMBOL_REF_P (operands[1])
	&& SYMBOL_REF_WEAK (operands[1]))
	return "s_getpc_b64\t%0\;"
	       "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
	       "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
	       "s_load_dwordx2\t%0, %0\;"
	       "s_waitcnt\tlgkmcnt(0)";

    return "s_getpc_b64\t%0\;"
	   "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
	   "s_addc_u32\t%H0, %H0, %1@rel32@hi+4";
  }
 [(set_attr "type" "mult")
  (set_attr "length" "32")])

(define_insn "movdi_symbol_save_scc"
 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
       (match_operand:DI 1 "general_operand" "Y"))
  (clobber (reg:BI CC_SAVE_REG))]
 "(GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF)
  && (lra_in_progress || reload_completed)"
  {
    /* !!! These sequences clobber CC_SAVE_REG.  */

    if (SYMBOL_REF_P (operands[1])
	&& SYMBOL_REF_WEAK (operands[1]))
	return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
	       ".long\t0xbe9600fd\;"
	       "s_getpc_b64\t%0\;"
	       "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
	       "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
	       "s_load_dwordx2\t%0, %0\;"
	       "s_cmpk_lg_u32\ts22, 0\;"
	       "s_waitcnt\tlgkmcnt(0)";

    return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
	   ".long\t0xbe9600fd\;"
	   "s_getpc_b64\t%0\;"
	   "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
	   "s_addc_u32\t%H0, %H0, %1@rel32@hi+4\;"
	   "s_cmpk_lg_u32\ts22, 0";
  }
 [(set_attr "type" "mult")
  (set_attr "length" "40")])


(define_insn "gcn_indirect_call"
  [(call (mem (match_operand:DI 0 "register_operand" "Sg"))
	 (match_operand 1 "" ""))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 2 "=X"))]
  ""
  "s_swappc_b64\ts[18:19], %0"
  [(set_attr "type" "sop1")
   (set_attr "length" "4")])

(define_expand "call_value"
  [(parallel [(set (match_operand 0 "")
		   (call (match_operand 1 "")
			 (match_operand 2 "")))
	      (clobber (reg:DI LR_REGNUM))
	      (clobber (match_scratch:DI 3))])]
  ""
  {})

; NOTE(review): the first alternative expands to a four-instruction sequence
; like gcn_simple_call, which declares "mult,sop1" / "24,4"; this pattern
; declares "sop1" / "24" for both alternatives — confirm whether that is
; intentional.
(define_insn "gcn_call_value"
  [(set (match_operand 0 "register_operand" "=Sg,Sg")
	(call (mem (match_operand 1 "immediate_operand" "Y,B"))
	      (match_operand 2 "const_int_operand")))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 3 "=&Sg,X"))]
  ""
  "@
  s_getpc_b64\t%3\;s_add_u32\t%L3, %L3, %1@rel32@lo+4\;s_addc_u32\t%H3, %H3, %1@rel32@hi+4\;s_swappc_b64\ts[18:19], %3
  s_swappc_b64\ts[18:19], %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "24")])

(define_insn "gcn_call_value_indirect"
  [(set (match_operand 0 "register_operand" "=Sg")
	(call (mem (match_operand:DI 1 "register_operand" "Sg"))
	      (match_operand 2 "" "")))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 3 "=X"))]
  ""
  "s_swappc_b64\ts[18:19], %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4")])

; GCN does not have an instruction to clear only part of the instruction
; cache, so the operands are ignored.

(define_insn "clear_icache"
  [(unspec_volatile
    [(match_operand 0 "") (match_operand 1 "")]
    UNSPECV_ICACHE_INV)]
  ""
  "s_icache_inv"
  [(set_attr "type" "sopp")
   (set_attr "length" "4")])

;; }}}
;; {{{ Conditionals

; 32-bit compare, scalar unit only

(define_insn "cstoresi4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand"
			  "=cs, cs, cs, cs")
	(match_operator:BI 1 "gcn_compare_operator"
	  [(match_operand:SI 2 "gcn_alu_operand" "SSA,SSA,SSB, SS")
	   (match_operand:SI 3 "gcn_alu_operand" "SSA,SSL, SS,SSB")]))]
  ""
  "@
  s_cmp%D1\t%2, %3
  s_cmpk%D1\t%2, %3
  s_cmp%D1\t%2, %3
  s_cmp%D1\t%2, %3"
  [(set_attr "type" "sopc,sopk,sopk,sopk")
   (set_attr "length" "4,4,8,8")])

(define_expand "cbranchsi4"
  [(match_operator 0 "gcn_compare_operator"
    [(match_operand:SI 1 "gcn_alu_operand")
     (match_operand:SI 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstoresi4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
			       gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })

; 64-bit compare; either unit, but scalar allows limited operators

(define_expand "cstoredi4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand")
	(match_operator:BI 1 "gcn_compare_operator"
	  [(match_operand:DI 2 "gcn_alu_operand")
	   (match_operand:DI 3 "gcn_alu_operand")]))]
  ""
  {})

(define_insn "cstoredi4_vec_and_scalar"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cs, cV")
	(match_operator:BI 1 "gcn_compare_64bit_operator"
	  [(match_operand:DI 2 "gcn_alu_operand"	      "%SSA,vSvC")
	   (match_operand:DI 3 "gcn_alu_operand"	      " SSC,   v")]))]
  ""
  "@
  s_cmp%D1\t%2, %3
  v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "sopc,vopc")
   (set_attr "length" "8")])

(define_insn "cstoredi4_vector"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cV")
	(match_operator:BI 1 "gcn_compare_operator"
	  [(match_operand:DI 2 "gcn_alu_operand"	      "vSvB")
	   (match_operand:DI 3 "gcn_alu_operand"	      "   v")]))]
  ""
  "v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "vopc")
   (set_attr "length" "8")])

(define_expand "cbranchdi4"
  [(match_operator 0 "gcn_compare_operator"
    [(match_operand:DI 1 "gcn_alu_operand")
     (match_operand:DI 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstoredi4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
			       gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })

; FP compare; vector unit only

(define_insn "cstore<mode>4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cV")
	(match_operator:BI 1 "gcn_fp_compare_operator"
	  [(match_operand:SFDF 2 "gcn_alu_operand" "vB")
	   (match_operand:SFDF 3 "gcn_alu_operand" "v")]))]
  ""
  "v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "vopc")
   (set_attr "length" "8")])

(define_expand "cbranch<mode>4"
  [(match_operator 0 "gcn_fp_compare_operator"
    [(match_operand:SFDF 1 "gcn_alu_operand")
     (match_operand:SFDF 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstore<mode>4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
			       gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })

;; }}}
;; {{{ ALU special cases: Plus

(define_insn "addsi3"
  [(set (match_operand:SI	   0 "register_operand"  "= Sg, Sg, Sg,   v")
	(plus:SI (match_operand:SI 1 "gcn_alu_operand"	 "%SgA,  0,SgA,   v")
		 (match_operand:SI 2 "gcn_alu_operand"	 " SgA,SgJ,  B,vBSv")))
   (clobber (match_scratch:BI 3				 "= cs, cs, cs,   X"))
   (clobber (match_scratch:DI 4				 "=  X,  X,  X,  cV"))]
  ""
  "@
  s_add_i32\t%0, %1, %2
  s_addk_i32\t%0, %2
  s_add_i32\t%0, %1, %2
  v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,sopk,sop2,vop2")
   (set_attr "length" "4,4,8,8")])

(define_expand "addsi3_scc"
  [(parallel [(set (match_operand:SI 0 "register_operand")
		   (plus:SI (match_operand:SI 1 "gcn_alu_operand")
			    (match_operand:SI 2 "gcn_alu_operand")))
	      (clobber (reg:BI SCC_REG))
	      (clobber (scratch:DI))])]
  ""
  {})

; Having this as an insn_and_split allows us to keep together DImode adds
; through some RTL optimisation passes, and means the CC reg we set isn't
; dependent on the constraint alternative (which doesn't seem to work well).

; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be
; used as an operand due to the read of VCC, so we restrict constants to the
; inlinable range for that alternative.

(define_insn_and_split "adddi3"
  [(set (match_operand:DI 0 "register_operand"		 "=Sg, v")
	(plus:DI (match_operand:DI 1 "register_operand"	 " Sg, v")
		 (match_operand:DI 2 "nonmemory_operand" "SgB,vA")))
   (clobber (match_scratch:BI 3				 "=cs, X"))
   (clobber (match_scratch:DI 4				 "= X,cV"))]
  ""
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    rtx cc = gen_rtx_REG (BImode, gcn_vgpr_register_operand (operands[1],
							     DImode)
			  ? VCC_REG : SCC_REG);

    emit_insn (gen_addsi3_scalar_carry
	       (gcn_operand_part (DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[1], 0),
		gcn_operand_part (DImode, operands[2], 0),
		cc));
    rtx val = gcn_operand_part (DImode, operands[2], 1);
    if (val != const0_rtx)
      emit_insn (gen_addcsi3_scalar
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1),
		  gcn_operand_part (DImode, operands[2], 1),
		  cc, cc));
    else
      emit_insn (gen_addcsi3_scalar_zero
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1),
		  cc));
    DONE;
  }
  [(set_attr "type" "mult,vmult")
   (set_attr "length" "8")])

(define_expand "adddi3_scc"
  [(parallel [(set (match_operand:DI 0 "register_operand")
		   (plus:DI (match_operand:DI 1 "register_operand")
			    (match_operand:DI 2 "nonmemory_operand")))
	      (clobber (reg:BI SCC_REG))
	      (clobber (scratch:DI))])]
  ""
  {})

;; Add with carry.
; Add producing a carry-out: operand 3 receives (%1 + %2) <u %1, i.e. the
; unsigned-overflow bit, in either SCC (scalar) or VCC (VALU).
(define_insn "addsi3_scalar_carry"
  [(set (match_operand:SI 0 "register_operand" "= Sg, v")
	(plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, v")
		 (match_operand:SI 2 "gcn_alu_operand" " SgB,vB")))
   (set (match_operand:BI 3 "register_operand" "= cs,cV")
	(ltu:BI (plus:SI (match_dup 1)
			 (match_dup 2))
		(match_dup 1)))]
  ""
  "@
   s_add_u32\t%0, %1, %2
   v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8,8")])

; Constant form of the above.  The insn condition requires operand 3 to be
; the negation of operand 2, so the GEU test is the canonicalized form of
; the same carry-out computation.
(define_insn "addsi3_scalar_carry_cst"
  [(set (match_operand:SI 0 "register_operand" "=Sg, v")
	(plus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA, v")
		 (match_operand:SI 2 "const_int_operand" " n, n")))
   (set (match_operand:BI 4 "register_operand" "=cs,cV")
	(geu:BI (plus:SI (match_dup 1)
			 (match_dup 2))
		(match_operand:SI 3 "const_int_operand" " n, n")))]
  "INTVAL (operands[2]) == -INTVAL (operands[3])"
  "@
   s_add_u32\t%0, %1, %2
   v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "4")])

; Add with both carry-in (operand 3) and carry-out (operand 4); the "3"
; constraint ties the carry-out to the same CC register as the carry-in.
(define_insn "addcsi3_scalar"
  [(set (match_operand:SI 0 "register_operand" "= Sg, v")
	(plus:SI (plus:SI (zero_extend:SI
			    (match_operand:BI 3 "register_operand" "= cs,cV"))
			  (match_operand:SI 1 "gcn_alu_operand" "%SgA, v"))
		 (match_operand:SI 2 "gcn_alu_operand" " SgB,vA")))
   (set (match_operand:BI 4 "register_operand" "= 3, 3")
	(ior:BI (ltu:BI (plus:SI
			  (plus:SI
			    (zero_extend:SI (match_dup 3))
			    (match_dup 1))
			  (match_dup 2))
			(match_dup 2))
		(ltu:BI (plus:SI (zero_extend:SI (match_dup 3)) (match_dup 1))
			(match_dup 1))))]
  ""
  "@
   s_addc_u32\t%0, %1, %2
   v_addc%^_u32\t%0, vcc, %2, %1, vcc"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8,4")])

; Add with carry-in only; the second addend is the constant zero.
(define_insn "addcsi3_scalar_zero"
  [(set (match_operand:SI 0 "register_operand" "=Sg, v")
	(plus:SI (zero_extend:SI
		   (match_operand:BI 2 "register_operand" "=cs,cV"))
		 (match_operand:SI 1 "gcn_alu_operand" "SgA, v")))
   (set (match_dup 2)
	(ltu:BI (plus:SI (zero_extend:SI (match_dup 2))
			 (match_dup 1))
		(match_dup 1)))]
  ""
  "@
   s_addc_u32\t%0, %1, 0
   v_addc%^_u32\t%0, vcc, 0, %1, vcc"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "4")])

; "addptr" is the same as "add" except that it must not write to VCC or SCC
; as a side-effect.  Unfortunately GCN does not have a suitable instruction
; for this, so we use CC_SAVE_REG as a temp.
; Note that it is not safe to save/clobber/restore as separate insns because
; doing so will break data-flow analysis, so this must use multiple
; instructions in one insn.
;
; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever
; on testcase pr54713-2.c with -O0.  It's only an optimization hint anyway.
;
; The SGPR alternative is preferred as it is typically used with mov_sgprbase.
(define_insn "addptrdi3"
  [(set (match_operand:DI 0 "register_operand" "= v, Sg")
	(unspec:DI [
	  (plus:DI (match_operand:DI 1 "register_operand" "^v0,Sg0")
		   (match_operand:DI 2 "nonmemory_operand" "vDA,SgDB"))]
	  UNSPEC_ADDPTR))]
  ""
  {
    if (which_alternative == 0)
      {
	/* VALU form: use CC_SAVE_REG directly as the carry register so that
	   VCC itself is left untouched.  */
	rtx new_operands[4] = { operands[0], operands[1], operands[2],
				gen_rtx_REG (DImode, CC_SAVE_REG) };

	output_asm_insn ("v_add%^_u32\t%L0, %3, %L2, %L1", new_operands);
	output_asm_insn ("v_addc%^_u32\t%H0, %3, %H2, %H1, %3", new_operands);
      }
    else
      {
	/* Scalar form: SCC is unavoidably written, so save it to
	   CC_SAVE_REG first and restore it (via s_cmpk_lg_u32) after.  */
	rtx new_operands[4] = { operands[0], operands[1], operands[2],
				gen_rtx_REG (BImode, CC_SAVE_REG) };

	output_asm_insn ("s_mov_b32\t%3, scc", new_operands);
	output_asm_insn ("s_add_u32\t%L0, %L1, %L2", new_operands);
	output_asm_insn ("s_addc_u32\t%H0, %H1, %H2", new_operands);
	output_asm_insn ("s_cmpk_lg_u32\t%3, 0", new_operands);
      }

    return "";
  }
  [(set_attr "type" "vmult,mult")
   (set_attr "length" "16,24")])

;; }}}
;; {{{ ALU special cases: Minus

; 32-bit subtract.  Scalar alternatives clobber SCC; VALU alternatives
; clobber VCC, with v_subrev used when the subtrahend is the register
; operand.
(define_insn "subsi3"
  [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v, v")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgA, v,vBSv")
		  (match_operand:SI 2 "gcn_alu_operand" "SgA, B, vBSv, v")))
   (clobber (match_scratch:BI 3 "=cs, cs, X, X"))
   (clobber (match_scratch:DI 4 "= X, X, cV, cV"))]
  ""
  "@
   s_sub_i32\t%0, %1, %2
   s_sub_i32\t%0, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1
   v_sub%^_u32\t%0, vcc, %1, %2"
  [(set_attr "type" "sop2,sop2,vop2,vop2")
   (set_attr "length" "4,8,8,8")])

; 64-bit subtract, scalar unit only: split after reload into a
; subtract-with-borrow-out on the low part and subtract-with-borrow-in on
; the high part (cf. adddi3 above).
(define_insn_and_split "subdi3"
  [(set (match_operand:DI 0 "register_operand" "=Sg, Sg")
	(minus:DI
	  (match_operand:DI 1 "gcn_alu_operand" "SgA,SgB")
	  (match_operand:DI 2 "gcn_alu_operand" "SgB,SgA")))
   (clobber (reg:BI SCC_REG))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    emit_insn (gen_subsi3_scalar_carry
	       (gcn_operand_part (DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[1], 0),
		gcn_operand_part (DImode, operands[2], 0)));
    rtx val = gcn_operand_part (DImode, operands[2], 1);
    if (val != const0_rtx)
      emit_insn (gen_subcsi3_scalar
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1),
		  gcn_operand_part (DImode, operands[2], 1)));
    else
      emit_insn (gen_subcsi3_scalar_zero
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1)));
    DONE;
  }
  [(set_attr "length" "8")])

; Subtract producing a borrow-out in SCC: (%1 - %2) >u %1 is the unsigned
; underflow bit.
(define_insn "subsi3_scalar_carry"
  [(set (match_operand:SI 0 "register_operand" "=Sg, Sg")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB")
		  (match_operand:SI 2 "gcn_alu_operand" "SgB,SgA")))
   (set (reg:BI SCC_REG)
	(gtu:BI (minus:SI (match_dup 1)
			  (match_dup 2))
		(match_dup 1)))]
  ""
  "s_sub_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "8")])

; Constant form; operand 3 must be the negation of operand 2 (cf.
; addsi3_scalar_carry_cst).
(define_insn "subsi3_scalar_carry_cst"
  [(set (match_operand:SI 0 "register_operand" "=Sg")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA")
		  (match_operand:SI 2 "const_int_operand" " n")))
   (set (reg:BI SCC_REG)
	(leu:BI (minus:SI (match_dup 1)
			  (match_dup 2))
		(match_operand:SI 3 "const_int_operand" " n")))]
  "INTVAL (operands[2]) == -INTVAL (operands[3])"
  "s_sub_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "4")])

; Subtract with both borrow-in and borrow-out via SCC.
(define_insn "subcsi3_scalar"
  [(set (match_operand:SI 0 "register_operand" "=Sg, Sg")
	(minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
			    (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB"))
		  (match_operand:SI 2 "gcn_alu_operand" "SgB,SgA")))
   (set (reg:BI SCC_REG)
	(ior:BI (gtu:BI (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
					    (match_dup 1))
				  (match_dup 2))
			(match_dup 1))
		(gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
				  (match_dup 1))
			(match_dup 1))))]
  ""
  "s_subb_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "8")])

; Subtract with borrow-in only (subtrahend is zero).
(define_insn "subcsi3_scalar_zero"
  [(set (match_operand:SI 0 "register_operand" "=Sg")
	(minus:SI (zero_extend:SI (reg:BI SCC_REG))
		  (match_operand:SI 1 "gcn_alu_operand" "SgA")))
   (set (reg:BI SCC_REG)
	(gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1))
		(match_dup 1)))]
  ""
  "s_subb_u32\t%0, %1, 0"
  [(set_attr "type" "sop2")
   (set_attr "length" "4")])

;; }}}
;; {{{ ALU: mult

; Vector multiply has vop3a encoding, but no corresponding vop2a, so no long
; immediate.
(define_insn "mulsi3"
  [(set (match_operand:SI 0 "register_operand" "= Sg,Sg, Sg, v")
	(mult:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA, v")
		 (match_operand:SI 2 "gcn_alu_operand" " SgA, J, B,vASv")))]
  ""
  "@
   s_mul_i32\t%0, %1, %2
   s_mulk_i32\t%0, %2
   s_mul_i32\t%0, %1, %2
   v_mul_lo_i32\t%0, %1, %2"
  [(set_attr "type" "sop2,sopk,sop2,vop3a")
   (set_attr "length" "4,4,8,4")])

; Code iterator and attributes used to generate the signed/unsigned variants
; of the widening/highpart multiply patterns below.
(define_code_iterator any_extend [sign_extend zero_extend])
(define_code_attr sgnsuffix [(sign_extend "%i") (zero_extend "%u")])
(define_code_attr su [(sign_extend "s") (zero_extend "u")])
(define_code_attr u [(sign_extend "") (zero_extend "u")])
(define_code_attr iu [(sign_extend "i") (zero_extend "u")])
(define_code_attr e [(sign_extend "e") (zero_extend "")])

; High 32 bits of a widening 32x32->64-bit multiply; VALU only.
(define_insn "<su>mulsi3_highpart"
  [(set (match_operand:SI 0 "register_operand" "= v")
	(truncate:SI
	  (lshiftrt:DI
	    (mult:DI
	      (any_extend:DI
		(match_operand:SI 1 "register_operand" "% v"))
	      (any_extend:DI
		(match_operand:SI 2 "register_operand" "vSv")))
	    (const_int 32))))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Widening HImode multiply via the 24-bit multiply with SDWA operand
; selection of the low 16 bits.
(define_insn "<u>mulhisi3"
  [(set (match_operand:SI 0 "register_operand" "=v")
	(mult:SI
	  (any_extend:SI (match_operand:HI 1 "register_operand" "%v"))
	  (any_extend:SI (match_operand:HI 2 "register_operand" " v"))))]
  ""
  "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:WORD_0 src1_sel:WORD_0"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])

; As above, but QImode inputs selected via BYTE_0.
(define_insn "<u>mulqihi3_scalar"
  [(set (match_operand:HI 0 "register_operand" "=v")
	(mult:HI
	  (any_extend:HI (match_operand:QI 1 "register_operand" "%v"))
	  (any_extend:HI (match_operand:QI 2 "register_operand" " v"))))]
  ""
  "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:BYTE_0 src1_sel:BYTE_0"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])

;; }}}
;; {{{ ALU: generic 32-bit unop

(define_code_iterator bitunop [not popcount])
(define_code_attr popcount_extra_op [(not "") (popcount ", 0")])

(define_insn "<expander>si2"
  [(set (match_operand:SI 0 "register_operand" "=Sg, v")
	(bitunop:SI
	  (match_operand:SI 1 "gcn_alu_operand" "SgB,vSvB")))
   (clobber (match_scratch:BI 2 "=cs, X"))]
  ""
  "@
   s_<s_mnemonic>0\t%0, %1
   v_<mnemonic>0\t%0, %1<popcount_extra_op>"
  [(set_attr "type" "sop1,vop1")
   (set_attr "length" "8")])

(define_code_iterator countzeros [clz ctz])

; Count leading/trailing zeros; scalar unit only.
(define_insn "<expander>si2"
  [(set (match_operand:SI 0 "register_operand" "=Sg,Sg")
	(countzeros:SI
	  (match_operand:SI 1 "gcn_alu_operand" "SgA, B")))]
  ""
  "s_<s_mnemonic>1\t%0, %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4,8")])

; The truncate ensures that a constant passed to operand 1 is treated as DImode
(define_insn "<expander>di2"
  [(set (match_operand:SI 0 "register_operand" "=Sg,Sg")
	(truncate:SI
	  (countzeros:DI
	    (match_operand:DI 1 "gcn_alu_operand" "SgA, B"))))]
  ""
  "s_<s_mnemonic>1\t%0, %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4,8")])

;; }}}
;; {{{ ALU: generic 32-bit binop

; No plus and mult - they have variant with 16bit immediate
; and thus are defined later.
(define_code_iterator binop [and ior xor smin smax umin umax
			     ashift lshiftrt ashiftrt])
(define_code_iterator vec_and_scalar_com [and ior xor smin smax umin umax])
(define_code_iterator vec_and_scalar_nocom [ashift lshiftrt ashiftrt])

; Commutative binops; the "RD" alternative is the LDS (DS) read-modify-write
; form.
(define_insn "<expander>si3"
  [(set (match_operand:SI 0 "gcn_valu_dst_operand" "= Sg, v,RD")
	(vec_and_scalar_com:SI
	  (match_operand:SI 1 "gcn_valu_src0_operand" "%SgA,vSvB, 0")
	  (match_operand:SI 2 "gcn_alu_operand" " SgB, v, v")))
   (clobber (match_scratch:BI 3 "= cs, X, X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   v_<mnemonic>0\t%0, %1, %2
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "sop2,vop2,ds")
   (set_attr "length" "8")])

; Non-commutative binops (shifts); the VALU form uses the operand-reversed
; mnemonic.
(define_insn "<expander>si3"
  [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v")
	(vec_and_scalar_nocom:SI
	  (match_operand:SI 1 "gcn_alu_operand" "SgB,SgA, v")
	  (match_operand:SI 2 "gcn_alu_operand" "SgA,SgB,vSvB")))
   (clobber (match_scratch:BI 3 "=cs, cs, X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   s_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "sop2,sop2,vop2")
   (set_attr "length" "8")])

; Binop with an explicit SCC clobber, for callers emitting the parallel
; directly (cf. addsi3_scc).
(define_expand "<expander>si3_scc"
  [(parallel [(set (match_operand:SI 0 "gcn_valu_dst_operand")
		   (binop:SI
		     (match_operand:SI 1 "gcn_valu_src0_operand")
		     (match_operand:SI 2 "gcn_alu_operand")))
	      (clobber (reg:BI SCC_REG))])]
  ""
  {})

;; }}}
;; {{{ ALU: generic 64-bit

(define_code_iterator vec_and_scalar64_com [and ior xor])

; 64-bit commutative bitwise ops: one scalar instruction, or (for VGPRs)
; split after reload into two independent 32-bit halves.
(define_insn_and_split "<expander>di3"
  [(set (match_operand:DI 0 "register_operand" "= Sg, v")
	(vec_and_scalar64_com:DI
	  (match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB")
	  (match_operand:DI 2 "gcn_alu_operand" " SgC, v")))
   (clobber (match_scratch:BI 3 "= cs, X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   #"
  "reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
  [(parallel [(set (match_dup 4)
		   (vec_and_scalar64_com:SI (match_dup 5) (match_dup 6)))
	      (clobber (match_dup 3))])
   (parallel [(set (match_dup 7)
		   (vec_and_scalar64_com:SI (match_dup 8) (match_dup 9)))
	      (clobber (match_dup 3))])]
  {
    operands[4] = gcn_operand_part (DImode, operands[0], 0);
    operands[5] = gcn_operand_part (DImode, operands[1], 0);
    operands[6] = gcn_operand_part (DImode, operands[2], 0);
    operands[7] = gcn_operand_part (DImode, operands[0], 1);
    operands[8] = gcn_operand_part (DImode, operands[1], 1);
    operands[9] = gcn_operand_part (DImode, operands[2], 1);
  }
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8")])

; 64-bit shifts; note the shift count (operand 2) is SImode.
(define_insn "<expander>di3"
  [(set (match_operand:DI 0 "register_operand" "=Sg, Sg, v")
	(vec_and_scalar_nocom:DI
	  (match_operand:DI 1 "gcn_alu_operand" "SgC,SgA, v")
	  (match_operand:SI 2 "gcn_alu_operand" "SgA,SgC,vSvC")))
   (clobber (match_scratch:BI 3 "=cs, cs, X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   s_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "sop2,sop2,vop2")
   (set_attr "length" "8")])

;; }}}
;; {{{ ALU: generic 128-bit binop

; TImode shifts can't be synthesized by the middle-end
(define_expand "<expander>ti3"
  [(set (match_operand:TI 0 "register_operand")
	(vec_and_scalar_nocom:TI
	  (match_operand:TI 1 "gcn_alu_operand")
	  (match_operand:SI 2 "gcn_alu_operand")))]
  ""
  {
    rtx dest = operands[0];
    rtx src = operands[1];
    rtx shift = operands[2];

    enum {ashr, lshr, ashl} shiftop = <expander>;
    rtx (*inverse_shift_fn) (rtx, rtx, rtx)
      = (shiftop == ashl ? gen_lshrdi3 : gen_ashldi3);
    rtx (*logical_shift_fn) (rtx, rtx, rtx)
      = (shiftop == ashl ? gen_ashldi3 : gen_lshrdi3);

    /* We shift "from" one subreg "to" the other, according to shiftop.  */
    int from = (shiftop == ashl ? 0 : 8);
    int to = (shiftop == ashl ? 8 : 0);
    rtx destfrom = simplify_gen_subreg (DImode, dest, TImode, from);
    rtx destto = simplify_gen_subreg (DImode, dest, TImode, to);
    rtx srcfrom = simplify_gen_subreg (DImode, src, TImode, from);
    rtx srcto = simplify_gen_subreg (DImode, src, TImode, to);

    /* Classify the shift amount so that only the needed code is emitted;
       a non-constant amount needs all three cases plus runtime branches.  */
    int shiftval = (CONST_INT_P (shift) ? INTVAL (shift) : -1);
    enum {RUNTIME, ZERO, SMALL, LARGE} shiftcomparison
      = (!CONST_INT_P (shift) ? RUNTIME
	 : shiftval == 0 ? ZERO
	 : shiftval < 64 ? SMALL
	 : LARGE);

    rtx large_label, zero_label, exit_label;

    if (shiftcomparison == RUNTIME)
      {
	zero_label = gen_label_rtx ();
	large_label = gen_label_rtx ();
	exit_label = gen_label_rtx ();

	rtx cond = gen_rtx_EQ (VOIDmode, shift, const0_rtx);
	emit_insn (gen_cbranchsi4 (cond, shift, const0_rtx, zero_label));

	rtx sixtyfour = GEN_INT (64);
	cond = gen_rtx_GE (VOIDmode, shift, sixtyfour);
	emit_insn (gen_cbranchsi4 (cond, shift, sixtyfour, large_label));
      }

    if (shiftcomparison == SMALL || shiftcomparison == RUNTIME)
      {
	/* Shift both parts by the same amount, then patch in the bits that
	   cross the boundary.
	   This does *not* work for zero-length shifts.  */
	rtx tmpto1 = gen_reg_rtx (DImode);
	rtx tmpto2 = gen_reg_rtx (DImode);
	emit_insn (gen_<expander>di3 (destfrom, srcfrom, shift));
	emit_insn (logical_shift_fn (tmpto1, srcto, shift));
	rtx lessershiftval = gen_reg_rtx (SImode);
	emit_insn (gen_subsi3 (lessershiftval, GEN_INT (64), shift));
	emit_insn (inverse_shift_fn (tmpto2, srcfrom, lessershiftval));
	emit_insn (gen_iordi3 (destto, tmpto1, tmpto2));
      }

    if (shiftcomparison == RUNTIME)
      {
	emit_jump_insn (gen_jump (exit_label));
	emit_barrier ();

	emit_label (zero_label);
      }

    if (shiftcomparison == ZERO || shiftcomparison == RUNTIME)
      emit_move_insn (dest, src);

    if (shiftcomparison == RUNTIME)
      {
	emit_jump_insn (gen_jump (exit_label));
	emit_barrier ();

	emit_label (large_label);
      }

    if (shiftcomparison == LARGE || shiftcomparison == RUNTIME)
      {
	/* Do the shift within one part, and set the other part appropriately.
	   Shifts of 128+ bits are an error.  */
	rtx lessershiftval = gen_reg_rtx (SImode);
	emit_insn (gen_subsi3 (lessershiftval, shift, GEN_INT (64)));
	emit_insn (gen_<expander>di3 (destto, srcfrom, lessershiftval));
	if (shiftop == ashr)
	  emit_insn (gen_ashrdi3 (destfrom, srcfrom, GEN_INT (63)));
	else
	  emit_move_insn (destfrom, const0_rtx);
      }

    if (shiftcomparison == RUNTIME)
      emit_label (exit_label);

    DONE;
  })

;; }}}
;; {{{ Atomics

; Each compute unit has its own L1 cache.  The L2 cache is shared between
; all the compute units.  Any load or store instruction can skip L1 and
; access L2 directly using the "glc" flag.  Atomic instructions also skip
; L1.  The L1 cache can be flushed and invalidated using instructions.
;
; Therefore, in order for "acquire" and "release" atomic modes to work
; correctly across compute units we must flush before each "release"
; and invalidate the cache after each "acquire".  It might seem like
; invalidation could be safely done before an "acquire", but since each
; compute unit can run up to 40 threads simultaneously, all reading values
; into the L1 cache, this is not actually safe.
;
; Additionally, scalar flat instructions access L2 via a different cache
; (the "constant cache"), so they have separate control instructions.  We
; do not attempt to invalidate both caches at once; instead, atomics
; operating on scalar flat pointers will flush the constant cache, and
; atomics operating on flat or global pointers will flush L1.  It is up to
; the programmer to get this right.

(define_code_iterator atomicops [plus minus and ior xor])
(define_mode_attr X [(SI "") (DI "_X2")])

;; TODO compare_and_swap test_and_set inc dec
;; Hardware also supports min and max, but GCC does not.

; Memory barrier implemented as a volatile operation on a dummy BLK mem so
; that other memory accesses cannot be moved across it.
(define_expand "memory_barrier"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
  ""
  {
    operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
    MEM_VOLATILE_P (operands[0]) = 1;
  })

(define_insn "*memory_barrier"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
  ""
  "buffer_wbinvl1_vol"
  [(set_attr "type" "mubuf")
   (set_attr "length" "4")])

; FIXME: These patterns have been disabled as they do not seem to work
; reliably - they can cause hangs or incorrect results.
; TODO: flush caches according to memory model
(define_insn "atomic_fetch_<bare_mnemonic><mode>"
  [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
	(match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(atomicops:SIDI
	    (match_dup 1)
	    (match_operand:SIDI 2 "register_operand" " Sm, v, v"))]
	   UNSPECV_ATOMIC))
   (use (match_operand 3 "const_int_operand"))]
  "0 /* Disabled.  */"
  "@
   s_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\t0
   global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

; FIXME: These patterns are disabled because the instructions don't
; seem to work as advertised.  Specifically, OMP "team distribute"
; reductions apparently "lose" some of the writes, similar to what
; you might expect from a concurrent non-atomic read-modify-write.
; TODO: flush caches according to memory model
(define_insn "atomic_<bare_mnemonic><mode>"
  [(set (match_operand:SIDI 0 "memory_operand" "+RS,RF,RM")
	(unspec_volatile:SIDI
	  [(atomicops:SIDI
	    (match_dup 0)
	    (match_operand:SIDI 1 "register_operand" " Sm, v, v"))]
	  UNSPECV_ATOMIC))
   (use (match_operand 2 "const_int_operand"))]
  "0 /* Disabled.  */"
  "@
   s_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\t0
   global_atomic_<bare_mnemonic><X>\t%A0, %1%O0\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

(define_mode_attr x2 [(SI "DI") (DI "TI")])
(define_mode_attr size [(SI "4") (DI "8")])
(define_mode_attr bitsize [(SI "32") (DI "64")])

(define_expand "sync_compare_and_swap<mode>"
  [(match_operand:SIDI 0 "register_operand")
   (match_operand:SIDI 1 "memory_operand")
   (match_operand:SIDI 2 "register_operand")
   (match_operand:SIDI 3 "register_operand")]
  ""
  {
    /* LDS memory uses a different instruction, taking the compare and swap
       values separately.  */
    if (MEM_ADDR_SPACE (operands[1]) == ADDR_SPACE_LDS)
      {
	emit_insn (gen_sync_compare_and_swap<mode>_lds_insn (operands[0],
							     operands[1],
							     operands[2],
							     operands[3]));
	DONE;
      }

    /* Operands 2 and 3 must be placed in consecutive registers, and passed
       as a combined value.  */
    rtx src_cmp = gen_reg_rtx (<x2>mode);
    emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, 0), operands[3]);
    emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, <size>), operands[2]);
    emit_insn (gen_sync_compare_and_swap<mode>_insn (operands[0],
						     operands[1],
						     src_cmp));
    DONE;
  })

(define_insn "sync_compare_and_swap<mode>_insn"
  [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
	(match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(match_operand:<x2> 2 "register_operand" " Sm, v, v")]
	  UNSPECV_ATOMIC))]
  ""
  "@
   s_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\t0
   global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")
   (set_attr "delayeduse" "*,yes,yes")])

(define_insn "sync_compare_and_swap<mode>_lds_insn"
  [(set (match_operand:SIDI 0 "register_operand" "= v")
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 1 "memory_operand" "+RL")]
	  UNSPECV_ATOMIC))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 2 "register_operand" " v")
	   (match_operand:SIDI 3 "register_operand" " v")]
	  UNSPECV_ATOMIC))]
  ""
  "ds_cmpst_rtn_b<bitsize> %0, %1, %2, %3\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

; Atomic load; operand 2 is the memory model, which selects the cache
; writeback/invalidate sequence emitted around the load.
(define_insn "atomic_load<mode>"
  [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 1 "memory_operand" " RS,RF,RM")]
	  UNSPECV_ATOMIC))
   (use (match_operand:SIDI 2 "immediate_operand" " i, i, i"))]
  ""
  {
    switch (INTVAL (operands[2]))
      {
      case MEMMODEL_RELAXED:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0";
	  case 2:
	    return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_CONSUME:
      case MEMMODEL_ACQUIRE:
      case MEMMODEL_SYNC_ACQUIRE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
		   "s_dcache_wb_vol";
	  case 1:
	    return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
		   "buffer_wbinvl1_vol";
	  case 2:
	    return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
		   "buffer_wbinvl1_vol";
	  }
	break;
      case MEMMODEL_ACQ_REL:
      case MEMMODEL_SEQ_CST:
      case MEMMODEL_SYNC_SEQ_CST:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
		   "s_waitcnt\t0\;buffer_wbinvl1_vol";
	  case 2:
	    return "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      }
    gcc_unreachable ();
  }
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "20")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

; Atomic store; operand 2 is the memory model (cf. atomic_load above).
(define_insn "atomic_store<mode>"
  [(set (match_operand:SIDI 0 "memory_operand" "=RS,RF,RM")
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 1 "register_operand" " Sm, v, v")]
	  UNSPECV_ATOMIC))
   (use (match_operand:SIDI 2 "immediate_operand" " i, i, i"))]
  ""
  {
    switch (INTVAL (operands[2]))
      {
      case MEMMODEL_RELAXED:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_store%o1\t%1, %A0 glc\;s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "flat_store%o1\t%A0, %1%O0 glc\;s_waitcnt\t0";
	  case 2:
	    return "global_store%o1\t%A0, %1%O0 glc\;s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_RELEASE:
      case MEMMODEL_SYNC_RELEASE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc";
	  case 2:
	    return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc";
	  }
	break;
      case MEMMODEL_ACQ_REL:
      case MEMMODEL_SEQ_CST:
      case MEMMODEL_SYNC_SEQ_CST:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
		   "s_waitcnt\t0\;buffer_wbinvl1_vol";
	  case 2:
	    return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      }
    gcc_unreachable ();
  }
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "20")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

; Atomic exchange; operand 3 is the memory model (cf. atomic_load above).
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
	(match_operand:SIDI 1 "memory_operand" "+RS,RF,RM"))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 2 "register_operand" " Sm, v, v")]
	  UNSPECV_ATOMIC))
   (use (match_operand 3 "immediate_operand"))]
  ""
  {
    switch (INTVAL (operands[3]))
      {
      case MEMMODEL_RELAXED:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0";
	  case 2:
	    return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_CONSUME:
      case MEMMODEL_ACQUIRE:
      case MEMMODEL_SYNC_ACQUIRE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
		   "s_dcache_wb_vol\;s_dcache_inv_vol";
	  case 1:
	    return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
		   "buffer_wbinvl1_vol";
	  case 2:
	    return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      case MEMMODEL_RELEASE:
      case MEMMODEL_SYNC_RELEASE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\t0";
	  case 2:
	    return "buffer_wbinvl1_vol\;"
		   "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_ACQ_REL:
      case MEMMODEL_SEQ_CST:
      case MEMMODEL_SYNC_SEQ_CST:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\t0\;buffer_wbinvl1_vol";
	  case 2:
	    return "buffer_wbinvl1_vol\;"
		   "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      }
    gcc_unreachable ();
  }
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "20")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

;; }}}
;; {{{ OpenACC / OpenMP

(define_expand "oacc_dim_size"
  [(match_operand:SI 0 "register_operand")
   (match_operand:SI 1 "const_int_operand")]
  ""
  {
    rtx tmp = gcn_oacc_dim_size (INTVAL (operands[1]));
    emit_move_insn (operands[0], gen_lowpart (SImode, tmp));
    DONE;
  })

(define_expand "oacc_dim_pos"
  [(match_operand:SI 0 "register_operand")
   (match_operand:SI 1 "const_int_operand")]
  ""
  {
    emit_move_insn (operands[0], gcn_oacc_dim_pos (INTVAL (operands[1])));
    DONE;
  })
; Wavefront barrier, modelled as a volatile operation on a dummy BLK mem so
; that memory accesses are not moved across it (cf. memory_barrier above).
(define_expand "gcn_wavefront_barrier"
  [(set (match_dup 0)
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
  ""
  {
    operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
    MEM_VOLATILE_P (operands[0]) = 1;
  })

(define_insn "*gcn_wavefront_barrier"
  [(set (match_operand:BLK 0 "")
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
  ""
  "s_barrier"
  [(set_attr "type" "sopp")])

(define_expand "oacc_fork"
  [(set (match_operand:SI 0 "")
	(match_operand:SI 1 ""))
   (use (match_operand:SI 2 ""))]
  ""
  {
    /* We need to have oacc_fork/oacc_join named patterns as a pair,
       but the fork isn't actually used.  */
    gcc_unreachable ();
  })

; oacc_join expands to a plain wavefront barrier.
(define_expand "oacc_join"
  [(set (match_operand:SI 0 "")
	(match_operand:SI 1 ""))
   (use (match_operand:SI 2 ""))]
  ""
  {
    emit_insn (gen_gcn_wavefront_barrier ());
    DONE;
  })

;; }}}

(include "gcn-valu.md")