; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-32BANK %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8-32BANK %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8-16BANK %s

; Codegen test for the llvm.amdgcn.interp.p1.f16 and llvm.amdgcn.interp.p2.f16
; intrinsics, run for gfx900, fiji and gfx810. Per the expected output below:
;   * gfx900 and fiji select v_interp_p1ll_f16 for the p1 step, while gfx810
;     selects v_interp_mov_f32 followed by v_interp_p1lv_f16;
;   * gfx900 prints the p2 step as v_interp_p2_legacy_f16, the other two as
;     v_interp_p2_f16.
; NOTE(review): the 32BANK / 16BANK prefix names presumably refer to the LDS
; bank count of the subtarget -- confirm against the subtarget definitions.

; Interpolate attr2.y twice (high = 0 and high = 1) and add the two half results.
define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
; GFX9-32BANK-LABEL: interp_f16:
; GFX9-32BANK:       ; %bb.0: ; %main_body
; GFX9-32BANK-NEXT:    s_mov_b32 m0, s2
; GFX9-32BANK-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX9-32BANK-NEXT:    v_interp_p1ll_f16 v1, v0, attr2.y
; GFX9-32BANK-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y high
; GFX9-32BANK-NEXT:    v_interp_p2_legacy_f16 v1, v2, attr2.y, v1
; GFX9-32BANK-NEXT:    v_interp_p2_legacy_f16 v0, v2, attr2.y, v0 high
; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX9-32BANK-NEXT:    v_add_f16_e32 v0, v1, v0
; GFX9-32BANK-NEXT:    ; return to shader part epilog
;
; GFX8-32BANK-LABEL: interp_f16:
; GFX8-32BANK:       ; %bb.0: ; %main_body
; GFX8-32BANK-NEXT:    s_mov_b32 m0, s2
; GFX8-32BANK-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX8-32BANK-NEXT:    v_interp_p1ll_f16 v1, v0, attr2.y
; GFX8-32BANK-NEXT:    v_mov_b32_e32 v2, s1
; GFX8-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y high
; GFX8-32BANK-NEXT:    v_interp_p2_f16 v1, v2, attr2.y, v1
; GFX8-32BANK-NEXT:    v_interp_p2_f16 v0, v2, attr2.y, v0 high
; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX8-32BANK-NEXT:    v_add_f16_e32 v0, v1, v0
; GFX8-32BANK-NEXT:    ; return to shader part epilog
;
; GFX8-16BANK-LABEL: interp_f16:
; GFX8-16BANK:       ; %bb.0: ; %main_body
; GFX8-16BANK-NEXT:    s_mov_b32 m0, s2
; GFX8-16BANK-NEXT:    v_interp_mov_f32_e32 v0, p0, attr2.y
; GFX8-16BANK-NEXT:    v_mov_b32_e32 v1, s0
; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX8-16BANK-NEXT:    v_interp_p1lv_f16 v2, v1, attr2.y, v0
; GFX8-16BANK-NEXT:    v_mov_b32_e32 v3, s1
; GFX8-16BANK-NEXT:    v_interp_p1lv_f16 v0, v1, attr2.y, v0 high
; GFX8-16BANK-NEXT:    v_interp_p2_f16 v2, v3, attr2.y, v2
; GFX8-16BANK-NEXT:    v_interp_p2_f16 v0, v3, attr2.y, v0 high
; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX8-16BANK-NEXT:    v_add_f16_e32 v0, v2, v0
; GFX8-16BANK-NEXT:    ; return to shader part epilog
main_body:
  ; Same attribute (attrchan 1, attr 2) in both pairs; only the 'high' flag differs.
  %p1_0 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 1, i32 2, i1 0, i32 %m0)
  %p2_0 = call half @llvm.amdgcn.interp.p2.f16(float %p1_0, float %j, i32 1, i32 2, i1 0, i32 %m0)
  %p1_1 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 1, i32 2, i1 1, i32 %m0)
  %p2_1 = call half @llvm.amdgcn.interp.p2.f16(float %p1_1, float %j, i32 1, i32 2, i1 1, i32 %m0)
  %res = fadd half %p2_0, %p2_1
  ret half %res
}

; Check that m0 is set up correctly before the interp p1 instruction.
; The inline asm below writes m0 ahead of the p1 call, so the expected output
; copies the asm result out of m0 and then reloads m0 from s2 before the
; interp instructions.
define amdgpu_ps half @interp_p1_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
; GFX9-32BANK-LABEL: interp_p1_m0_setup:
; GFX9-32BANK:       ; %bb.0: ; %main_body
; GFX9-32BANK-NEXT:    ;;#ASMSTART
; GFX9-32BANK-NEXT:    s_mov_b32 m0, 0
; GFX9-32BANK-NEXT:    ;;#ASMEND
; GFX9-32BANK-NEXT:    s_mov_b32 s3, m0
; GFX9-32BANK-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-32BANK-NEXT:    s_mov_b32 m0, s2
; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX9-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
; GFX9-32BANK-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-32BANK-NEXT:    v_interp_p2_legacy_f16 v0, v1, attr2.y, v0
; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX9-32BANK-NEXT:    v_add_f16_e32 v0, s3, v0
; GFX9-32BANK-NEXT:    ; return to shader part epilog
;
; GFX8-32BANK-LABEL: interp_p1_m0_setup:
; GFX8-32BANK:       ; %bb.0: ; %main_body
; GFX8-32BANK-NEXT:    ;;#ASMSTART
; GFX8-32BANK-NEXT:    s_mov_b32 m0, 0
; GFX8-32BANK-NEXT:    ;;#ASMEND
; GFX8-32BANK-NEXT:    s_mov_b32 s3, m0
; GFX8-32BANK-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-32BANK-NEXT:    s_mov_b32 m0, s2
; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX8-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
; GFX8-32BANK-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-32BANK-NEXT:    v_interp_p2_f16 v0, v1, attr2.y, v0
; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX8-32BANK-NEXT:    v_add_f16_e32 v0, s3, v0
; GFX8-32BANK-NEXT:    ; return to shader part epilog
;
; GFX8-16BANK-LABEL: interp_p1_m0_setup:
; GFX8-16BANK:       ; %bb.0: ; %main_body
; GFX8-16BANK-NEXT:    ;;#ASMSTART
; GFX8-16BANK-NEXT:    s_mov_b32 m0, 0
; GFX8-16BANK-NEXT:    ;;#ASMEND
; GFX8-16BANK-NEXT:    s_mov_b32 s3, m0
; GFX8-16BANK-NEXT:    s_mov_b32 m0, s2
; GFX8-16BANK-NEXT:    v_interp_mov_f32_e32 v0, p0, attr2.y
; GFX8-16BANK-NEXT:    v_mov_b32_e32 v1, s0
; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX8-16BANK-NEXT:    v_interp_p1lv_f16 v0, v1, attr2.y, v0
; GFX8-16BANK-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-16BANK-NEXT:    v_interp_p2_f16 v0, v1, attr2.y, v0
; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX8-16BANK-NEXT:    v_add_f16_e32 v0, s3, v0
; GFX8-16BANK-NEXT:    ; return to shader part epilog
main_body:
  ; The asm result is constrained to m0 ("={m0}"); it is placed before the p1
  ; call, which takes %m0 as its m0 operand.
  %mx = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
  %p1_0 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 1, i32 2, i1 0, i32 %m0)
  %p2_0 = call half @llvm.amdgcn.interp.p2.f16(float %p1_0, float %j, i32 1, i32 2, i1 0, i32 %m0)
  ; Use the asm result (low 16 bits reinterpreted as half) so it stays live.
  %my = trunc i32 %mx to i16
  %mh = bitcast i16 %my to half
  %res = fadd half %p2_0, %mh
  ret half %res
}

; Check that m0 is set up correctly before the interp p2 instruction.
; Here the inline asm that writes m0 sits between the p1 and p2 calls, so the
; expected output loads m0 from s2 once before the p1 instruction and again
; after the asm, before the p2 instruction.
define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
; GFX9-32BANK-LABEL: interp_p2_m0_setup:
; GFX9-32BANK:       ; %bb.0: ; %main_body
; GFX9-32BANK-NEXT:    s_mov_b32 m0, s2
; GFX9-32BANK-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX9-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
; GFX9-32BANK-NEXT:    ;;#ASMSTART
; GFX9-32BANK-NEXT:    s_mov_b32 m0, 0
; GFX9-32BANK-NEXT:    ;;#ASMEND
; GFX9-32BANK-NEXT:    s_mov_b32 s0, m0
; GFX9-32BANK-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-32BANK-NEXT:    s_mov_b32 m0, s2
; GFX9-32BANK-NEXT:    v_interp_p2_legacy_f16 v0, v1, attr2.y, v0
; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX9-32BANK-NEXT:    v_add_f16_e32 v0, s0, v0
; GFX9-32BANK-NEXT:    ; return to shader part epilog
;
; GFX8-32BANK-LABEL: interp_p2_m0_setup:
; GFX8-32BANK:       ; %bb.0: ; %main_body
; GFX8-32BANK-NEXT:    s_mov_b32 m0, s2
; GFX8-32BANK-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX8-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
; GFX8-32BANK-NEXT:    ;;#ASMSTART
; GFX8-32BANK-NEXT:    s_mov_b32 m0, 0
; GFX8-32BANK-NEXT:    ;;#ASMEND
; GFX8-32BANK-NEXT:    s_mov_b32 s0, m0
; GFX8-32BANK-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-32BANK-NEXT:    s_mov_b32 m0, s2
; GFX8-32BANK-NEXT:    v_interp_p2_f16 v0, v1, attr2.y, v0
; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX8-32BANK-NEXT:    v_add_f16_e32 v0, s0, v0
; GFX8-32BANK-NEXT:    ; return to shader part epilog
;
; GFX8-16BANK-LABEL: interp_p2_m0_setup:
; GFX8-16BANK:       ; %bb.0: ; %main_body
; GFX8-16BANK-NEXT:    s_mov_b32 m0, s2
; GFX8-16BANK-NEXT:    v_interp_mov_f32_e32 v0, p0, attr2.y
; GFX8-16BANK-NEXT:    v_mov_b32_e32 v1, s0
; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GFX8-16BANK-NEXT:    v_interp_p1lv_f16 v0, v1, attr2.y, v0
; GFX8-16BANK-NEXT:    ;;#ASMSTART
; GFX8-16BANK-NEXT:    s_mov_b32 m0, 0
; GFX8-16BANK-NEXT:    ;;#ASMEND
; GFX8-16BANK-NEXT:    s_mov_b32 s0, m0
; GFX8-16BANK-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-16BANK-NEXT:    s_mov_b32 m0, s2
; GFX8-16BANK-NEXT:    v_interp_p2_f16 v0, v1, attr2.y, v0
; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX8-16BANK-NEXT:    v_add_f16_e32 v0, s0, v0
; GFX8-16BANK-NEXT:    ; return to shader part epilog
main_body:
  %p1_0 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 1, i32 2, i1 0, i32 %m0)
  ; The asm result is constrained to m0 ("={m0}"); it is placed between the p1
  ; and p2 calls, and the p2 call takes %m0 as its m0 operand.
  %mx = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
  %p2_0 = call half @llvm.amdgcn.interp.p2.f16(float %p1_0, float %j, i32 1, i32 2, i1 0, i32 %m0)
  ; Use the asm result (low 16 bits reinterpreted as half) so it stays live.
  %my = trunc i32 %mx to i16
  %mh = bitcast i16 %my to half
  %res = fadd half %p2_0, %mh
  ret half %res
}

; float @llvm.amdgcn.interp.p1.f16(i, attrchan, attr, high, m0)
declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0
; half @llvm.amdgcn.interp.p2.f16(p1, j, attrchan, attr, high, m0)
declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0
; NOTE(review): no call to this intrinsic is visible in this file; the
; v_interp_mov_f32 in the gfx810 expectations comes from the interp.p1.f16
; calls. Confirm whether this declaration is still needed.
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0

attributes #0 = { nounwind readnone }