1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-32BANK %s
3; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8-32BANK %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8-16BANK %s
5
; Basic f16 interpolation test: runs the p1/p2 intrinsic pair twice on the same
; attribute (attrchan 1, attr 2, printed as attr2.y in the expected output),
; once with the `high` flag clear and once with it set, then returns the sum of
; the two half results.
;
; %i, %j and %m0 arrive as inreg arguments; the expected output reads them from
; s0, s1 and s2 and copies s2 into m0 before the first interp instruction.
; The expected output differs per run line:
;   - gfx900 uses v_interp_p2_legacy_f16 where fiji/gfx810 use v_interp_p2_f16.
;   - gfx810 emits an extra v_interp_mov_f32 and uses v_interp_p1lv_f16 in place
;     of v_interp_p1ll_f16.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
6define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
7; GFX9-32BANK-LABEL: interp_f16:
8; GFX9-32BANK:       ; %bb.0: ; %main_body
9; GFX9-32BANK-NEXT:    s_mov_b32 m0, s2
10; GFX9-32BANK-NEXT:    v_mov_b32_e32 v0, s0
11; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
12; GFX9-32BANK-NEXT:    v_interp_p1ll_f16 v1, v0, attr2.y
13; GFX9-32BANK-NEXT:    v_mov_b32_e32 v2, s1
14; GFX9-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y high
15; GFX9-32BANK-NEXT:    v_interp_p2_legacy_f16 v1, v2, attr2.y, v1
16; GFX9-32BANK-NEXT:    v_interp_p2_legacy_f16 v0, v2, attr2.y, v0 high
17; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
18; GFX9-32BANK-NEXT:    v_add_f16_e32 v0, v1, v0
19; GFX9-32BANK-NEXT:    ; return to shader part epilog
20;
21; GFX8-32BANK-LABEL: interp_f16:
22; GFX8-32BANK:       ; %bb.0: ; %main_body
23; GFX8-32BANK-NEXT:    s_mov_b32 m0, s2
24; GFX8-32BANK-NEXT:    v_mov_b32_e32 v0, s0
25; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
26; GFX8-32BANK-NEXT:    v_interp_p1ll_f16 v1, v0, attr2.y
27; GFX8-32BANK-NEXT:    v_mov_b32_e32 v2, s1
28; GFX8-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y high
29; GFX8-32BANK-NEXT:    v_interp_p2_f16 v1, v2, attr2.y, v1
30; GFX8-32BANK-NEXT:    v_interp_p2_f16 v0, v2, attr2.y, v0 high
31; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
32; GFX8-32BANK-NEXT:    v_add_f16_e32 v0, v1, v0
33; GFX8-32BANK-NEXT:    ; return to shader part epilog
34;
35; GFX8-16BANK-LABEL: interp_f16:
36; GFX8-16BANK:       ; %bb.0: ; %main_body
37; GFX8-16BANK-NEXT:    s_mov_b32 m0, s2
38; GFX8-16BANK-NEXT:    v_interp_mov_f32_e32 v0, p0, attr2.y
39; GFX8-16BANK-NEXT:    v_mov_b32_e32 v1, s0
40; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
41; GFX8-16BANK-NEXT:    v_interp_p1lv_f16 v2, v1, attr2.y, v0
42; GFX8-16BANK-NEXT:    v_mov_b32_e32 v3, s1
43; GFX8-16BANK-NEXT:    v_interp_p1lv_f16 v0, v1, attr2.y, v0 high
44; GFX8-16BANK-NEXT:    v_interp_p2_f16 v2, v3, attr2.y, v2
45; GFX8-16BANK-NEXT:    v_interp_p2_f16 v0, v3, attr2.y, v0 high
46; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
47; GFX8-16BANK-NEXT:    v_add_f16_e32 v0, v2, v0
48; GFX8-16BANK-NEXT:    ; return to shader part epilog
49main_body:
; First pair: `high` operand (the i1 argument) is 0.
50  %p1_0 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 1, i32 2, i1 0, i32 %m0)
51  %p2_0 = call half @llvm.amdgcn.interp.p2.f16(float %p1_0, float %j, i32 1, i32 2, i1 0, i32 %m0)
; Second pair: same attribute with `high` set to 1; the expected asm carries
; the `high` modifier on these instructions only.
52  %p1_1 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 1, i32 2, i1 1, i32 %m0)
53  %p2_1 = call half @llvm.amdgcn.interp.p2.f16(float %p1_1, float %j, i32 1, i32 2, i1 1, i32 %m0)
; Combine both results so neither interpolation is dead.
54  %res = fadd half %p2_0, %p2_1
55  ret half %res
56}
57
58; check that m0 is set up correctly before the interp p1 instruction
; An inline asm statement that writes m0 (output constraint "={m0}") is placed
; ahead of the p1 intrinsic. The expected output for every run line first saves
; the asm's m0 value into s3, then writes the %m0 argument (s2) into m0 before
; any interp instruction is issued. The saved asm value is later added to the
; interpolated result, which keeps the asm output live.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
59define amdgpu_ps half @interp_p1_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
60; GFX9-32BANK-LABEL: interp_p1_m0_setup:
61; GFX9-32BANK:       ; %bb.0: ; %main_body
62; GFX9-32BANK-NEXT:    ;;#ASMSTART
63; GFX9-32BANK-NEXT:    s_mov_b32 m0, 0
64; GFX9-32BANK-NEXT:    ;;#ASMEND
65; GFX9-32BANK-NEXT:    s_mov_b32 s3, m0
66; GFX9-32BANK-NEXT:    v_mov_b32_e32 v0, s0
67; GFX9-32BANK-NEXT:    s_mov_b32 m0, s2
68; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
69; GFX9-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
70; GFX9-32BANK-NEXT:    v_mov_b32_e32 v1, s1
71; GFX9-32BANK-NEXT:    v_interp_p2_legacy_f16 v0, v1, attr2.y, v0
72; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
73; GFX9-32BANK-NEXT:    v_add_f16_e32 v0, s3, v0
74; GFX9-32BANK-NEXT:    ; return to shader part epilog
75;
76; GFX8-32BANK-LABEL: interp_p1_m0_setup:
77; GFX8-32BANK:       ; %bb.0: ; %main_body
78; GFX8-32BANK-NEXT:    ;;#ASMSTART
79; GFX8-32BANK-NEXT:    s_mov_b32 m0, 0
80; GFX8-32BANK-NEXT:    ;;#ASMEND
81; GFX8-32BANK-NEXT:    s_mov_b32 s3, m0
82; GFX8-32BANK-NEXT:    v_mov_b32_e32 v0, s0
83; GFX8-32BANK-NEXT:    s_mov_b32 m0, s2
84; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
85; GFX8-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
86; GFX8-32BANK-NEXT:    v_mov_b32_e32 v1, s1
87; GFX8-32BANK-NEXT:    v_interp_p2_f16 v0, v1, attr2.y, v0
88; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
89; GFX8-32BANK-NEXT:    v_add_f16_e32 v0, s3, v0
90; GFX8-32BANK-NEXT:    ; return to shader part epilog
91;
92; GFX8-16BANK-LABEL: interp_p1_m0_setup:
93; GFX8-16BANK:       ; %bb.0: ; %main_body
94; GFX8-16BANK-NEXT:    ;;#ASMSTART
95; GFX8-16BANK-NEXT:    s_mov_b32 m0, 0
96; GFX8-16BANK-NEXT:    ;;#ASMEND
97; GFX8-16BANK-NEXT:    s_mov_b32 s3, m0
98; GFX8-16BANK-NEXT:    s_mov_b32 m0, s2
99; GFX8-16BANK-NEXT:    v_interp_mov_f32_e32 v0, p0, attr2.y
100; GFX8-16BANK-NEXT:    v_mov_b32_e32 v1, s0
101; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
102; GFX8-16BANK-NEXT:    v_interp_p1lv_f16 v0, v1, attr2.y, v0
103; GFX8-16BANK-NEXT:    v_mov_b32_e32 v1, s1
104; GFX8-16BANK-NEXT:    v_interp_p2_f16 v0, v1, attr2.y, v0
105; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
106; GFX8-16BANK-NEXT:    v_add_f16_e32 v0, s3, v0
107; GFX8-16BANK-NEXT:    ; return to shader part epilog
108main_body:
; Overwrite m0 with a value unrelated to %m0 before the p1 call; %mx captures
; the register's contents as the asm output.
109  %mx = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
110  %p1_0 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 1, i32 2, i1 0, i32 %m0)
111  %p2_0 = call half @llvm.amdgcn.interp.p2.f16(float %p1_0, float %j, i32 1, i32 2, i1 0, i32 %m0)
; Reinterpret the low 16 bits of the asm result as a half and fold it into the
; return value so that %mx has a use after the interp calls.
112  %my = trunc i32 %mx to i16
113  %mh = bitcast i16 %my to half
114  %res = fadd half %p2_0, %mh
115  ret half %res
116}
117
118; check that m0 is set up correctly before the interp p2 instruction
; Here the inline asm that writes m0 (output constraint "={m0}") sits between
; the p1 and p2 intrinsics. The expected output for every run line writes s2
; into m0 twice: once before the p1 instruction, and again after the asm block
; (whose m0 value is first saved to s0) and before the p2 instruction.
; The saved asm value is later added to the interpolated result, which keeps
; the asm output live.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
119define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
120; GFX9-32BANK-LABEL: interp_p2_m0_setup:
121; GFX9-32BANK:       ; %bb.0: ; %main_body
122; GFX9-32BANK-NEXT:    s_mov_b32 m0, s2
123; GFX9-32BANK-NEXT:    v_mov_b32_e32 v0, s0
124; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
125; GFX9-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
126; GFX9-32BANK-NEXT:    ;;#ASMSTART
127; GFX9-32BANK-NEXT:    s_mov_b32 m0, 0
128; GFX9-32BANK-NEXT:    ;;#ASMEND
129; GFX9-32BANK-NEXT:    s_mov_b32 s0, m0
130; GFX9-32BANK-NEXT:    v_mov_b32_e32 v1, s1
131; GFX9-32BANK-NEXT:    s_mov_b32 m0, s2
132; GFX9-32BANK-NEXT:    v_interp_p2_legacy_f16 v0, v1, attr2.y, v0
133; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
134; GFX9-32BANK-NEXT:    v_add_f16_e32 v0, s0, v0
135; GFX9-32BANK-NEXT:    ; return to shader part epilog
136;
137; GFX8-32BANK-LABEL: interp_p2_m0_setup:
138; GFX8-32BANK:       ; %bb.0: ; %main_body
139; GFX8-32BANK-NEXT:    s_mov_b32 m0, s2
140; GFX8-32BANK-NEXT:    v_mov_b32_e32 v0, s0
141; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
142; GFX8-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
143; GFX8-32BANK-NEXT:    ;;#ASMSTART
144; GFX8-32BANK-NEXT:    s_mov_b32 m0, 0
145; GFX8-32BANK-NEXT:    ;;#ASMEND
146; GFX8-32BANK-NEXT:    s_mov_b32 s0, m0
147; GFX8-32BANK-NEXT:    v_mov_b32_e32 v1, s1
148; GFX8-32BANK-NEXT:    s_mov_b32 m0, s2
149; GFX8-32BANK-NEXT:    v_interp_p2_f16 v0, v1, attr2.y, v0
150; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
151; GFX8-32BANK-NEXT:    v_add_f16_e32 v0, s0, v0
152; GFX8-32BANK-NEXT:    ; return to shader part epilog
153;
154; GFX8-16BANK-LABEL: interp_p2_m0_setup:
155; GFX8-16BANK:       ; %bb.0: ; %main_body
156; GFX8-16BANK-NEXT:    s_mov_b32 m0, s2
157; GFX8-16BANK-NEXT:    v_interp_mov_f32_e32 v0, p0, attr2.y
158; GFX8-16BANK-NEXT:    v_mov_b32_e32 v1, s0
159; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
160; GFX8-16BANK-NEXT:    v_interp_p1lv_f16 v0, v1, attr2.y, v0
161; GFX8-16BANK-NEXT:    ;;#ASMSTART
162; GFX8-16BANK-NEXT:    s_mov_b32 m0, 0
163; GFX8-16BANK-NEXT:    ;;#ASMEND
164; GFX8-16BANK-NEXT:    s_mov_b32 s0, m0
165; GFX8-16BANK-NEXT:    v_mov_b32_e32 v1, s1
166; GFX8-16BANK-NEXT:    s_mov_b32 m0, s2
167; GFX8-16BANK-NEXT:    v_interp_p2_f16 v0, v1, attr2.y, v0
168; GFX8-16BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
169; GFX8-16BANK-NEXT:    v_add_f16_e32 v0, s0, v0
170; GFX8-16BANK-NEXT:    ; return to shader part epilog
171main_body:
172  %p1_0 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 1, i32 2, i1 0, i32 %m0)
; Overwrite m0 after p1 has been issued but before p2; %mx captures the
; register's contents as the asm output.
173  %mx = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
174  %p2_0 = call half @llvm.amdgcn.interp.p2.f16(float %p1_0, float %j, i32 1, i32 2, i1 0, i32 %m0)
; Reinterpret the low 16 bits of the asm result as a half and fold it into the
; return value so that %mx has a use after the p2 call.
175  %my = trunc i32 %mx to i16
176  %mh = bitcast i16 %my to half
177  %res = fadd half %p2_0, %mh
178  ret half %res
179}
180
181; float @llvm.amdgcn.interp.p1.f16(i, attrchan, attr, high, m0)
182declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0
183; half @llvm.amdgcn.interp.p2.f16(p1, j, attrchan, attr, high, m0)
184declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0
185declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
186
187attributes #0 = { nounwind readnone }
188