1; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
2; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
3; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
4; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,FUNC %s
5; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
6
7; These tests check that fdiv is expanded correctly and also test that the
8; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
9; instruction groups.
10
11; These tests check fdiv using unsafe_fp_math, coarse fp div, and IEEE754 fp div.
12
13; FUNC-LABEL: {{^}}fdiv_f32:
14; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
15; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
16
17; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
18; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
19; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
20
21; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
22; GFX10: s_denorm_mode 15
23; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
24; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
25; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
26; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
27; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
28; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
29; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
30; GFX10: s_denorm_mode 12
31; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
32; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
; Kernel: scalar f32 division carrying only the ninf flag, compiled with
; attribute group #0 (unsafe-fp-math off, f32 denormal mode preserve-sign).
; The checks preceding this function expect the div_scale / rcp / fma /
; div_fmas / div_fixup expansion, bracketed by denormal-mode switches.
33define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
34entry:
35  %fdiv = fdiv ninf float %a, %b
; Store the quotient so the division stays live in the generated code.
36  store float %fdiv, float addrspace(1)* %out
37  ret void
38}
39
40; FUNC-LABEL: {{^}}fdiv_f32_denormals:
41; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
42; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
43
44; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
45; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
46
47; PREGFX10-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
48; PREGFX10-NOT: s_setreg
49; PREGFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
50; PREGFX10: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
51; PREGFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
52; PREGFX10: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
53; PREGFX10: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
54; PREGFX10: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
55; PREGFX10-NOT: s_setreg
56
57; GFX10-NOT: s_denorm_mode
58; GFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
59; GFX10: v_fmac_f32_e32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]]
60; GFX10: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
61; GFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
62; GFX10: v_fma_f32 [[D:v[0-9]+]], [[C]], -[[NUM_SCALE]], [[DEN_SCALE]]
63; GFX10: v_fmac_f32_e32 [[E:v[0-9]+]], [[D]], [[B]]
64; GFX10: v_fmac_f32_e64 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]]
65; GFX10-NOT: s_denorm_mode
66
67; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
68; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
; Kernel: plain scalar f32 division (no fast-math flags), compiled with
; attribute group #2, which requests IEEE f32 denormal handling.
; The checks preceding this function expect the full expansion with no
; denormal-mode switching around it.
69define amdgpu_kernel void @fdiv_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
70entry:
71  %fdiv = fdiv float %a, %b
; Store the quotient to the global output pointer.
72  store float %fdiv, float addrspace(1)* %out
73  ret void
74}
75
76; FUNC-LABEL: {{^}}fdiv_25ulp_f32:
77; GCN: v_cndmask_b32
78; GCN: v_mul_f32
79; GCN: v_rcp_f32
80; GCN: v_mul_f32
81; GCN: v_mul_f32
; Kernel: scalar f32 division annotated with !fpmath !0 (float 2.5),
; compiled with attribute group #0 (f32 denormal mode preserve-sign).
; The checks preceding this function expect a cndmask / mul / rcp / mul / mul
; sequence rather than the full div_scale expansion.
82define amdgpu_kernel void @fdiv_25ulp_f32(float addrspace(1)* %out, float %a, float %b) #0 {
83entry:
; The !fpmath metadata is the only relaxation; no fast-math flags are set.
84  %fdiv = fdiv float %a, %b, !fpmath !0
85  store float %fdiv, float addrspace(1)* %out
86  ret void
87}
88
89; Use correct fdiv
90; FUNC-LABEL: {{^}}fdiv_25ulp_denormals_f32:
91; GCN: v_fma_f32
92; GCN: v_div_fmas_f32
93; GCN: v_div_fixup_f32
; Kernel: same !fpmath !0 annotated division as fdiv_25ulp_f32, but compiled
; with attribute group #2 (IEEE f32 denormals). The checks preceding this
; function expect the fma / div_fmas / div_fixup expansion instead of the
; reduced-precision sequence.
94define amdgpu_kernel void @fdiv_25ulp_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
95entry:
96  %fdiv = fdiv float %a, %b, !fpmath !0
97  store float %fdiv, float addrspace(1)* %out
98  ret void
99}
100
101; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32:
102; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
103; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
104; GCN-NOT: [[RESULT]]
105; PREGFX10-NOT: s_setreg
106; GFX10-NOT: s_denorm_mode
107; GCN: buffer_store_dword [[RESULT]]
; Kernel: scalar f32 division with the `fast` flag, compiled with attribute
; group #2 (IEEE f32 denormals). The checks preceding this function expect a
; single rcp followed by a mul, with no denormal-mode switching.
108define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
109entry:
110  %fdiv = fdiv fast float %a, %b
; The stored value is the register the checks bind as RESULT.
111  store float %fdiv, float addrspace(1)* %out
112  ret void
113}
114
115; FUNC-LABEL: {{^}}fdiv_f32_fast_math:
116; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
117; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[2].Z,
118
119; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
120; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
121; GCN-NOT: [[RESULT]]
122; GCN: buffer_store_dword [[RESULT]]
; Kernel: scalar f32 division with the `fast` flag, compiled with attribute
; group #0 (f32 denormal mode preserve-sign). The checks preceding this
; function expect a reciprocal followed by a multiply on both target families.
123define amdgpu_kernel void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
124entry:
125  %fdiv = fdiv fast float %a, %b
126  store float %fdiv, float addrspace(1)* %out
127  ret void
128}
129
130; FUNC-LABEL: {{^}}fdiv_ulp25_f32_fast_math:
131; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
132; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[2].Z,
133
134; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
135; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
136; GCN-NOT: [[RESULT]]
137; GCN: buffer_store_dword [[RESULT]]
; Kernel: scalar f32 division carrying both the `fast` flag and the
; !fpmath !0 (float 2.5) annotation, compiled with attribute group #0.
; The checks preceding this function expect the same rcp + mul form as the
; plain `fast` case.
138define amdgpu_kernel void @fdiv_ulp25_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
139entry:
140  %fdiv = fdiv fast float %a, %b, !fpmath !0
141  store float %fdiv, float addrspace(1)* %out
142  ret void
143}
144
145; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
146; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
147; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[2].Z,
148
149; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
150; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
151; GCN-NOT: [[RESULT]]
152; GCN: buffer_store_dword [[RESULT]]
; Kernel: scalar f32 division with the `arcp` and `ninf` flags (not full
; `fast`), compiled with attribute group #0. The checks preceding this
; function expect a reciprocal followed by a multiply.
153define amdgpu_kernel void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
154entry:
155  %fdiv = fdiv arcp ninf float %a, %b
156  store float %fdiv, float addrspace(1)* %out
157  ret void
158}
159
160; FUNC-LABEL: {{^}}fdiv_v2f32:
161; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
162; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
163; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
164; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
165
166; GCN: v_div_scale_f32
167; GCN: v_div_scale_f32
168; GCN: v_div_scale_f32
169; GCN: v_div_scale_f32
; Kernel: element-wise division of two <2 x float> kernel arguments with no
; fast-math flags, compiled with attribute group #0. The checks preceding this
; function expect four div_scale instructions in the amdgcn output.
170define amdgpu_kernel void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
171entry:
172  %fdiv = fdiv <2 x float> %a, %b
; Store both lanes of the quotient to the output pointer.
173  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
174  ret void
175}
176
177; FUNC-LABEL: {{^}}fdiv_ulp25_v2f32:
178; GCN: v_rcp_f32
179; GCN: v_rcp_f32
180; GCN-NOT: v_cmp_gt_f32
; Kernel: <2 x float> division with the `arcp` flag and the !fpmath !0
; (float 2.5) annotation, compiled with attribute group #0. The checks
; preceding this function expect two rcp instructions and no v_cmp_gt_f32.
181define amdgpu_kernel void @fdiv_ulp25_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
182entry:
183  %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0
184  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
185  ret void
186}
187
188; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math:
189; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
190; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
191; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[3].X,
192; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[2].W,
193
194; GCN: v_rcp_f32
195; GCN: v_rcp_f32
; Kernel: <2 x float> division with the `fast` flag, compiled with attribute
; group #0. The checks preceding this function expect one reciprocal per
; vector element.
196define amdgpu_kernel void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
197entry:
198  %fdiv = fdiv fast <2 x float> %a, %b
199  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
200  ret void
201}
202
203; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math:
204; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
205; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
206; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[3].X,
207; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[2].W,
208
209; GCN: v_rcp_f32
210; GCN: v_rcp_f32
; Kernel: <2 x float> division with the `arcp` and `ninf` flags, compiled
; with attribute group #0. The checks preceding this function expect one
; reciprocal per vector element.
211define amdgpu_kernel void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
212entry:
213  %fdiv = fdiv arcp ninf <2 x float> %a, %b
214  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
215  ret void
216}
217
218; FUNC-LABEL: {{^}}fdiv_v4f32:
219; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
220; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
221; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
222; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
223; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
224; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
225; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
226; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
227
228; GCN: v_div_fixup_f32
229; GCN: v_div_fixup_f32
230; GCN: v_div_fixup_f32
231; GCN: v_div_fixup_f32
; Kernel: <4 x float> division with no fast-math flags, compiled with
; attribute group #0. Unlike the scalar and v2 tests, both operands are
; loaded from global memory rather than passed as kernel arguments.
; The checks preceding this function expect four div_fixup instructions.
232define amdgpu_kernel void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
; The divisor is the <4 x float> immediately following the dividend at %in.
233  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
234  %a = load <4 x float>, <4 x float> addrspace(1) * %in
235  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
236  %result = fdiv <4 x float> %a, %b
237  store <4 x float> %result, <4 x float> addrspace(1)* %out
238  ret void
239}
240
241; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math:
242; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
243; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
244; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
245; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
246; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
247; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
248; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
249; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
250
251; GCN: v_rcp_f32
252; GCN: v_rcp_f32
253; GCN: v_rcp_f32
254; GCN: v_rcp_f32
; Kernel: <4 x float> division with the `fast` flag, compiled with attribute
; group #0. Both operands are loaded from global memory. The checks preceding
; this function expect four rcp instructions in the amdgcn output.
255define amdgpu_kernel void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
; The divisor is the <4 x float> immediately following the dividend at %in.
256  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
257  %a = load <4 x float>, <4 x float> addrspace(1) * %in
258  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
259  %result = fdiv fast <4 x float> %a, %b
260  store <4 x float> %result, <4 x float> addrspace(1)* %out
261  ret void
262}
263
264; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math:
265; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
266; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
267; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
268; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
269; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
270; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
271; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
272; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
273
274; GCN: v_rcp_f32
275; GCN: v_rcp_f32
276; GCN: v_rcp_f32
277; GCN: v_rcp_f32
; Kernel: <4 x float> division with the `arcp` and `ninf` flags, compiled
; with attribute group #0. Both operands are loaded from global memory. The
; checks preceding this function expect four rcp instructions in the amdgcn
; output.
278define amdgpu_kernel void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
; The divisor is the <4 x float> immediately following the dividend at %in.
279  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
280  %a = load <4 x float>, <4 x float> addrspace(1) * %in
281  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
282  %result = fdiv arcp ninf <4 x float> %a, %b
283  store <4 x float> %result, <4 x float> addrspace(1)* %out
284  ret void
285}
286
287; FUNC-LABEL: {{^}}fdiv_f32_correctly_rounded_divide_sqrt:
288
289; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
290; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
291; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
292
293; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
294; GFX10: s_denorm_mode 15
295; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
296; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
297; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
298; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
299; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
300; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
301; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
302; GFX10: s_denorm_mode 12
303; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
304; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
305
; Kernel: reciprocal written as 1.0 / %a with no fast-math flags and no
; !fpmath annotation, compiled with attribute group #0 (f32 denormal mode
; preserve-sign). The checks preceding this function expect the full
; div_scale / fma / div_fmas / div_fixup expansion with denormal-mode
; switches, not a bare rcp.
306define amdgpu_kernel void @fdiv_f32_correctly_rounded_divide_sqrt(float addrspace(1)* %out, float %a) #0 {
307entry:
; The constant 1.0 numerator is what makes this a reciprocal.
308  %fdiv = fdiv float 1.000000e+00, %a
309  store float %fdiv, float addrspace(1)* %out
310  ret void
311}
312
313
314; FUNC-LABEL: {{^}}fdiv_f32_denorms_correctly_rounded_divide_sqrt:
315
316; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
317; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
318
319; PREGFX10-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
320; PREGFX10-NOT: s_setreg
321; PREGFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
322; PREGFX10: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
323; PREGFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
324; PREGFX10: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
325; PREGFX10: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
326; PREGFX10: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
327; PREGFX10-NOT: s_setreg
328
329; GFX10-NOT: s_denorm_mode
330; GFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
331; GFX10: v_fmac_f32_e32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]]
332; GFX10: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
333; GFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
334; GFX10: v_fma_f32 [[D:v[0-9]+]], [[C]], -[[NUM_SCALE]], [[DEN_SCALE]]
335; GFX10: v_fmac_f32_e32 [[E:v[0-9]+]], [[D]], [[B]]
336; GFX10: v_fmac_f32_e64 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]]
337; GFX10-NOT: s_denorm_mode
338
339; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
340; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
; Kernel: reciprocal written as 1.0 / %a with no fast-math flags and no
; !fpmath annotation, compiled with attribute group #2 (IEEE f32 denormals).
; The checks preceding this function expect the full expansion with no
; denormal-mode switching around it.
341define amdgpu_kernel void @fdiv_f32_denorms_correctly_rounded_divide_sqrt(float addrspace(1)* %out, float %a) #2 {
342entry:
; The constant 1.0 numerator is what makes this a reciprocal.
343  %fdiv = fdiv float 1.000000e+00, %a
344  store float %fdiv, float addrspace(1)* %out
345  ret void
346}
347
348attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
349attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
350attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" "target-features"="-flat-for-global" }
351
352!0 = !{float 2.500000e+00}
353