; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

; These tests check that fdiv is expanded correctly and also test that the
; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
; instruction groups.

; These tests check fdiv under unsafe_fp_math, coarse (2.5 ulp) fp div, and
; IEEE754 fp div.

; FUNC-LABEL: {{^}}fdiv_f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]

; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX10: s_denorm_mode 15
; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX10: s_denorm_mode 12
; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv ninf float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_f32_denormals:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]

; PREGFX10-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
; PREGFX10-NOT: s_setreg
; PREGFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
; PREGFX10: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
; PREGFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
; PREGFX10: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
; PREGFX10: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
; PREGFX10: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
; PREGFX10-NOT: s_setreg

; GFX10-NOT: s_denorm_mode
; GFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
; GFX10: v_fmac_f32_e32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]]
; GFX10: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
; GFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
; GFX10: v_fma_f32 [[D:v[0-9]+]], [[C]], -[[NUM_SCALE]], [[DEN_SCALE]]
; GFX10: v_fmac_f32_e32 [[E:v[0-9]+]], [[D]], [[B]]
; GFX10: v_fmac_f32_e64 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]]
; GFX10-NOT: s_denorm_mode

; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
define amdgpu_kernel void @fdiv_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
entry:
  %fdiv = fdiv float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_25ulp_f32:
; GCN: v_cndmask_b32
; GCN: v_mul_f32
; GCN: v_rcp_f32
; GCN: v_mul_f32
; GCN: v_mul_f32
define amdgpu_kernel void @fdiv_25ulp_f32(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv float %a, %b, !fpmath !0
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; Use the correct (full) fdiv expansion: f32 denormals are enabled here
; (attribute #2), so the 2.5 ulp !fpmath hint must not fall back to the
; rcp-based approximation.
; FUNC-LABEL: {{^}}fdiv_25ulp_denormals_f32:
; GCN: v_fma_f32
; GCN: v_div_fmas_f32
; GCN: v_div_fixup_f32
define amdgpu_kernel void @fdiv_25ulp_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
entry:
  %fdiv = fdiv float %a, %b, !fpmath !0
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32:
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; GCN-NOT: [[RESULT]]
; PREGFX10-NOT: s_setreg
; GFX10-NOT: s_denorm_mode
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
entry:
  %fdiv = fdiv fast float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[2].Z,

; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv fast float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_ulp25_f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[2].Z,

; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fdiv_ulp25_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv fast float %a, %b, !fpmath !0
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[2].Z,

; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv arcp ninf float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_v2f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; GCN: v_div_scale_f32
; GCN: v_div_scale_f32
; GCN: v_div_scale_f32
; GCN: v_div_scale_f32
define amdgpu_kernel void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv <2 x float> %a, %b
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_ulp25_v2f32:
; GCN: v_rcp_f32
; GCN: v_rcp_f32
; GCN-NOT: v_cmp_gt_f32
define amdgpu_kernel void @fdiv_ulp25_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[3].X,
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[2].W,

; GCN: v_rcp_f32
; GCN: v_rcp_f32
define amdgpu_kernel void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv fast <2 x float> %a, %b
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[3].X,
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, PS, KC0[2].W,

; GCN: v_rcp_f32
; GCN: v_rcp_f32
define amdgpu_kernel void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv arcp ninf <2 x float> %a, %b
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_v4f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; GCN: v_div_fixup_f32
; GCN: v_div_fixup_f32
; GCN: v_div_fixup_f32
; GCN: v_div_fixup_f32
define amdgpu_kernel void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1) * %in
  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
  %result = fdiv <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},

; GCN: v_rcp_f32
; GCN: v_rcp_f32
; GCN: v_rcp_f32
; GCN: v_rcp_f32
define amdgpu_kernel void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1) * %in
  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
  %result = fdiv fast <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], PS, T[0-9]+\.[XYZW]}},

; GCN: v_rcp_f32
; GCN: v_rcp_f32
; GCN: v_rcp_f32
; GCN: v_rcp_f32
define amdgpu_kernel void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1) * %in
  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
  %result = fdiv arcp ninf <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fdiv_f32_correctly_rounded_divide_sqrt:

; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]

; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX10: s_denorm_mode 15
; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
; PREGFX10: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX10: s_denorm_mode 12
; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],

define amdgpu_kernel void @fdiv_f32_correctly_rounded_divide_sqrt(float addrspace(1)* %out, float %a) #0 {
entry:
  %fdiv = fdiv float 1.000000e+00, %a
  store float %fdiv, float addrspace(1)* %out
  ret void
}


; FUNC-LABEL: {{^}}fdiv_f32_denorms_correctly_rounded_divide_sqrt:

; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]

; PREGFX10-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
; PREGFX10-NOT: s_setreg
; PREGFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
; PREGFX10: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
; PREGFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
; PREGFX10: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
; PREGFX10: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
; PREGFX10: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
; PREGFX10-NOT: s_setreg

; GFX10-NOT: s_denorm_mode
; GFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
; GFX10: v_fmac_f32_e32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]]
; GFX10: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
; GFX10: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
; GFX10: v_fma_f32 [[D:v[0-9]+]], [[C]], -[[NUM_SCALE]], [[DEN_SCALE]]
; GFX10: v_fmac_f32_e32 [[E:v[0-9]+]], [[D]], [[B]]
; GFX10: v_fmac_f32_e64 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]]
; GFX10-NOT: s_denorm_mode

; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
define amdgpu_kernel void @fdiv_f32_denorms_correctly_rounded_divide_sqrt(float addrspace(1)* %out, float %a) #2 {
entry:
  %fdiv = fdiv float 1.000000e+00, %a
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; #0: f32 denormals flushed (preserve-sign), unsafe-fp-math off.
; #1: same denormal mode as #0, but with unsafe-fp-math enabled.
; #2: f32 denormals enabled (ieee), unsafe-fp-math off.
attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="-flat-for-global" }
attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" "target-features"="-flat-for-global" }

; 2.5 ulp accuracy bound consumed by the !fpmath-annotated fdivs above.
!0 = !{float 2.500000e+00}