; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck --check-prefixes=GCN,GCN-DENORM %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck --check-prefixes=GCN,GCN-FLUSH %s

; Checks the instruction sequences expected for f32 fdiv on gfx900 under two
; f32 denormal modes: ieee (first invocation) and preserve-sign (second
; invocation). Three flavours of division are covered:
;   * fdiv tagged with !fpmath !0 (2.5 ULP, see the metadata at the end),
;   * fdiv carrying the 'fast' flag,
;   * plain fdiv with no flags or metadata.
;
; In the scaled-reciprocal sequences below, 0x6f800000 and 0x2f800000 are the
; f32 bit patterns of 2^96 and 2^-32 respectively.
;
; FileCheck pattern variables are not reset at a label unless
; --enable-var-scope is passed, so every function that uses a variable must
; also capture it within its own checks.

; ---------------------------------------------------------------------------
; Scalar +/-1.0 / +/-x with 2.5 ULP metadata.
; Expected in the ieee run: select a scale factor by comparing |x| against
; 2^96, pre-scale x, take the reciprocal, and rescale the result.
; Expected in the preserve-sign run: a single v_rcp_f32, with the sign folded
; into a source modifier where needed.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_1_by_x_25ulp:
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DAG:        s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
; GCN-DENORM:     v_mul_f32_e32 [[PRESCALED:v[0-9]+]], [[VAL]], [[SCALE]]
; GCN-DENORM:     v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
; GCN-DENORM:     v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]

; GCN-FLUSH:      v_rcp_f32_e32 [[OUT:v[0-9]+]], [[VAL]]

; GCN:            global_store_dword v[{{[0-9:]+}}], [[OUT]], off
define amdgpu_kernel void @div_1_by_x_25ulp(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv float 1.000000e+00, %load, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_x_25ulp:
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DAG:        s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
; GCN-DENORM:     v_mul_f32_e64 [[PRESCALED:v[0-9]+]], [[VAL]], -[[SCALE]]
; GCN-DENORM:     v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
; GCN-DENORM:     v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]

; GCN-FLUSH:      v_rcp_f32_e64 [[OUT:v[0-9]+]], -[[VAL]]

; GCN:            global_store_dword v[{{[0-9:]+}}], [[OUT]], off
define amdgpu_kernel void @div_minus_1_by_x_25ulp(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv float -1.000000e+00, %load, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_1_by_minus_x_25ulp:
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DAG:        s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
; GCN-DENORM:     v_mul_f32_e64 [[PRESCALED:v[0-9]+]], -[[VAL]], [[SCALE]]
; GCN-DENORM:     v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
; GCN-DENORM:     v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]

; GCN-FLUSH:      v_rcp_f32_e64 [[OUT:v[0-9]+]], -[[VAL]]

; GCN:            global_store_dword v[{{[0-9:]+}}], [[OUT]], off
define amdgpu_kernel void @div_1_by_minus_x_25ulp(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv float 1.000000e+00, %neg, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_minus_x_25ulp:
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DAG:        s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
; GCN-DENORM:     v_mul_f32_e32 [[PRESCALED:v[0-9]+]], [[VAL]], [[SCALE]]
; GCN-DENORM:     v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
; GCN-DENORM:     v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]

; GCN-FLUSH:      v_rcp_f32_e32 [[OUT:v[0-9]+]], [[VAL]]

; GCN:            global_store_dword v[{{[0-9:]+}}], [[OUT]], off
define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv float -1.000000e+00, %neg, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; ---------------------------------------------------------------------------
; <4 x float> splat(+/-1.0) / +/-x with 2.5 ULP metadata.
; Same expectations as the scalar cases, repeated once per vector element.
; Each function captures the loaded register range (VAL0..VAL3) itself so the
; preserve-sign checks never depend on values captured by another function.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_v4_1_by_x_25ulp:
; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32

; GCN-FLUSH:      v_rcp_f32_e32 v[[OUT0:[0-9]+]], s[[VAL0]]
; GCN-FLUSH:      v_rcp_f32_e32
; GCN-FLUSH:      v_rcp_f32_e32
; GCN-FLUSH:      v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
; GCN-FLUSH:      global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off
define amdgpu_kernel void @div_v4_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %load, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; GCN-LABEL: {{^}}div_v4_minus_1_by_x_25ulp:
; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32

; GCN-FLUSH:      v_rcp_f32_e64 v[[OUT0:[0-9]+]], -s[[VAL0]]
; GCN-FLUSH:      v_rcp_f32_e64
; GCN-FLUSH:      v_rcp_f32_e64
; GCN-FLUSH:      v_rcp_f32_e64 v[[OUT3:[0-9]+]], -s[[VAL3]]
; GCN-FLUSH:      global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off
define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %load, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; GCN-LABEL: {{^}}div_v4_1_by_minus_x_25ulp:
; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32

; GCN-FLUSH:      v_rcp_f32_e64 v[[OUT0:[0-9]+]], -s[[VAL0]]
; GCN-FLUSH:      v_rcp_f32_e64
; GCN-FLUSH:      v_rcp_f32_e64
; GCN-FLUSH:      v_rcp_f32_e64 v[[OUT3:[0-9]+]], -s[[VAL3]]
; GCN-FLUSH:      global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off
define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
  %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %neg, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; GCN-LABEL: {{^}}div_v4_minus_1_by_minus_x_25ulp:
; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32

; GCN-FLUSH:      v_rcp_f32_e32 v[[OUT0:[0-9]+]], s[[VAL0]]
; GCN-FLUSH:      v_rcp_f32_e32
; GCN-FLUSH:      v_rcp_f32_e32
; GCN-FLUSH:      v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
; GCN-FLUSH:      global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off
define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
  %div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %neg, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; ---------------------------------------------------------------------------
; <4 x float> <2.0, 1.0, -1.0, -2.0> / +/-x with 2.5 ULP metadata.
; The numerator mixes +/-1.0 with other constants, so the checks expect a mix
; of lowerings within one vector. The trailing negative checks pin the number
; of compare/select pairs to exactly two in either run, and forbid any v_div*
; instruction in the preserve-sign run.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_v4_c_by_x_25ulp:
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32

; GCN-DAG:        v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000

; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc

; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP1:v[0-9]+]], v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP1]]
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP2:v[0-9]+]], v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP2]]

; GCN-DENORM-DAG: v_div_fmas_f32
; GCN-DENORM-DAG: v_div_fmas_f32
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, 2.0{{$}}
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}}

; GCN-FLUSH-DAG:  v_rcp_f32_e32
; GCN-FLUSH-DAG:  v_rcp_f32_e64

; GCN-NOT:        v_cmp_gt_f32_e64
; GCN-NOT:        v_cndmask_b32_e32
; GCN-FLUSH-NOT:  v_div

; GCN:            global_store_dwordx4
define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %div = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %load, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; GCN-LABEL: {{^}}div_v4_c_by_minus_x_25ulp:
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32

; GCN-DAG:        v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000

; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc

; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP1:v[0-9]+]], v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP1]]
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP2:v[0-9]+]], v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP2]]

; GCN-DENORM-DAG: v_div_fmas_f32
; GCN-DENORM-DAG: v_div_fmas_f32
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}}

; GCN-FLUSH-DAG:  v_rcp_f32_e32
; GCN-FLUSH-DAG:  v_rcp_f32_e64

; GCN-NOT:        v_cmp_gt_f32_e64
; GCN-NOT:        v_cndmask_b32_e32
; GCN-FLUSH-NOT:  v_div

; GCN:            global_store_dwordx4
define amdgpu_kernel void @div_v4_c_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %neg = fneg <4 x float> %load
  %div = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %neg, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; ---------------------------------------------------------------------------
; Non-constant numerator with 2.5 ULP metadata.
; Expected in the ieee run: the v_div_scale / v_div_fmas / v_div_fixup
; sequence. Expected in the preserve-sign run: the scaled-reciprocal sequence.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_v_by_x_25ulp:
; GCN-DAG:        s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}

; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM:     v_div_fmas_f32
; GCN-DENORM:     v_div_fixup_f32 [[OUT:v[0-9]+]],

; GCN-FLUSH-DAG:  v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-FLUSH-DAG:  v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-FLUSH-DAG:  v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
; GCN-FLUSH-DAG:  v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
; GCN-FLUSH:      v_mul_f32_e32 [[PRESCALED:v[0-9]+]], [[VAL]], [[SCALE]]
; GCN-FLUSH:      v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
; GCN-FLUSH:      v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]

; GCN:            global_store_dword v[{{[0-9:]+}}], [[OUT]], off
define amdgpu_kernel void @div_v_by_x_25ulp(float addrspace(1)* %arg, float %num) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv float %num, %load, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; ---------------------------------------------------------------------------
; Scalar +/-1.0 / +/-x with the 'fast' flag on the fdiv.
; Expected in both runs: a single v_rcp_f32, with the sign folded into a
; source modifier where needed.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_1_by_x_fast:
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_1_by_x_fast(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv fast float 1.000000e+00, %load, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_x_fast:
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_minus_1_by_x_fast(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv fast float -1.000000e+00, %load, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; NOTE(review): in the next two functions the !fpmath attachment sits on the
; fsub rather than on the fdiv, unlike the two functions above. Left as found;
; confirm whether that placement is intentional.

; GCN-LABEL: {{^}}div_1_by_minus_x_fast:
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_1_by_minus_x_fast(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load, !fpmath !0
  %div = fdiv fast float 1.000000e+00, %neg
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_minus_x_fast:
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_minus_1_by_minus_x_fast(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load, !fpmath !0
  %div = fdiv fast float -1.000000e+00, %neg
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; ---------------------------------------------------------------------------
; Scalar +/-1.0 / +/-x with neither fast-math flags nor !fpmath metadata.
; Expected in both runs: the v_div_scale / v_div_fmas / v_div_fixup sequence.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_1_by_x_correctly_rounded:
; GCN-DAG: v_div_scale_f32
; GCN-DAG: v_rcp_f32_e32
; GCN-DAG: v_div_scale_f32
; GCN:     v_div_fmas_f32
; GCN:     v_div_fixup_f32
define amdgpu_kernel void @div_1_by_x_correctly_rounded(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv float 1.000000e+00, %load
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_x_correctly_rounded:
; GCN-DAG: v_div_scale_f32
; GCN-DAG: v_rcp_f32_e32
; GCN-DAG: v_div_scale_f32
; GCN:     v_div_fmas_f32
; GCN:     v_div_fixup_f32
define amdgpu_kernel void @div_minus_1_by_x_correctly_rounded(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv float -1.000000e+00, %load
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_1_by_minus_x_correctly_rounded:
; GCN-DAG: v_div_scale_f32
; GCN-DAG: v_rcp_f32_e32
; GCN-DAG: v_div_scale_f32
; GCN:     v_div_fmas_f32
; GCN:     v_div_fixup_f32
define amdgpu_kernel void @div_1_by_minus_x_correctly_rounded(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv float 1.000000e+00, %neg
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_minus_x_correctly_rounded:
; GCN-DAG: v_div_scale_f32
; GCN-DAG: v_rcp_f32_e32
; GCN-DAG: v_div_scale_f32
; GCN:     v_div_fmas_f32
; GCN:     v_div_fixup_f32
define amdgpu_kernel void @div_minus_1_by_minus_x_correctly_rounded(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv float -1.000000e+00, %neg
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; Accuracy bound referenced by every !fpmath attachment above.
!0 = !{float 2.500000e+00}