; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH %s

; Tests that a canonicalize of an already-canonical value (the result of an
; instruction that canonicalizes its output) is folded away, while a
; canonicalize of a possibly-non-canonical value (e.g. a raw load) is kept.

; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9-DENORM: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_value_f32:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fmul float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_sub_value_f32:
; GCN: v_sub_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float 15.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_add_value_f32:
; GCN: v_add_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fadd float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_sqrt_value_f32:
; GCN: v_sqrt_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.sqrt.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fceil_value_f32:
; GCN: v_ceil_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.ceil.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_floor_value_f32:
; GCN: v_floor_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.floor.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fma_value_f32:
; GCN: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fma.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fmuladd_value_f32:
; GCN-FLUSH: v_mac_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX9-DENORM: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fmuladd.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; A canonicalize of a canonicalize still needs one canonicalizing operation
; on the loaded (possibly non-canonical) value.
; GCN-LABEL: test_fold_canonicalize_canonicalize_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]],
; GCN-FLUSH: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]]
; GCN-DENORM: v_max_f32_e32 [[V:v[0-9]+]], [[LOAD]], [[LOAD]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.canonicalize.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
; GCN: v_cvt_f64_f32_e32 [[V:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(float addrspace(1)* %arg, double addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fpext float %load to double
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = fpext half %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v = fptrunc double %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fptrunc float %load to half
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
; VI: v_or_b32_e32 [[V:v[0-9]+]], [[V0]], [[V1]]
; GFX9: v_cvt_f16_f32_e32 [[V1:v[0-9]+]], v{{[0-9]+}}
; GFX9: v_and_b32_e32 [[V0_16:v[0-9]+]], 0xffff, [[V0]]
; GFX9: v_lshl_or_b32 [[V:v[0-9]+]], [[V1]], 16, [[V0_16]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(<2 x float> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %arg, i32 %id
  %load = load <2 x float>, <2 x float> addrspace(1)* %gep, align 8
  %v = fptrunc <2 x float> %load to <2 x half>
  %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 4
  ret void
}

; fneg of a raw load does not canonicalize, so an explicit canonicalize stays.
; GCN-LABEL: test_no_fold_canonicalize_fneg_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, -1.0, v{{[0-9]+}}
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float -0.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; The fadd canonicalizes its result, so fneg of it can be a plain sign flip.
; GCN-LABEL: test_fold_canonicalize_fneg_value_f32:
; GCN: v_xor_b32_e32 [[V:v[0-9]+]], 0x80000000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = fsub float -0.0, %v0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_no_fold_canonicalize_fabs_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.fabs.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.fabs.f32(float %v0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_sin_value_f32:
; GCN: v_sin_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.sin.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_cos_value_f32:
; GCN: v_cos_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.cos.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_sin_value_f16:
; GCN: v_sin_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.sin.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

; GCN-LABEL: test_fold_canonicalize_cos_value_f16:
; GCN: v_cos_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.cos.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

; Canonicalize of a constant qNaN folds to the canonical qNaN constant.
; GCN-LABEL: test_fold_canonicalize_qNaN_value_f32:
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x7fc00000
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %canonicalized = tail call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32:
; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GFX9: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_minnum_value_f32:
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.minnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; FIXME: Should there be more checks here? minnum with NaN operand is simplified away.

; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
; VI: v_add_u32_e32 v{{[0-9]+}}
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}]
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 2139095041 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_denorm_value_f32:
; GFX9: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
; VI: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
; GFX9-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 8388607 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32:
; GFX9: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, v{{[0-9]+}}
; VI: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
; GFX9-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.maxnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_maxnum_value_f32:
; GCN: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.maxnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_maxnum_value_f64:
; GCN: v_max_f64 [[V:v\[[0-9]+:[0-9]+\]]], v[{{[0-9:]+}}], 0
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(double addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v0 = fadd double %load, 0.0
  %v = tail call double @llvm.maxnum.f64(double %v0, double 0.0)
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  store double %canonicalized, double addrspace(1)* %gep, align 8
  ret void
}

; amdgpu_ps functions are not IEEE-mode, so the canonicalize is not free here.
; GCN-LABEL: test_no_fold_canonicalize_fmul_value_f32_no_ieee:
; GCN-EXCEPT: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_ps float @test_no_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}

; GCN-LABEL: test_fold_canonicalize_fmul_nnan_value_f32_no_ieee:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN-NEXT: ; return
; GCN-NOT: 1.0
define amdgpu_ps float @test_fold_canonicalize_fmul_nnan_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul nnan float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}

; With "no-nans-fp-math" the loaded value needs no flush on denorm targets.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f32
; GFX9-DENORM: global_load_dword [[V:v[0-9]+]],
; GFX9-DENORM: global_store_dword v[{{[0-9:]+}}], [[V]]
; GFX9-DENORM-NOT: 1.0
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(float addrspace(1)* %arg, float addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f64
; GCN: {{flat|global}}_load_dwordx2 [[V:v\[[0-9:]+\]]],
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(double addrspace(1)* %arg, double addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %v = load double, double addrspace(1)* %gep, align 8
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f16
; GCN: {{flat|global}}_load_ushort [[V:v[0-9]+]],
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(half addrspace(1)* %arg, half addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %v = load half, half addrspace(1)* %gep, align 2
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0
; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
; CHECK: .amd_amdgpu_isa

declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.canonicalize.f64(double) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.sqrt.f32(float) #0
declare float @llvm.ceil.f32(float) #0
declare float @llvm.floor.f32(float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.fmuladd.f32(float, float, float) #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.sin.f32(float) #0
declare float @llvm.cos.f32(float) #0
declare half @llvm.sin.f16(half) #0
declare half @llvm.cos.f16(half) #0
declare float @llvm.minnum.f32(float, float) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0

attributes #0 = { nounwind readnone }
attributes #1 = { "no-nans-fp-math"="true" }