; Codegen checks for the llvm.maxnum intrinsic on f32 scalars and vectors.
; Both llc invocations below (default amdgcn CPU and tonga) share one prefix.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Scalar maxnum of two kernel arguments: expects a v_max_f32_e32.
; GCN-LABEL: {{^}}test_fmax_f32:
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  %val = call float @llvm.maxnum.f32(float %a, float %b)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; <2 x float> maxnum: expects one v_max_f32_e32 per element.
; GCN-LABEL: {{^}}test_fmax_v2f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
  %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; <3 x float> maxnum: expects three v_max_f32_e32 and then no further
; v_max_f32 of any encoding before the next label.
; GCN-LABEL: {{^}}test_fmax_v3f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN-NOT: v_max_f32
define amdgpu_kernel void @test_fmax_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, <3 x float> %b) nounwind {
  %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
  store <3 x float> %val, <3 x float> addrspace(1)* %out, align 16
  ret void
}

; <4 x float> maxnum: expects four v_max_f32_e32.
; GCN-LABEL: {{^}}test_fmax_v4f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
  %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; <8 x float> maxnum: expects eight v_max_f32_e32.
; GCN-LABEL: {{^}}test_fmax_v8f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
  %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
  ret void
}

; <16 x float> maxnum: expects sixteen v_max_f32_e32.
; GCN-LABEL: {{^}}test_fmax_v16f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
  %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
  ret void
}

; The constant_fold_* tests pass two constants to maxnum. Each expects no
; v_max_f32_e32 ahead of the instruction that materializes the folded value,
; and expects that same register to be stored.

; maxnum(1.0, 2.0): expects the constant 2.0.
; GCN-LABEL: {{^}}constant_fold_fmax_f32:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Both operands NaN: expects the bit pattern 0x7fc00000.
; GCN-LABEL: {{^}}constant_fold_fmax_f32_nan_nan:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; NaN as the second operand: expects the non-NaN operand, 1.0.
; GCN-LABEL: {{^}}constant_fold_fmax_f32_val_nan:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; NaN as the first operand: expects the non-NaN operand, 1.0.
; GCN-LABEL: {{^}}constant_fold_fmax_f32_nan_val:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; The four signed-zero tests below expect the result to carry the sign of the
; first operand: a move of 0 when it is 0.0, and v_bfrev_b32 of 1 (bit-reverse
; of 1, i.e. 0x80000000, the encoding of -0.0) when it is -0.0.

; maxnum(0.0, 0.0): expects 0.
; GCN-LABEL: {{^}}constant_fold_fmax_f32_p0_p0:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(0.0, -0.0): expects 0.
; GCN-LABEL: {{^}}constant_fold_fmax_f32_p0_n0:
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(-0.0, 0.0): expects v_bfrev_b32 of 1.
; GCN-LABEL: {{^}}constant_fold_fmax_f32_n0_p0:
; GCN-NOT: v_max_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; maxnum(-0.0, -0.0): expects v_bfrev_b32 of 1.
; GCN-LABEL: {{^}}constant_fold_fmax_f32_n0_n0:
; GCN-NOT: v_max_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) #0 {
  %val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Variable with the constant 2.0 as second IR operand: expects the e64
; encoding with an SGPR source and 2.0 written directly as the last operand.
; GCN-LABEL: {{^}}fmax_var_immediate_f32:
; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
define amdgpu_kernel void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.maxnum.f32(float %a, float 2.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Same as above with the IR operands swapped: the expected instruction is
; identical, with 2.0 still the last operand.
; GCN-LABEL: {{^}}fmax_immediate_var_f32:
; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
define amdgpu_kernel void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.maxnum.f32(float 2.0, float %a)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Variable with the constant 99.0 (0x42c60000): expects the constant to be
; moved into a VGPR first and then used as the last operand of an e32 max.
; GCN-LABEL: {{^}}fmax_var_literal_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define amdgpu_kernel void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.maxnum.f32(float %a, float 99.0)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Same as above with the IR operands swapped: the expected sequence is
; identical.
; GCN-LABEL: {{^}}fmax_literal_var_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
define amdgpu_kernel void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.maxnum.f32(float 99.0, float %a)
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; <3 x float> maxnum in a function without the amdgpu_kernel calling
; convention, taking and returning the vectors by value: expects three
; v_max_f32_e32 and no further v_max_f32 afterwards.
; GCN-LABEL: {{^}}test_func_fmax_v3f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN-NOT: v_max_f32
define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) nounwind {
  %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
  ret <3 x float> %val
}

; Intrinsic declarations used by the tests above. The f64 declaration is not
; referenced by any function in this file.
declare float @llvm.maxnum.f32(float, float) #1
declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) #1
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
declare double @llvm.maxnum.f64(double, double)

; #0 is applied to the test functions and calls; #1 to the intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }