; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; Each kernel below feeds one scalar kernel argument into more than one source
; operand of a single instruction. The checks pin which operands are expected
; to read the loaded SGPR directly and which are expected to be copied into a
; VGPR first.
; NOTE(review): the v_mov_b32 copies are presumably needed because only one
; distinct SGPR may be read per VALU instruction on this target - confirm
; against the ISA documentation.

declare float @llvm.fma.f32(float, float, float) #1
; NOTE(review): llvm.fmuladd.f32 is declared but not called by any kernel
; visible in this file.
declare float @llvm.fmuladd.f32(float, float, float) #1
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1


; fadd %a, %a - the single loaded SGPR is expected in both source operands.
; SI-LABEL: {{^}}test_sgpr_use_twice_binop:
; SI: s_load_dword [[SGPR:s[0-9]+]],
; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
  %dbl = fadd float %a, %a
  store float %dbl, float addrspace(1)* %out, align 4
  ret void
}

; fma(a, a, a) - the same SGPR is expected in all three source operands.
; SI-LABEL: {{^}}test_sgpr_use_three_ternary_op:
; SI: s_load_dword [[SGPR:s[0-9]+]],
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(a, a, b) - %a (loaded at 0xb) stays in its SGPR for both uses, while %b
; (loaded at 0xc) is expected to be moved into a VGPR.
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(a, b, a) - the checks expect the VGPR copy of %b as the first emitted
; operand, i.e. the two multiplicands appear swapped relative to the IR call.
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(b, a, a) - the checks expect %a's SGPR first and the VGPR copy of %b
; second, again swapped relative to the IR call.
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(a, a, 2.0) - the constant is expected as an inline operand next to the
; twice-used SGPR, with no VGPR copy checked for.
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
; SI: s_load_dword [[SGPR:s[0-9]+]]
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(a, 2.0, a) - the checks expect the constant as the first emitted operand.
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
; SI: s_load_dword [[SGPR:s[0-9]+]]
; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; The immediate-first form is exercised with the integer imad24 intrinsic
; rather than fma, because fma c, x, y is canonicalized to fma x, c, y and
; would therefore not keep the constant in the first operand.
; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
; SI: s_load_dword [[SGPR:s[0-9]+]]
; SI: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
  %mad = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
  store i32 %mad, i32 addrspace(1)* %out, align 4
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }