; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s

; Make sure instruction selection does not violate the constant bus
; restriction when several source operands are SGPRs (inreg arguments).
; Each test is checked for both gfx900 and gfx1010.

; Binary operation with 2 different SGPRs
define amdgpu_ps float @fmul_s_s(float inreg %src0, float inreg %src1) {
; GFX9-LABEL: fmul_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_mul_f32_e32 v0, s2, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: fmul_s_s:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mul_f32_e64 v0, s2, s3
; GFX10-NEXT:    ; return to shader part epilog
  %mul = fmul float %src0, %src1
  ret float %mul
}

; Binary operation where both operands are the same SGPR
define amdgpu_ps float @fmul_ss(float inreg %src) {
; GFX9-LABEL: fmul_ss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mul_f32_e64 v0, s2, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: fmul_ss:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mul_f32_e64 v0, s2, s2
; GFX10-NEXT:    ; return to shader part epilog
  %mul = fmul float %src, %src
  ret float %mul
}

; Ternary operation with 3 different SGPRs
define amdgpu_ps float @fma_s_s_s(float inreg %src0, float inreg %src1, float inreg %src2) {
; GFX9-LABEL: fma_s_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_mov_b32_e32 v1, s4
; GFX9-NEXT:    v_fma_f32 v0, s2, v0, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: fma_s_s_s:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    v_fma_f32 v0, s3, s2, v0
; GFX10-NEXT:    ; return to shader part epilog
  %fma = call float @llvm.fma.f32(float %src0, float %src1, float %src2)
  ret float %fma
}

; Ternary operation with 3 identical SGPRs
define amdgpu_ps float @fma_sss(float inreg %src) {
; GFX9-LABEL: fma_sss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_fma_f32 v0, s2, s2, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: fma_sss:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_fma_f32 v0, s2, s2, s2
; GFX10-NEXT:    ; return to shader part epilog
  %fma = call float @llvm.fma.f32(float %src, float %src, float %src)
  ret float %fma
}

; src0/1 are same SGPR
define amdgpu_ps float @fma_ss_s(float inreg %src01, float inreg %src2) {
; GFX9-LABEL: fma_ss_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_fma_f32 v0, s2, s2, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: fma_ss_s:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_fma_f32 v0, s2, s2, s3
; GFX10-NEXT:    ; return to shader part epilog
  %fma = call float @llvm.fma.f32(float %src01, float %src01, float %src2)
  ret float %fma
}

; src1/2 are same SGPR
define amdgpu_ps float @fma_s_ss(float inreg %src0, float inreg %src12) {
; GFX9-LABEL: fma_s_ss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_fma_f32 v0, s2, v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: fma_s_ss:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_fma_f32 v0, s2, s3, s3
; GFX10-NEXT:    ; return to shader part epilog
  %fma = call float @llvm.fma.f32(float %src0, float %src12, float %src12)
  ret float %fma
}

; src0/2 are same SGPR
define amdgpu_ps float @fma_ss_s_same_outer(float inreg %src02, float inreg %src1) {
; GFX9-LABEL: fma_ss_s_same_outer:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_fma_f32 v0, s2, v0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: fma_ss_s_same_outer:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_fma_f32 v0, s2, s3, s2
; GFX10-NEXT:    ; return to shader part epilog
  %fma = call float @llvm.fma.f32(float %src02, float %src1, float %src02)
  ret float %fma
}

; Compare of 2 different SGPRs; the i1 result feeds a select of constants
define amdgpu_ps float @fcmp_s_s(float inreg %src0, float inreg %src1) {
; GFX9-LABEL: fcmp_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, s2, v0
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: fcmp_s_s:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_cmp_eq_f32_e64 s0, s2, s3
; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s0
; GFX10-NEXT:    ; return to shader part epilog
  %is.eq = fcmp oeq float %src0, %src1
  %sel = select i1 %is.eq, float 1.0, float 0.0
  ret float %sel
}

; Select between 2 different SGPRs; the condition comes from a VGPR compare
define amdgpu_ps float @select_vcc_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) {
; GFX9-LABEL: select_vcc_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    v_mov_b32_e32 v3, s3
; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: select_vcc_s_s:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v2, s3
; GFX10-NEXT:    v_cmp_eq_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, s2, vcc_lo
; GFX10-NEXT:    ; return to shader part epilog
  %cond = fcmp oeq float %cmp0, %cmp1
  %sel = select i1 %cond, float %src0, float %src1
  ret float %sel
}

; Same as select_vcc_s_s, but the first selected SGPR value is negated
define amdgpu_ps float @select_vcc_fneg_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) {
; GFX9-LABEL: select_vcc_fneg_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, s3
; GFX9-NEXT:    v_mov_b32_e32 v3, s2
; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, -v3, vcc
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: select_vcc_fneg_s_s:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v2, s2
; GFX10-NEXT:    v_cmp_eq_f32_e32 vcc_lo, v0, v1
; GFX10-NEXT:    v_cndmask_b32_e64 v0, s3, -v2, vcc_lo
; GFX10-NEXT:    ; return to shader part epilog
  %cond = fcmp oeq float %cmp0, %cmp1
  %src0.neg = fneg float %src0
  %sel = select i1 %cond, float %src0.neg, float %src1
  ret float %sel
}

; Constant bus used by vcc; all three sources are the same SGPR
define amdgpu_ps float @amdgcn_div_fmas_sss(float inreg %src, float %cmp.src) {
; GFX9-LABEL: amdgcn_div_fmas_sss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    s_nop 2
; GFX9-NEXT:    v_div_fmas_f32 v0, v0, v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: amdgcn_div_fmas_sss:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v0
; GFX10-NEXT:    v_div_fmas_f32 v0, s2, s2, s2
; GFX10-NEXT:    ; return to shader part epilog
  %is.zero = fcmp oeq float %cmp.src, 0.0
  %fmas = call float @llvm.amdgcn.div.fmas.f32(float %src, float %src, float %src, i1 %is.zero)
  ret float %fmas
}

; Class test whose value and mask operands are 2 different SGPRs
define amdgpu_ps float @class_s_s(float inreg %src0, i32 inreg %src1) {
; GFX9-LABEL: class_s_s:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: class_s_s:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_cmp_class_f32_e64 s0, s2, s3
; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s0
; GFX10-NEXT:    ; return to shader part epilog
  %is.class = call i1 @llvm.amdgcn.class.f32(float %src0, i32 %src1)
  %sel = select i1 %is.class, float 1.0, float 0.0
  ret float %sel
}

; div_scale of 2 different SGPRs with the immediate flag set to true
define amdgpu_ps float @div_scale_s_s_true(float inreg %src0, float inreg %src1) {
; GFX9-LABEL: div_scale_s_s_true:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_div_scale_f32 v0, s[0:1], s2, v0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: div_scale_s_s_true:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_div_scale_f32 v0, s0, s2, s3, s2
; GFX10-NEXT:    ; return to shader part epilog
  %scale.pair = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 true)
  %scaled = extractvalue { float, i1 } %scale.pair, 0
  ret float %scaled
}

; div_scale of 2 different SGPRs with the immediate flag set to false
define amdgpu_ps float @div_scale_s_s_false(float inreg %src0, float inreg %src1) {
; GFX9-LABEL: div_scale_s_s_false:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_div_scale_f32 v0, s[0:1], v0, v0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: div_scale_s_s_false:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_div_scale_f32 v0, s0, s3, s3, s2
; GFX10-NEXT:    ; return to shader part epilog
  %scale.pair = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 false)
  %scaled = extractvalue { float, i1 } %scale.pair, 0
  ret float %scaled
}

; Intrinsics used by the tests above
declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #1
declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1 immarg) #1
declare i1 @llvm.amdgcn.class.f32(float, i32) #1

attributes #0 = { nounwind readnone speculatable willreturn }
attributes #1 = { nounwind readnone speculatable }