; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s

; GlobalISel selection tests for the llvm.amdgcn.fdot2 intrinsic. Each function
; passes a different combination of plain, negated, or constant operands and
; checks the v_dot2_f32_f16 instruction (and any source modifiers) that is
; expected in the output.

; Baseline: all three operands are plain function arguments, clamp is false.
define float @v_fdot2(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 false)
  ret float %r
}

; The i1 immediate set to true is expected to appear as the "clamp" modifier.
define float @v_fdot2_clamp(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_clamp:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 clamp
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_clamp:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 clamp
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 true)
  ret float %r
}

; fneg of the first vector operand is expected to fold into neg_lo/neg_hi on
; source 0.
define float @v_fdot2_neg_a(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_a:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_neg_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %b, float %c, i1 false)
  ret float %r
}

; fneg of the second vector operand is expected to fold into neg_lo/neg_hi on
; source 1.
define float @v_fdot2_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_b:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_neg_b:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.b = fneg <2 x half> %b
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg.b, float %c, i1 false)
  ret float %r
}

; Both vector operands negated: the modifiers are expected on sources 0 and 1.
; Previously %neg.a was computed from %b, leaving %a unused, so the test only
; covered fdot2(-b, -b, c). The assertions below were updated by hand to match
; the corrected IR; re-run utils/update_llc_test_checks.py to confirm them.
define float @v_fdot2_neg_a_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_a_neg_b:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_neg_a_neg_b:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a
  %neg.b = fneg <2 x half> %b
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %neg.b, float %c, i1 false)
  ret float %r
}

; fneg of the float accumulator: the expected output flips the sign bit with a
; separate v_xor_b32 rather than using a source modifier on the dot instruction.
define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_c:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_neg_c:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.c = fneg float %c
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 false)
  ret float %r
}

; Constant <2.0, 2.0> as the first vector operand: the expected output builds
; the packed value in an SGPR (s_movk_i32 0x4000 + s_pack_ll_b32_b16) and uses
; that SGPR as source 0.
define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_inline_literal_a:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    s_movk_i32 s4, 0x4000
; GFX906-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
; GFX906-NEXT:    v_dot2_f32_f16 v0, s4, v0, v1
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_inline_literal_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_movk_i32 s4, 0x4000
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
; GFX10-NEXT:    v_dot2_f32_f16 v0, s4, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
  ret float %ret
}

; Constant <2.0, 2.0> as the second vector operand: same SGPR materialization,
; used as source 1.
define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) {
; GFX906-LABEL: v_fdot2_inline_literal_b:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    s_movk_i32 s4, 0x4000
; GFX906-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, s4, v1
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_inline_literal_b:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_movk_i32 s4, 0x4000
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, s4, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
  ret float %ret
}

; Constant 1.0 as the float accumulator: expected to appear directly as the
; third source operand of the dot instruction.
define float @v_fdot2_inline_literal_c(<2 x half> %a, <2 x half> %b) {
; GFX906-LABEL: v_fdot2_inline_literal_c:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, 1.0
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_inline_literal_c:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, 1.0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false)
  ret float %ret
}

declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1 immarg) #0

attributes #0 = { nounwind readnone speculatable }