1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s
3; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
4; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
5
; Baseline case: unmodified operands and the clamp flag set to false. Both
; targets are expected to emit one v_dot2_f32_f16 with no source modifiers;
; the gfx10 output additionally starts with an s_waitcnt_vscnt.
define float @v_fdot2(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %dot = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 false)
  ret float %dot
}
22
; Same operands as the baseline case, but the immediate i1 argument is true.
; The expected instruction carries the "clamp" modifier on both targets.
define float @v_fdot2_clamp(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_clamp:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 clamp
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_clamp:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %dot.clamped = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 true)
  ret float %dot.clamped
}
39
; The first multiplicand is negated with a vector fneg. The checks expect no
; separate negate instruction: the fneg shows up as neg_lo/neg_hi set for the
; first source operand only.
define float @v_fdot2_neg_a(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_a:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_neg_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <2 x half> %a
  %dot = call float @llvm.amdgcn.fdot2(<2 x half> %a.neg, <2 x half> %b, float %c, i1 false)
  ret float %dot
}
57
; The second multiplicand is negated with a vector fneg. The checks expect the
; fneg to appear as neg_lo/neg_hi set for the second source operand only.
define float @v_fdot2_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_b:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_neg_b:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %b.neg = fneg <2 x half> %b
  %dot = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b.neg, float %c, i1 false)
  ret float %dot
}
75
; Both multiplicands are negated with vector fneg. The checks expect both
; negations to appear as neg_lo/neg_hi set for the first and second source
; operands, with the two sources coming from distinct registers.
;
; The previous revision derived %neg.a from %b, so %a was never read and the
; expected instruction used v1 for both sources.
; NOTE(review): the expected-output lines below were adjusted by hand to match
; the corrected IR; rerun utils/update_llc_test_checks.py to confirm them.
define float @v_fdot2_neg_a_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_a_neg_b:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_neg_a_neg_b:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a
  %neg.b = fneg <2 x half> %b
  %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %neg.b, float %c, i1 false)
  ret float %r
}
94
; The float accumulator operand is negated with a scalar fneg. Unlike the
; vector operands, the checks expect this negation as a separate v_xor_b32
; with 0x80000000 ahead of the dot instruction, which then has no modifiers.
define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_neg_c:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_neg_c:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %c.neg = fneg float %c
  %dot = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c.neg, i1 false)
  ret float %dot
}
114
; The first multiplicand is the constant vector <2.0, 2.0>. The checks expect
; it to be encoded directly as the operand 2.0, with op_sel_hi cleared for
; that source only (op_sel_hi:[0,1,1]).
define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) {
; GFX906-LABEL: v_fdot2_inline_literal_a:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_inline_literal_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %dot = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
  ret float %dot
}
131
; The second multiplicand is the constant vector <2.0, 2.0>. The checks expect
; it to be encoded directly as the operand 2.0, with op_sel_hi cleared for
; that source only (op_sel_hi:[1,0,1]).
define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) {
; GFX906-LABEL: v_fdot2_inline_literal_b:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_inline_literal_b:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %dot = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
  ret float %dot
}
148
; The float accumulator operand is the constant 1.0. The checks expect it to
; be encoded directly as the third source operand, with no extra modifiers.
define float @v_fdot2_inline_literal_c(<2 x half> %a, <2 x half> %b) {
; GFX906-LABEL: v_fdot2_inline_literal_c:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, 1.0
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdot2_inline_literal_c:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, 1.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %dot = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false)
  ret float %dot
}
165
; Intrinsic exercised by every function in this file. It takes two <2 x half>
; operands, a float operand, and an i1 that must be an immediate (immarg);
; passing true for the i1 is what produces the "clamp" modifier in the checks.
declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1 immarg) #0

; Attribute group shared by the intrinsic declaration above.
attributes #0 = { nounwind speculatable readnone }
169