; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
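; Tests GlobalISel lowering of llvm.roundeven to v_rndne_{f16,f32,f64} across
; GFX6-GFX10, including folding of fneg/fabs source modifiers. GFX6 has no
; v_rndne_f64, so the f64 cases are expanded there, and GFX6/GFX7 lack f16
; instructions, so half is rounded via f32 conversions.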

define float @v_roundeven_f32(float %x) {
; GFX6-LABEL: v_roundeven_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call float @llvm.roundeven.f32(float %x)
  ret float %roundeven
}

define <2 x float> @v_roundeven_v2f32(<2 x float> %x) {
; GFX6-LABEL: v_roundeven_v2f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %x)
  ret <2 x float> %roundeven
}

define <3 x float> @v_roundeven_v3f32(<3 x float> %x) {
; GFX6-LABEL: v_roundeven_v3f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v3f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v3f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v3f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v3f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-NEXT:    v_rndne_f32_e32 v2, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
  ret <3 x float> %roundeven
}

define <4 x float> @v_roundeven_v4f32(<4 x float> %x) {
; GFX6-LABEL: v_roundeven_v4f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v4f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v4f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
; GFX8-NEXT:    v_rndne_f32_e32 v3, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v4f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
; GFX9-NEXT:    v_rndne_f32_e32 v3, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v4f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-NEXT:    v_rndne_f32_e32 v2, v2
; GFX10-NEXT:    v_rndne_f32_e32 v3, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x)
  ret <4 x float> %roundeven
}

define half @v_roundeven_f16(half %x) {
; GFX6-LABEL: v_roundeven_f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f16_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call half @llvm.roundeven.f16(half %x)
  ret half %roundeven
}

define <2 x half> @v_roundeven_v2f16(<2 x half> %x) {
; GFX6-LABEL: v_roundeven_v2f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f16_e32 v1, v0
; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x)
  ret <2 x half> %roundeven
}

define <2 x half> @v_roundeven_v2f16_fneg(<2 x half> %x) {
; GFX6-LABEL: v_roundeven_v2f16_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX6-NEXT:    v_rndne_f32_e32 v0, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f16_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX7-NEXT:    v_rndne_f32_e32 v0, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v2
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f16_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f16_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX10-NEXT:    v_rndne_f16_e32 v1, v0
; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x.fneg)
  ret <2 x half> %roundeven
}

define <4 x half> @v_roundeven_v4f16(<4 x half> %x) {
; GFX6-LABEL: v_roundeven_v4f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v4f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v4f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v2, v0
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_rndne_f16_e32 v3, v1
; GFX8-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v4, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v4f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v2, v0
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_rndne_f16_e32 v3, v1
; GFX9-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v2, v4, v0
; GFX9-NEXT:    v_and_or_b32 v1, v3, v4, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v4f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f16_e32 v2, v0
; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_rndne_f16_e32 v3, v1
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_and_or_b32 v0, v2, v4, v0
; GFX10-NEXT:    v_and_or_b32 v1, v3, v4, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x)
  ret <4 x half> %roundeven
}


define float @v_roundeven_f32_fabs(float %x) {
; GFX6-LABEL: v_roundeven_f32_fabs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fabs:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fabs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fabs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f32_fabs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %roundeven = call float @llvm.roundeven.f32(float %fabs.x)
  ret float %roundeven
}

define amdgpu_ps float @s_roundeven_f32(float inreg %x) {
; GFX6-LABEL: s_roundeven_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_rndne_f32_e32 v0, s0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: s_roundeven_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_rndne_f32_e32 v0, s0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_roundeven_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_rndne_f32_e32 v0, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_roundeven_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_rndne_f32_e32 v0, s0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_roundeven_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_rndne_f32_e32 v0, s0
; GFX10-NEXT:    ; return to shader part epilog
  %roundeven = call float @llvm.roundeven.f32(float %x)
  ret float %roundeven
}

define float @v_roundeven_f32_fneg(float %x) {
; GFX6-LABEL: v_roundeven_f32_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f32_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg float %x
  %roundeven = call float @llvm.roundeven.f32(float %neg.x)
  ret float %roundeven
}

define double @v_roundeven_f64(double %x) {
; GFX6-LABEL: v_roundeven_f64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v1
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], v[2:3]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call double @llvm.roundeven.f64(double %x)
  ret double %roundeven
}

define double @v_roundeven_f64_fneg(double %x) {
; GFX6-LABEL: v_roundeven_f64_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_xor_b32_e32 v6, 0x80000000, v1
; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v6
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT:    v_add_f64 v[4:5], -v[0:1], v[2:3]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f64_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg double %x
  %roundeven = call double @llvm.roundeven.f64(double %neg.x)
  ret double %roundeven
}

define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
; GFX6-LABEL: v_roundeven_v2f64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_brev_b32 s6, 1
; GFX6-NEXT:    s_mov_b32 s7, 0x43300000
; GFX6-NEXT:    v_and_b32_e32 v5, s6, v1
; GFX6-NEXT:    v_mov_b32_e32 v4, 0
; GFX6-NEXT:    v_or_b32_e32 v5, s7, v5
; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], v[4:5]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[5:6], v[6:7], -v[4:5]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
; GFX6-NEXT:    v_and_b32_e32 v5, s6, v3
; GFX6-NEXT:    v_or_b32_e32 v5, s7, v5
; GFX6-NEXT:    v_add_f64 v[7:8], v[2:3], v[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
; GFX6-NEXT:    v_add_f64 v[4:5], v[7:8], -v[4:5]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x)
  ret <2 x double> %roundeven
}

declare half @llvm.roundeven.f16(half) #0
declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) #0
declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) #0

declare float @llvm.roundeven.f32(float) #0
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) #0
declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) #0
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0

declare double @llvm.roundeven.f64(double) #0
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) #0

declare half @llvm.fabs.f16(half) #0
declare float @llvm.fabs.f32(float) #0

attributes #0 = { nounwind readnone speculatable willreturn }