1; RUN: llc -aarch64-sve-vector-bits-min=128  -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
2; RUN: llc -aarch64-sve-vector-bits-min=256  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
3; RUN: llc -aarch64-sve-vector-bits-min=384  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
4; RUN: llc -aarch64-sve-vector-bits-min=512  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5; RUN: llc -aarch64-sve-vector-bits-min=640  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6; RUN: llc -aarch64-sve-vector-bits-min=768  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
7; RUN: llc -aarch64-sve-vector-bits-min=896  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
8; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
9; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
10; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
11; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
12; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
13; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
14; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
15; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
16; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
17
18target triple = "aarch64-unknown-linux-gnu"
19
20; Don't use SVE when its registers are no bigger than NEON.
21; NO_SVE-NOT: ptrue
22
23;
24; CEIL -> FRINTP
25;
26
27; Don't use SVE for 64-bit vectors.
define <4 x half> @frintp_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintp_v4f16:
; CHECK: frintp v0.4h, v0.4h
; CHECK-NEXT: ret
; A 64-bit vector fits in a NEON register, so expect an unpredicated NEON frintp.
  %res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op)
  ret <4 x half> %res
}
35
36; Don't use SVE for 128-bit vectors.
define <8 x half> @frintp_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frintp_v8f16:
; CHECK: frintp v0.8h, v0.8h
; CHECK-NEXT: ret
; A 128-bit vector fits in a NEON register, so expect an unpredicated NEON frintp.
  %res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op)
  ret <8 x half> %res
}
44
define void @frintp_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frintp_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
; 256-bit vector: expect a fixed-width SVE load/frintp/store predicated by vl16.
  %op = load <16 x half>, <16 x half>* %a
  %res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op)
  store <16 x half> %res, <16 x half>* %a
  ret void
}
57
define void @frintp_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintp_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
; 512-bit vector: single vl32 op when VL is at least 512; at exactly 256-bit SVE
; the operation is legalised into two vl16 halves.
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.ceil.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}
81
define void @frintp_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintp_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
; 1024-bit vector: a single vl64-predicated SVE op when VL is at least 1024.
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.ceil.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}
94
define void @frintp_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintp_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
; 2048-bit vector: a single vl128-predicated SVE op when VL is at least 2048.
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.ceil.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}
107
108; Don't use SVE for 64-bit vectors.
define <2 x float> @frintp_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintp_v2f32:
; CHECK: frintp v0.2s, v0.2s
; CHECK-NEXT: ret
; A 64-bit vector fits in a NEON register, so expect an unpredicated NEON frintp.
  %res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op)
  ret <2 x float> %res
}
116
117; Don't use SVE for 128-bit vectors.
define <4 x float> @frintp_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintp_v4f32:
; CHECK: frintp v0.4s, v0.4s
; CHECK-NEXT: ret
; A 128-bit vector fits in a NEON register, so expect an unpredicated NEON frintp.
  %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op)
  ret <4 x float> %res
}
125
define void @frintp_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintp_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
; 256-bit vector: expect a fixed-width SVE load/frintp/store predicated by vl8.
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}
138
define void @frintp_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintp_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
; 512-bit vector: single vl16 op when VL is at least 512; at exactly 256-bit SVE
; the operation is legalised into two vl8 halves.
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}
162
define void @frintp_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintp_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
; 1024-bit vector: a single vl32-predicated SVE op when VL is at least 1024.
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.ceil.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}
175
define void @frintp_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintp_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
; 2048-bit vector: a single vl64-predicated SVE op when VL is at least 2048.
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.ceil.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}
188
189; Don't use SVE for 64-bit vectors.
define <1 x double> @frintp_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintp_v1f64:
; CHECK: frintp d0, d0
; CHECK-NEXT: ret
; A <1 x double> lowers to a scalar, so expect the scalar form of frintp.
  %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
  ret <1 x double> %res
}
197
198; Don't use SVE for 128-bit vectors.
define <2 x double> @frintp_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintp_v2f64:
; CHECK: frintp v0.2d, v0.2d
; CHECK-NEXT: ret
; A 128-bit vector fits in a NEON register, so expect an unpredicated NEON frintp.
  %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op)
  ret <2 x double> %res
}
206
define void @frintp_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintp_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
; 256-bit vector: expect a fixed-width SVE load/frintp/store predicated by vl4.
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}
219
define void @frintp_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintp_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
; 512-bit vector: single vl8 op when VL is at least 512; at exactly 256-bit SVE
; the operation is legalised into two vl4 halves.
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}
243
define void @frintp_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintp_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
; 1024-bit vector: a single vl16-predicated SVE op when VL is at least 1024.
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.ceil.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}
256
define void @frintp_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintp_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
; 2048-bit vector: a single vl32-predicated SVE op when VL is at least 2048.
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.ceil.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}
269
270;
271; FLOOR -> FRINTM
272;
273
274; Don't use SVE for 64-bit vectors.
define <4 x half> @frintm_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintm_v4f16:
; CHECK: frintm v0.4h, v0.4h
; CHECK-NEXT: ret
; A 64-bit vector fits in a NEON register, so expect an unpredicated NEON frintm.
  %res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op)
  ret <4 x half> %res
}
282
283; Don't use SVE for 128-bit vectors.
define <8 x half> @frintm_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frintm_v8f16:
; CHECK: frintm v0.8h, v0.8h
; CHECK-NEXT: ret
; A 128-bit vector fits in a NEON register, so expect an unpredicated NEON frintm.
  %res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op)
  ret <8 x half> %res
}
291
define void @frintm_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frintm_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
; 256-bit vector: expect a fixed-width SVE load/frintm/store predicated by vl16.
  %op = load <16 x half>, <16 x half>* %a
  %res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op)
  store <16 x half> %res, <16 x half>* %a
  ret void
}
304
define void @frintm_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintm_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
; 512-bit vector: single vl32 op when VL is at least 512; at exactly 256-bit SVE
; the operation is legalised into two vl16 halves.
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.floor.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}
328
define void @frintm_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintm_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
; 1024-bit vector: a single vl64-predicated SVE op when VL is at least 1024.
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.floor.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}
341
define void @frintm_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintm_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
; 2048-bit vector: a single vl128-predicated SVE op when VL is at least 2048.
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.floor.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}
354
355; Don't use SVE for 64-bit vectors.
define <2 x float> @frintm_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintm_v2f32:
; CHECK: frintm v0.2s, v0.2s
; CHECK-NEXT: ret
; A 64-bit vector fits in a NEON register, so expect an unpredicated NEON frintm.
  %res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op)
  ret <2 x float> %res
}
363
364; Don't use SVE for 128-bit vectors.
define <4 x float> @frintm_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintm_v4f32:
; CHECK: frintm v0.4s, v0.4s
; CHECK-NEXT: ret
; A 128-bit vector fits in a NEON register, so expect an unpredicated NEON frintm.
  %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op)
  ret <4 x float> %res
}
372
define void @frintm_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintm_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
; 256-bit vector: expect a fixed-width SVE load/frintm/store predicated by vl8.
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}
385
define void @frintm_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintm_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
; 512-bit vector: single vl16 op when VL is at least 512; at exactly 256-bit SVE
; the operation is legalised into two vl8 halves.
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}
409
define void @frintm_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintm_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
; 1024-bit vector: a single vl32-predicated SVE op when VL is at least 1024.
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.floor.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}
422
define void @frintm_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintm_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
; 2048-bit vector: a single vl64-predicated SVE op when VL is at least 2048.
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.floor.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}
435
436; Don't use SVE for 64-bit vectors.
define <1 x double> @frintm_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintm_v1f64:
; CHECK: frintm d0, d0
; CHECK-NEXT: ret
; A <1 x double> lowers to a scalar, so expect the scalar form of frintm.
  %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
  ret <1 x double> %res
}
444
445; Don't use SVE for 128-bit vectors.
define <2 x double> @frintm_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintm_v2f64:
; CHECK: frintm v0.2d, v0.2d
; CHECK-NEXT: ret
; A 128-bit vector fits in a NEON register, so expect an unpredicated NEON frintm.
  %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op)
  ret <2 x double> %res
}
453
define void @frintm_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintm_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
; 256-bit vector: expect a fixed-width SVE load/frintm/store predicated by vl4.
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}
466
define void @frintm_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintm_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
; 512-bit vector: single vl8 op when VL is at least 512; at exactly 256-bit SVE
; the operation is legalised into two vl4 halves.
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}
490
define void @frintm_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintm_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
; 1024-bit vector: a single vl16-predicated SVE op when VL is at least 1024.
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.floor.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}
503
define void @frintm_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintm_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
; 2048-bit vector: a single vl32-predicated SVE op when VL is at least 2048.
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.floor.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}
516
517;
518; FNEARBYINT -> FRINTI
519;
520
521; Don't use SVE for 64-bit vectors.
define <4 x half> @frinti_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frinti_v4f16:
; CHECK: frinti v0.4h, v0.4h
; CHECK-NEXT: ret
; A 64-bit vector fits in a NEON register, so expect an unpredicated NEON frinti.
  %res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op)
  ret <4 x half> %res
}
529
530; Don't use SVE for 128-bit vectors.
define <8 x half> @frinti_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frinti_v8f16:
; CHECK: frinti v0.8h, v0.8h
; CHECK-NEXT: ret
; A 128-bit vector fits in a NEON register, so expect an unpredicated NEON frinti.
  %res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op)
  ret <8 x half> %res
}
538
define void @frinti_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frinti_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
; 256-bit vector: expect a fixed-width SVE load/frinti/store predicated by vl16.
  %op = load <16 x half>, <16 x half>* %a
  %res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op)
  store <16 x half> %res, <16 x half>* %a
  ret void
}
551
define void @frinti_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frinti_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
; 512-bit vector: single vl32 op when VL is at least 512; at exactly 256-bit SVE
; the operation is legalised into two vl16 halves.
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.nearbyint.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}
575
define void @frinti_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frinti_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
; 1024-bit vector: a single vl64-predicated SVE op when VL is at least 1024.
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.nearbyint.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}
588
define void @frinti_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frinti_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
; 2048-bit vector: a single vl128-predicated SVE op when VL is at least 2048.
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.nearbyint.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}
601
602; Don't use SVE for 64-bit vectors.
define <2 x float> @frinti_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frinti_v2f32:
; CHECK: frinti v0.2s, v0.2s
; CHECK-NEXT: ret
; A 64-bit vector fits in a NEON register, so expect an unpredicated NEON frinti.
  %res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op)
  ret <2 x float> %res
}
610
611; Don't use SVE for 128-bit vectors.
define <4 x float> @frinti_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frinti_v4f32:
; CHECK: frinti v0.4s, v0.4s
; CHECK-NEXT: ret
; A 128-bit vector fits in a NEON register, so expect an unpredicated NEON frinti.
  %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op)
  ret <4 x float> %res
}
619
define void @frinti_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frinti_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
; 256-bit vector: expect a fixed-width SVE load/frinti/store predicated by vl8.
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}
632
define void @frinti_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frinti_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
; 512-bit vector: single vl16 op when VL is at least 512; at exactly 256-bit SVE
; the operation is legalised into two vl8 halves.
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}
656
define void @frinti_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frinti_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
; 1024-bit vector: a single vl32-predicated SVE op when VL is at least 1024.
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.nearbyint.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}
669
define void @frinti_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frinti_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
; 2048-bit vector: a single vl64-predicated SVE op when VL is at least 2048.
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.nearbyint.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}
682
683; Don't use SVE for 64-bit vectors.
define <1 x double> @frinti_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frinti_v1f64:
; CHECK: frinti d0, d0
; CHECK-NEXT: ret
; A <1 x double> lowers to a scalar, so expect the scalar form of frinti.
  %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
  ret <1 x double> %res
}
691
692; Don't use SVE for 128-bit vectors.
define <2 x double> @frinti_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frinti_v2f64:
; CHECK: frinti v0.2d, v0.2d
; CHECK-NEXT: ret
; A 128-bit vector fits in a NEON register, so expect an unpredicated NEON frinti.
  %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op)
  ret <2 x double> %res
}
700
define void @frinti_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frinti_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
; 256-bit vector: expect a fixed-width SVE load/frinti/store predicated by vl4.
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}
713
define void @frinti_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frinti_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
; 512-bit vector: single vl8 op when VL is at least 512; at exactly 256-bit SVE
; the operation is legalised into two vl4 halves.
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}
737
define void @frinti_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frinti_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
; 1024-bit vector: a single vl16-predicated SVE op when VL is at least 1024.
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}
750
define void @frinti_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frinti_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
; 2048-bit vector: a single vl32-predicated SVE op when VL is at least 2048.
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.nearbyint.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}
763
764;
765; RINT -> FRINTX
766;
767
768; Don't use SVE for 64-bit vectors.
define <4 x half> @frintx_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintx_v4f16:
; CHECK: frintx v0.4h, v0.4h
; CHECK-NEXT: ret
; A 64-bit vector fits in a NEON register, so expect an unpredicated NEON frintx.
  %res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op)
  ret <4 x half> %res
}
776
777; Don't use SVE for 128-bit vectors.
778define <8 x half> @frintx_v8f16(<8 x half> %op) #0 {
779; CHECK-LABEL: frintx_v8f16:
780; CHECK: frintx v0.8h, v0.8h
781; CHECK-NEXT: ret
782  %res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op)
783  ret <8 x half> %res
784}
785
786define void @frintx_v16f16(<16 x half>* %a) #0 {
787; CHECK-LABEL: frintx_v16f16:
788; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
789; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
790; CHECK-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
791; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
792; CHECK-NEXT: ret
793  %op = load <16 x half>, <16 x half>* %a
794  %res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op)
795  store <16 x half> %res, <16 x half>* %a
796  ret void
797}
798
; 32 x half needs 512 bits: single-register form at >=512, otherwise the
; 256-bit configuration must split into a low and a high half-operation.
define void @frintx_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintx_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.rint.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}

define void @frintx_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintx_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.rint.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}

define void @frintx_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintx_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.rint.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}
848
; Single-precision variants of the rint tests; same NEON-vs-SVE split.
; Don't use SVE for 64-bit vectors.
define <2 x float> @frintx_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintx_v2f32:
; CHECK: frintx v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.rint.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintx_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintx_v4f32:
; CHECK: frintx v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

; 8 x float (256 bits) fits every configured vector length.
define void @frintx_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintx_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}
879
; 16 x float needs 512 bits; at exactly 256 bits the operation is split
; into two halves addressed via an element-count offset register.
define void @frintx_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintx_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.rint.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}

define void @frintx_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintx_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.rint.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}

define void @frintx_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintx_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.rint.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}
929
; Double-precision variants; the 1-element case uses the scalar form.
; Don't use SVE for 64-bit vectors.
define <1 x double> @frintx_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintx_v1f64:
; CHECK: frintx d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintx_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintx_v2f64:
; CHECK: frintx v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

; 4 x double (256 bits) fits every configured vector length.
define void @frintx_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintx_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}
960
; 8 x double needs 512 bits; at exactly 256 bits the operation is split
; into two halves addressed via an element-count offset register.
define void @frintx_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintx_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.rint.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}

define void @frintx_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintx_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.rint.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}

define void @frintx_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintx_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.rint.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}
1010
1011;
1012; ROUND -> FRINTA
1013;
1014
; llvm.round lowers to frinta; same NEON-vs-SVE width split as the
; sections above.
; Don't use SVE for 64-bit vectors.
define <4 x half> @frinta_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frinta_v4f16:
; CHECK: frinta v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.round.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frinta_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frinta_v8f16:
; CHECK: frinta v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.round.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frinta_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frinta_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, <16 x half>* %a
  %res = call <16 x half> @llvm.round.v16f16(<16 x half> %op)
  store <16 x half> %res, <16 x half>* %a
  ret void
}
1045
; 32 x half needs 512 bits; the 256-bit configuration splits into two halves.
define void @frinta_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frinta_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.round.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}

define void @frinta_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frinta_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.round.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}

define void @frinta_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frinta_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.round.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}
1095
; Single-precision variants of the round tests.
; Don't use SVE for 64-bit vectors.
define <2 x float> @frinta_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frinta_v2f32:
; CHECK: frinta v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.round.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frinta_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frinta_v4f32:
; CHECK: frinta v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.round.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frinta_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frinta_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.round.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}
1126
; 16 x float needs 512 bits; the 256-bit configuration splits into two halves.
define void @frinta_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frinta_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.round.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}

define void @frinta_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frinta_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.round.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}

define void @frinta_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frinta_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.round.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}
1176
; Double-precision variants of the round tests.
; Don't use SVE for 64-bit vectors.
define <1 x double> @frinta_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frinta_v1f64:
; CHECK: frinta d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frinta_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frinta_v2f64:
; CHECK: frinta v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.round.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frinta_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frinta_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.round.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}
1207
; 8 x double needs 512 bits; the 256-bit configuration splits into two halves.
define void @frinta_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frinta_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.round.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}

define void @frinta_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frinta_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.round.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}

define void @frinta_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frinta_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.round.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}
1257
1258;
1259; ROUNDEVEN -> FRINTN
1260;
1261
; llvm.roundeven lowers to frintn; sub-SVE widths stay on NEON.
; Don't use SVE for 64-bit vectors.
define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintn_v4f16:
; CHECK: frintn v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frintn_v8f16:
; CHECK: frintn v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
  ret <8 x half> %res
}
1279
; The half-precision roundeven cases use the same ptrue/ld1h/frintn/st1h
; sequence as every other section. The single-register load checks below
; previously used a stray -DAG where all sibling sections (nearbyint, rint,
; round, trunc) pin the load with -NEXT; tightened here for consistency.
define void @frintn_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frintn_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, <16 x half>* %a
  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
  store <16 x half> %res, <16 x half>* %a
  ret void
}

define void @frintn_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintn_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}

define void @frintn_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintn_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}

define void @frintn_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintn_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}
1342
; Single-precision NEON cases for roundeven.
; Don't use SVE for 64-bit vectors.
define <2 x float> @frintn_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintn_v2f32:
; CHECK: frintn v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintn_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintn_v4f32:
; CHECK: frintn v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
  ret <4 x float> %res
}
1360
; Single-register load checks tightened from a stray -DAG to -NEXT to match
; the sibling sections (nearbyint, rint, round, trunc), which check the
; identical ptrue/load/op/store shape with -NEXT.
define void @frintn_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintn_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}

define void @frintn_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintn_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}

define void @frintn_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintn_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}

define void @frintn_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintn_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}
1423
; Double-precision NEON/scalar cases for roundeven.
; Don't use SVE for 64-bit vectors.
define <1 x double> @frintn_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintn_v1f64:
; CHECK: frintn d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintn_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintn_v2f64:
; CHECK: frintn v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
  ret <2 x double> %res
}
1441
; Single-register load checks tightened from a stray -DAG to -NEXT to match
; the sibling sections (nearbyint, rint, round, trunc), which check the
; identical ptrue/load/op/store shape with -NEXT.
define void @frintn_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintn_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}

define void @frintn_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintn_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}

define void @frintn_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintn_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}

define void @frintn_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintn_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}
1504
1505;
1506; TRUNC -> FRINTZ
1507;
1508
; llvm.trunc lowers to frintz; sub-SVE widths stay on NEON.
; Don't use SVE for 64-bit vectors.
define <4 x half> @frintz_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintz_v4f16:
; CHECK: frintz v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.trunc.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintz_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frintz_v8f16:
; CHECK: frintz v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintz_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frintz_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, <16 x half>* %a
  %res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op)
  store <16 x half> %res, <16 x half>* %a
  ret void
}
1539
define void @frintz_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintz_v32f16:
; With 512-bit or wider SVE the whole <32 x half> fits in a single register.
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation: at exactly 256 bits the operation is
; split into two 16-element halves (the high half addressed at
; [x0 + 16 halfwords]) rather than being scalarised.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.trunc.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}
1563
define void @frintz_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintz_v64f16:
; Only checked when SVE registers are >= 1024 bits, wide enough to hold the
; whole <64 x half> in one register.
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.trunc.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}
1576
define void @frintz_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintz_v128f16:
; Only checked when SVE registers are >= 2048 bits, wide enough to hold the
; whole <128 x half> in one register.
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.trunc.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}
1589
; Don't use SVE for 64-bit vectors: a <2 x float> fits in a NEON D register,
; so a single unpredicated NEON frintz is expected.
define <2 x float> @frintz_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintz_v2f32:
; CHECK: frintz v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op)
  ret <2 x float> %res
}
1598
; Don't use SVE for 128-bit vectors: a <4 x float> fits in a NEON Q register,
; so a single unpredicated NEON frintz is expected.
define <4 x float> @frintz_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintz_v4f32:
; CHECK: frintz v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op)
  ret <4 x float> %res
}
1607
; 256-bit vector: every RUN line carrying the plain CHECK prefix has
; sve-vector-bits-min >= 256, so a single predicated SVE
; load/frintz/store sequence is expected.
define void @frintz_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintz_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}
1620
define void @frintz_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintz_v16f32:
; With 512-bit or wider SVE the whole <16 x float> fits in a single register.
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation: at exactly 256 bits the operation is
; split into two 8-element halves (the high half addressed at
; [x0 + 8 words]) rather than being scalarised.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}
1644
define void @frintz_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintz_v32f32:
; Only checked when SVE registers are >= 1024 bits, wide enough to hold the
; whole <32 x float> in one register.
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.trunc.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}
1657
define void @frintz_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintz_v64f32:
; Only checked when SVE registers are >= 2048 bits, wide enough to hold the
; whole <64 x float> in one register.
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.trunc.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}
1670
; Don't use SVE for 64-bit vectors: a <1 x double> is a single element, so a
; scalar frintz on the D register is expected.
define <1 x double> @frintz_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintz_v1f64:
; CHECK: frintz d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)
  ret <1 x double> %res
}
1679
; Don't use SVE for 128-bit vectors: a <2 x double> fits in a NEON Q register,
; so a single unpredicated NEON frintz is expected.
define <2 x double> @frintz_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintz_v2f64:
; CHECK: frintz v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op)
  ret <2 x double> %res
}
1688
; 256-bit vector: every RUN line carrying the plain CHECK prefix has
; sve-vector-bits-min >= 256, so a single predicated SVE
; load/frintz/store sequence is expected.
define void @frintz_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintz_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}
1701
define void @frintz_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintz_v8f64:
; With 512-bit or wider SVE the whole <8 x double> fits in a single register.
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation: at exactly 256 bits the operation is
; split into two 4-element halves (the high half addressed at
; [x0 + 4 doublewords]) rather than being scalarised.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}
1725
define void @frintz_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintz_v16f64:
; Only checked when SVE registers are >= 1024 bits, wide enough to hold the
; whole <16 x double> in one register.
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.trunc.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}
1738
define void @frintz_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintz_v32f64:
; Only checked when SVE registers are >= 2048 bits, wide enough to hold the
; whole <32 x double> in one register.
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.trunc.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}
1751
1752attributes #0 = { "target-features"="+sve" }
1753
1754declare <4 x half> @llvm.ceil.v4f16(<4 x half>)
1755declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
1756declare <16 x half> @llvm.ceil.v16f16(<16 x half>)
1757declare <32 x half> @llvm.ceil.v32f16(<32 x half>)
1758declare <64 x half> @llvm.ceil.v64f16(<64 x half>)
1759declare <128 x half> @llvm.ceil.v128f16(<128 x half>)
1760declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
1761declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
1762declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
1763declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
1764declare <32 x float> @llvm.ceil.v32f32(<32 x float>)
1765declare <64 x float> @llvm.ceil.v64f32(<64 x float>)
1766declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
1767declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
1768declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
1769declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
1770declare <16 x double> @llvm.ceil.v16f64(<16 x double>)
1771declare <32 x double> @llvm.ceil.v32f64(<32 x double>)
1772
1773declare <4 x half> @llvm.floor.v4f16(<4 x half>)
1774declare <8 x half> @llvm.floor.v8f16(<8 x half>)
1775declare <16 x half> @llvm.floor.v16f16(<16 x half>)
1776declare <32 x half> @llvm.floor.v32f16(<32 x half>)
1777declare <64 x half> @llvm.floor.v64f16(<64 x half>)
1778declare <128 x half> @llvm.floor.v128f16(<128 x half>)
1779declare <2 x float> @llvm.floor.v2f32(<2 x float>)
1780declare <4 x float> @llvm.floor.v4f32(<4 x float>)
1781declare <8 x float> @llvm.floor.v8f32(<8 x float>)
1782declare <16 x float> @llvm.floor.v16f32(<16 x float>)
1783declare <32 x float> @llvm.floor.v32f32(<32 x float>)
1784declare <64 x float> @llvm.floor.v64f32(<64 x float>)
1785declare <1 x double> @llvm.floor.v1f64(<1 x double>)
1786declare <2 x double> @llvm.floor.v2f64(<2 x double>)
1787declare <4 x double> @llvm.floor.v4f64(<4 x double>)
1788declare <8 x double> @llvm.floor.v8f64(<8 x double>)
1789declare <16 x double> @llvm.floor.v16f64(<16 x double>)
1790declare <32 x double> @llvm.floor.v32f64(<32 x double>)
1791
1792declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>)
1793declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
1794declare <16 x half> @llvm.nearbyint.v16f16(<16 x half>)
1795declare <32 x half> @llvm.nearbyint.v32f16(<32 x half>)
1796declare <64 x half> @llvm.nearbyint.v64f16(<64 x half>)
1797declare <128 x half> @llvm.nearbyint.v128f16(<128 x half>)
1798declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
1799declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
1800declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
1801declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
1802declare <32 x float> @llvm.nearbyint.v32f32(<32 x float>)
1803declare <64 x float> @llvm.nearbyint.v64f32(<64 x float>)
1804declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
1805declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
1806declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
1807declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
1808declare <16 x double> @llvm.nearbyint.v16f64(<16 x double>)
1809declare <32 x double> @llvm.nearbyint.v32f64(<32 x double>)
1810
1811declare <4 x half> @llvm.rint.v4f16(<4 x half>)
1812declare <8 x half> @llvm.rint.v8f16(<8 x half>)
1813declare <16 x half> @llvm.rint.v16f16(<16 x half>)
1814declare <32 x half> @llvm.rint.v32f16(<32 x half>)
1815declare <64 x half> @llvm.rint.v64f16(<64 x half>)
1816declare <128 x half> @llvm.rint.v128f16(<128 x half>)
1817declare <2 x float> @llvm.rint.v2f32(<2 x float>)
1818declare <4 x float> @llvm.rint.v4f32(<4 x float>)
1819declare <8 x float> @llvm.rint.v8f32(<8 x float>)
1820declare <16 x float> @llvm.rint.v16f32(<16 x float>)
1821declare <32 x float> @llvm.rint.v32f32(<32 x float>)
1822declare <64 x float> @llvm.rint.v64f32(<64 x float>)
1823declare <1 x double> @llvm.rint.v1f64(<1 x double>)
1824declare <2 x double> @llvm.rint.v2f64(<2 x double>)
1825declare <4 x double> @llvm.rint.v4f64(<4 x double>)
1826declare <8 x double> @llvm.rint.v8f64(<8 x double>)
1827declare <16 x double> @llvm.rint.v16f64(<16 x double>)
1828declare <32 x double> @llvm.rint.v32f64(<32 x double>)
1829
1830declare <4 x half> @llvm.round.v4f16(<4 x half>)
1831declare <8 x half> @llvm.round.v8f16(<8 x half>)
1832declare <16 x half> @llvm.round.v16f16(<16 x half>)
1833declare <32 x half> @llvm.round.v32f16(<32 x half>)
1834declare <64 x half> @llvm.round.v64f16(<64 x half>)
1835declare <128 x half> @llvm.round.v128f16(<128 x half>)
1836declare <2 x float> @llvm.round.v2f32(<2 x float>)
1837declare <4 x float> @llvm.round.v4f32(<4 x float>)
1838declare <8 x float> @llvm.round.v8f32(<8 x float>)
1839declare <16 x float> @llvm.round.v16f32(<16 x float>)
1840declare <32 x float> @llvm.round.v32f32(<32 x float>)
1841declare <64 x float> @llvm.round.v64f32(<64 x float>)
1842declare <1 x double> @llvm.round.v1f64(<1 x double>)
1843declare <2 x double> @llvm.round.v2f64(<2 x double>)
1844declare <4 x double> @llvm.round.v4f64(<4 x double>)
1845declare <8 x double> @llvm.round.v8f64(<8 x double>)
1846declare <16 x double> @llvm.round.v16f64(<16 x double>)
1847declare <32 x double> @llvm.round.v32f64(<32 x double>)
1848
1849declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
1850declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
1851declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
1852declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
1853declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
1854declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
1855declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
1856declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
1857declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
1858declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
1859declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
1860declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
1861declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
1862declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
1863declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
1864declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
1865declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
1866declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)
1867
1868declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
1869declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
1870declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
1871declare <32 x half> @llvm.trunc.v32f16(<32 x half>)
1872declare <64 x half> @llvm.trunc.v64f16(<64 x half>)
1873declare <128 x half> @llvm.trunc.v128f16(<128 x half>)
1874declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
1875declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
1876declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
1877declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
1878declare <32 x float> @llvm.trunc.v32f32(<32 x float>)
1879declare <64 x float> @llvm.trunc.v64f32(<64 x float>)
1880declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
1881declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
1882declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
1883declare <8 x double> @llvm.trunc.v8f64(<8 x double>)
1884declare <16 x double> @llvm.trunc.v16f64(<16 x double>)
1885declare <32 x double> @llvm.trunc.v32f64(<32 x double>)
1886