; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP

; llvm.ceil on <4 x float>.
; The +mve,+fullfp16 run expects four scalar vrintp.f32 (one per lane) followed
; by a q-register move; the +mve.fp run expects a single vector vrintp.f32.
define arm_aapcs_vfpcc <4 x float> @fceil_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: fceil_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vrintp.f32 s7, s3
; CHECK-MVE-NEXT:    vrintp.f32 s6, s2
; CHECK-MVE-NEXT:    vrintp.f32 s5, s1
; CHECK-MVE-NEXT:    vrintp.f32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: fceil_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintp.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; llvm.ceil on <8 x half>.
; The +mve,+fullfp16 run expects a per-lane scalar sequence: vmovx.f16 extracts
; the upper half of each s-register, both halves are rounded with vrintp.f16,
; and vins.f16 re-packs them. The +mve.fp run expects one vector vrintp.f16.
define arm_aapcs_vfpcc <8 x half> @fceil_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: fceil_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q1, q0
; CHECK-MVE-NEXT:    vmovx.f16 s0, s4
; CHECK-MVE-NEXT:    vrintp.f16 s8, s0
; CHECK-MVE-NEXT:    vrintp.f16 s0, s4
; CHECK-MVE-NEXT:    vins.f16 s0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
; CHECK-MVE-NEXT:    vrintp.f16 s1, s5
; CHECK-MVE-NEXT:    vins.f16 s1, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
; CHECK-MVE-NEXT:    vrintp.f16 s2, s6
; CHECK-MVE-NEXT:    vins.f16 s2, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s7
; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
; CHECK-MVE-NEXT:    vrintp.f16 s3, s7
; CHECK-MVE-NEXT:    vins.f16 s3, s8
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: fceil_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintp.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.ceil.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; llvm.ceil on <2 x double>.
; Both runs share one set of assertions: the expected output makes two calls to
; ceil, one per element, keeping the vector in q4 (d8/d9) across the calls.
define arm_aapcs_vfpcc <2 x double> @fceil_float64_t(<2 x double> %src) {
; CHECK-LABEL: fceil_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl ceil
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl ceil
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; llvm.trunc on <4 x float>.
; The +mve,+fullfp16 run expects four scalar vrintz.f32 (one per lane) followed
; by a q-register move; the +mve.fp run expects a single vector vrintz.f32.
define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ftrunc_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vrintz.f32 s7, s3
; CHECK-MVE-NEXT:    vrintz.f32 s6, s2
; CHECK-MVE-NEXT:    vrintz.f32 s5, s1
; CHECK-MVE-NEXT:    vrintz.f32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: ftrunc_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintz.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.trunc.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; llvm.trunc on <8 x half>.
; The +mve,+fullfp16 run expects a per-lane scalar sequence: vmovx.f16 extracts
; the upper half of each s-register, both halves are rounded with vrintz.f16,
; and vins.f16 re-packs them. The +mve.fp run expects one vector vrintz.f16.
define arm_aapcs_vfpcc <8 x half> @ftrunc_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: ftrunc_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q1, q0
; CHECK-MVE-NEXT:    vmovx.f16 s0, s4
; CHECK-MVE-NEXT:    vrintz.f16 s8, s0
; CHECK-MVE-NEXT:    vrintz.f16 s0, s4
; CHECK-MVE-NEXT:    vins.f16 s0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
; CHECK-MVE-NEXT:    vrintz.f16 s1, s5
; CHECK-MVE-NEXT:    vins.f16 s1, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
; CHECK-MVE-NEXT:    vrintz.f16 s2, s6
; CHECK-MVE-NEXT:    vins.f16 s2, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s7
; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
; CHECK-MVE-NEXT:    vrintz.f16 s3, s7
; CHECK-MVE-NEXT:    vins.f16 s3, s8
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: ftrunc_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintz.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.trunc.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; llvm.trunc on <2 x double>.
; Both runs share one set of assertions: the expected output makes two calls to
; trunc, one per element, keeping the vector in q4 (d8/d9) across the calls.
define arm_aapcs_vfpcc <2 x double> @ftrunc_float64_t(<2 x double> %src) {
; CHECK-LABEL: ftrunc_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl trunc
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl trunc
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; llvm.rint on <4 x float>.
; The +mve,+fullfp16 run expects four scalar vrintx.f32 (one per lane) followed
; by a q-register move; the +mve.fp run expects a single vector vrintx.f32.
define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: frint_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vrintx.f32 s7, s3
; CHECK-MVE-NEXT:    vrintx.f32 s6, s2
; CHECK-MVE-NEXT:    vrintx.f32 s5, s1
; CHECK-MVE-NEXT:    vrintx.f32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: frint_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintx.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.rint.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; llvm.rint on <8 x half>.
; The +mve,+fullfp16 run expects a per-lane scalar sequence: vmovx.f16 extracts
; the upper half of each s-register, both halves are rounded with vrintx.f16,
; and vins.f16 re-packs them. The +mve.fp run expects one vector vrintx.f16.
define arm_aapcs_vfpcc <8 x half> @frint_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: frint_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q1, q0
; CHECK-MVE-NEXT:    vmovx.f16 s0, s4
; CHECK-MVE-NEXT:    vrintx.f16 s8, s0
; CHECK-MVE-NEXT:    vrintx.f16 s0, s4
; CHECK-MVE-NEXT:    vins.f16 s0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
; CHECK-MVE-NEXT:    vrintx.f16 s1, s5
; CHECK-MVE-NEXT:    vins.f16 s1, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
; CHECK-MVE-NEXT:    vrintx.f16 s2, s6
; CHECK-MVE-NEXT:    vins.f16 s2, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s7
; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
; CHECK-MVE-NEXT:    vrintx.f16 s3, s7
; CHECK-MVE-NEXT:    vins.f16 s3, s8
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: frint_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintx.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.rint.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; llvm.rint on <2 x double>.
; Both runs share one set of assertions: the expected output makes two calls to
; rint, one per element, keeping the vector in q4 (d8/d9) across the calls.
define arm_aapcs_vfpcc <2 x double> @frint_float64_t(<2 x double> %src) {
; CHECK-LABEL: frint_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl rint
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl rint
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; llvm.nearbyint on <4 x float>.
; Both runs share one set of assertions: even with +mve.fp the expected output
; is four scalar vrintr.f32 (one per lane) followed by a q-register move.
define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) {
; CHECK-LABEL: fnearbyint_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintr.f32 s7, s3
; CHECK-NEXT:    vrintr.f32 s6, s2
; CHECK-NEXT:    vrintr.f32 s5, s1
; CHECK-NEXT:    vrintr.f32 s4, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.nearbyint.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; llvm.nearbyint on <8 x half>.
; Both runs share one set of assertions: even with +mve.fp the expected output
; is the per-lane scalar sequence (vmovx.f16 to extract the upper half,
; vrintr.f16 on both halves, vins.f16 to re-pack).
define arm_aapcs_vfpcc <8 x half> @fnearbyint_float16_t(<8 x half> %src) {
; CHECK-LABEL: fnearbyint_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmovx.f16 s0, s4
; CHECK-NEXT:    vrintr.f16 s8, s0
; CHECK-NEXT:    vrintr.f16 s0, s4
; CHECK-NEXT:    vins.f16 s0, s8
; CHECK-NEXT:    vmovx.f16 s8, s5
; CHECK-NEXT:    vrintr.f16 s8, s8
; CHECK-NEXT:    vrintr.f16 s1, s5
; CHECK-NEXT:    vins.f16 s1, s8
; CHECK-NEXT:    vmovx.f16 s8, s6
; CHECK-NEXT:    vrintr.f16 s8, s8
; CHECK-NEXT:    vrintr.f16 s2, s6
; CHECK-NEXT:    vins.f16 s2, s8
; CHECK-NEXT:    vmovx.f16 s8, s7
; CHECK-NEXT:    vrintr.f16 s8, s8
; CHECK-NEXT:    vrintr.f16 s3, s7
; CHECK-NEXT:    vins.f16 s3, s8
; CHECK-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.nearbyint.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; llvm.nearbyint on <2 x double>.
; Both runs share one set of assertions: the expected output makes two calls to
; nearbyint, one per element, keeping the vector in q4 (d8/d9) across the calls.
define arm_aapcs_vfpcc <2 x double> @fnearbyint_float64_t(<2 x double> %src) {
; CHECK-LABEL: fnearbyint_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl nearbyint
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl nearbyint
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; llvm.floor on <4 x float>.
; The +mve,+fullfp16 run expects four scalar vrintm.f32 (one per lane) followed
; by a q-register move; the +mve.fp run expects a single vector vrintm.f32.
define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ffloor_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vrintm.f32 s7, s3
; CHECK-MVE-NEXT:    vrintm.f32 s6, s2
; CHECK-MVE-NEXT:    vrintm.f32 s5, s1
; CHECK-MVE-NEXT:    vrintm.f32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: ffloor_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintm.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.floor.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; llvm.floor on <8 x half>.
; The +mve,+fullfp16 run expects a per-lane scalar sequence: vmovx.f16 extracts
; the upper half of each s-register, both halves are rounded with vrintm.f16,
; and vins.f16 re-packs them. The +mve.fp run expects one vector vrintm.f16.
define arm_aapcs_vfpcc <8 x half> @ffloor_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: ffloor_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q1, q0
; CHECK-MVE-NEXT:    vmovx.f16 s0, s4
; CHECK-MVE-NEXT:    vrintm.f16 s8, s0
; CHECK-MVE-NEXT:    vrintm.f16 s0, s4
; CHECK-MVE-NEXT:    vins.f16 s0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
; CHECK-MVE-NEXT:    vrintm.f16 s1, s5
; CHECK-MVE-NEXT:    vins.f16 s1, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
; CHECK-MVE-NEXT:    vrintm.f16 s2, s6
; CHECK-MVE-NEXT:    vins.f16 s2, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s7
; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
; CHECK-MVE-NEXT:    vrintm.f16 s3, s7
; CHECK-MVE-NEXT:    vins.f16 s3, s8
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: ffloor_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrintm.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.floor.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; llvm.floor on <2 x double>.
; Both runs share one set of assertions: the expected output makes two calls to
; floor, one per element, keeping the vector in q4 (d8/d9) across the calls.
define arm_aapcs_vfpcc <2 x double> @ffloor_float64_t(<2 x double> %src) {
; CHECK-LABEL: ffloor_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl floor
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl floor
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; llvm.round on <4 x float>.
; The +mve,+fullfp16 run expects four scalar vrinta.f32 (one per lane) followed
; by a q-register move; the +mve.fp run expects a single vector vrinta.f32.
define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: fround_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vrinta.f32 s7, s3
; CHECK-MVE-NEXT:    vrinta.f32 s6, s2
; CHECK-MVE-NEXT:    vrinta.f32 s5, s1
; CHECK-MVE-NEXT:    vrinta.f32 s4, s0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: fround_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrinta.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.round.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; llvm.round on <8 x half>.
; The +mve,+fullfp16 run expects a per-lane scalar sequence: vmovx.f16 extracts
; the upper half of each s-register, both halves are rounded with vrinta.f16,
; and vins.f16 re-packs them. The +mve.fp run expects one vector vrinta.f16.
define arm_aapcs_vfpcc <8 x half> @fround_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: fround_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q1, q0
; CHECK-MVE-NEXT:    vmovx.f16 s0, s4
; CHECK-MVE-NEXT:    vrinta.f16 s8, s0
; CHECK-MVE-NEXT:    vrinta.f16 s0, s4
; CHECK-MVE-NEXT:    vins.f16 s0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s5
; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
; CHECK-MVE-NEXT:    vrinta.f16 s1, s5
; CHECK-MVE-NEXT:    vins.f16 s1, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s6
; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
; CHECK-MVE-NEXT:    vrinta.f16 s2, s6
; CHECK-MVE-NEXT:    vins.f16 s2, s8
; CHECK-MVE-NEXT:    vmovx.f16 s8, s7
; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
; CHECK-MVE-NEXT:    vrinta.f16 s3, s7
; CHECK-MVE-NEXT:    vins.f16 s3, s8
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: fround_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vrinta.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.round.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; llvm.round on <2 x double>.
; Both runs share one set of assertions: the expected output makes two calls to
; round, one per element, keeping the vector in q4 (d8/d9) across the calls.
define arm_aapcs_vfpcc <2 x double> @fround_float64_t(<2 x double> %src) {
; CHECK-LABEL: fround_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl round
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl round
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; Declarations of the rounding intrinsics called by the test functions in this
; file, grouped by vector type: <4 x float>, <8 x half>, then <2 x double>.
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
declare <4 x float> @llvm.floor.v4f32(<4 x float>)
declare <4 x float> @llvm.round.v4f32(<4 x float>)
declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
declare <8 x half> @llvm.rint.v8f16(<8 x half>)
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
declare <8 x half> @llvm.round.v8f16(<8 x half>)
declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
declare <2 x double> @llvm.round.v2f64(<2 x double>)
