; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -mattr=+neon,+fp16 %s -o - | FileCheck %s
3
; fptosi on a 64-bit vector: loads <2 x float> from %A and converts each lane
; to a signed i32. The expected output is a single D-register vcvt.s32.f32.
define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
; CHECK-LABEL: vcvt_f32tos32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.s32.f32 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
  ret <2 x i32> %tmp2
}
15
; fptoui on a 64-bit vector: loads <2 x float> from %A and converts each lane
; to an unsigned i32. The expected output is a single D-register vcvt.u32.f32.
define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
; CHECK-LABEL: vcvt_f32tou32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.u32.f32 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
  ret <2 x i32> %tmp2
}
27
; sitofp on a 64-bit vector: loads <2 x i32> from %A and converts each signed
; lane to float. The expected output is a single D-register vcvt.f32.s32.
define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vcvt_s32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.f32.s32 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = sitofp <2 x i32> %tmp1 to <2 x float>
  ret <2 x float> %tmp2
}
39
; uitofp on a 64-bit vector: loads <2 x i32> from %A and converts each
; unsigned lane to float. The expected output is a single D-register
; vcvt.f32.u32.
define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vcvt_u32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.f32.u32 d16, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = uitofp <2 x i32> %tmp1 to <2 x float>
  ret <2 x float> %tmp2
}
51
; fptosi on a 128-bit vector: loads <4 x float> from %A and converts each lane
; to a signed i32. The expected output is a single Q-register vcvt.s32.f32.
define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
; CHECK-LABEL: vcvtQ_f32tos32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.s32.f32 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
  ret <4 x i32> %tmp2
}
64
; fptoui on a 128-bit vector: loads <4 x float> from %A and converts each lane
; to an unsigned i32. The expected output is a single Q-register vcvt.u32.f32.
define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
; CHECK-LABEL: vcvtQ_f32tou32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.u32.f32 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
  ret <4 x i32> %tmp2
}
77
; sitofp on a 128-bit vector: loads <4 x i32> from %A and converts each signed
; lane to float. The expected output is a single Q-register vcvt.f32.s32.
define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
; CHECK-LABEL: vcvtQ_s32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.f32.s32 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = sitofp <4 x i32> %tmp1 to <4 x float>
  ret <4 x float> %tmp2
}
90
; uitofp on a 128-bit vector: loads <4 x i32> from %A and converts each
; unsigned lane to float. The expected output is a single Q-register
; vcvt.f32.u32.
define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
; CHECK-LABEL: vcvtQ_u32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.f32.u32 q8, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
  ret <4 x float> %tmp2
}
103
; Float -> signed fixed-point via the vcvtfp2fxs intrinsic on a 64-bit vector.
; The trailing i32 1 operand is expected to surface as the #1 immediate of the
; D-register vcvt.s32.f32.
define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
; CHECK-LABEL: vcvt_n_f32tos32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.s32.f32 d16, d16, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
  ret <2 x i32> %tmp2
}
115
; Float -> unsigned fixed-point via the vcvtfp2fxu intrinsic on a 64-bit
; vector. The trailing i32 1 operand is expected to surface as the #1
; immediate of the D-register vcvt.u32.f32.
define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
; CHECK-LABEL: vcvt_n_f32tou32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.u32.f32 d16, d16, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
  ret <2 x i32> %tmp2
}
127
; Signed fixed-point -> float via the vcvtfxs2fp intrinsic on a 64-bit vector.
; The trailing i32 1 operand is expected to surface as the #1 immediate of the
; D-register vcvt.f32.s32.
define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vcvt_n_s32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.f32.s32 d16, d16, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
  ret <2 x float> %tmp2
}
139
; Unsigned fixed-point -> float via the vcvtfxu2fp intrinsic on a 64-bit
; vector. The trailing i32 1 operand is expected to surface as the #1
; immediate of the D-register vcvt.f32.u32.
define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vcvt_n_u32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.f32.u32 d16, d16, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
  ret <2 x float> %tmp2
}
151
; Fixed-point conversion intrinsics, 64-bit vector forms, called by the
; vcvt_n_* tests. Each takes the source vector plus an i32 operand.
declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
156
; Float -> signed fixed-point via the vcvtfp2fxs intrinsic on a 128-bit
; vector. The trailing i32 1 operand is expected to surface as the #1
; immediate of the Q-register vcvt.s32.f32.
define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
; CHECK-LABEL: vcvtQ_n_f32tos32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.s32.f32 q8, q8, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
  ret <4 x i32> %tmp2
}
169
; Float -> unsigned fixed-point via the vcvtfp2fxu intrinsic on a 128-bit
; vector. The trailing i32 1 operand is expected to surface as the #1
; immediate of the Q-register vcvt.u32.f32.
define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
; CHECK-LABEL: vcvtQ_n_f32tou32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.u32.f32 q8, q8, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
  ret <4 x i32> %tmp2
}
182
; Signed fixed-point -> float via the vcvtfxs2fp intrinsic on a 128-bit
; vector. The trailing i32 1 operand is expected to surface as the #1
; immediate of the Q-register vcvt.f32.s32.
define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
; CHECK-LABEL: vcvtQ_n_s32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.f32.s32 q8, q8, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
  ret <4 x float> %tmp2
}
195
; Unsigned fixed-point -> float via the vcvtfxu2fp intrinsic on a 128-bit
; vector. The trailing i32 1 operand is expected to surface as the #1
; immediate of the Q-register vcvt.f32.u32.
define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
; CHECK-LABEL: vcvtQ_n_u32tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.f32.u32 q8, q8, #1
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
  ret <4 x float> %tmp2
}
208
; Fixed-point conversion intrinsics, 128-bit vector forms, called by the
; vcvtQ_n_* tests. Each takes the source vector plus an i32 operand.
declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
213
; Half -> single precision widening via the vcvthf2fp intrinsic. The source
; is passed as <4 x i16> and the expected output is vcvt.f32.f16 from a D
; register into a Q register (+fp16 is enabled on the llc command line).
define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind {
; CHECK-LABEL: vcvt_f16tof32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcvt.f32.f16 q8, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %tmp1)
  ret <4 x float> %tmp2
}
226
; Single -> half precision narrowing via the vcvtfp2hf intrinsic. The result
; is returned as <4 x i16> and the expected output is vcvt.f16.f32 from a Q
; register into a D register (+fp16 is enabled on the llc command line).
define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
; CHECK-LABEL: vcvt_f32tof16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcvt.f16.f32 d16, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %tmp1)
  ret <4 x i16> %tmp2
}
238
; Half-precision conversion intrinsics called by vcvt_f16tof32 and
; vcvt_f32tof16; the half-precision side is typed as <4 x i16>.
declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
241
242
; fmul by 2.0 followed by fptoui to a narrower element type (<4 x i16>).
; The expected output folds the multiply into vcvt.u32.f32 with a #1
; immediate and then narrows the 32-bit lanes with vmovn.i32.
define <4 x i16> @fix_float_to_i16(<4 x float> %in) {
; CHECK-LABEL: fix_float_to_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d17, r2, r3
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vcvt.u32.f32 q8, q8, #1
; CHECK-NEXT:    vmovn.i32 d16, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr

  %scale = fmul <4 x float> %in, <float 2.0, float 2.0, float 2.0, float 2.0>
  %conv = fptoui <4 x float> %scale to <4 x i16>
  ret <4 x i16> %conv
}
257
; fmul by 2.0 followed by fptoui to <2 x i64>. No fixed-point vcvt is
; expected here: the multiply appears as vadd.f32 (x + x) and each lane is
; converted by a call to __aeabi_f2ulz, with d8/d9 and r4 saved around the
; calls.
define <2 x i64> @fix_float_to_i64(<2 x float> %in) {
; CHECK-LABEL: fix_float_to_i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vadd.f32 d8, d16, d16
; CHECK-NEXT:    vmov r0, s17
; CHECK-NEXT:    bl __aeabi_f2ulz
; CHECK-NEXT:    mov r4, r1
; CHECK-NEXT:    vmov r1, s16
; CHECK-NEXT:    vmov.32 d9[0], r0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl __aeabi_f2ulz
; CHECK-NEXT:    vmov.32 d8[0], r0
; CHECK-NEXT:    vmov.32 d9[1], r4
; CHECK-NEXT:    vmov.32 d8[1], r1
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    vmov r0, r1, d8
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, lr}
; CHECK-NEXT:    mov pc, lr

  %scale = fmul <2 x float> %in, <float 2.0, float 2.0>
  %conv = fptoui <2 x float> %scale to <2 x i64>
  ret <2 x i64> %conv
}
287
; fmul by 2.0 followed by fptoui from <4 x double> to <4 x i16>. The expected
; output doubles each lane with vadd.f64, converts lane by lane with the
; scalar vcvt.u32.f64, rebuilds a vector with vmov.32 and narrows it with
; vmovn.i32. Two of the four doubles arrive in r0-r3, the other two are
; loaded from the stack.
define <4 x i16> @fix_double_to_i16(<4 x double> %in) {
; CHECK-LABEL: fix_double_to_i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d18, r0, r1
; CHECK-NEXT:    mov r12, sp
; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
; CHECK-NEXT:    vmov d19, r2, r3
; CHECK-NEXT:    vadd.f64 d18, d18, d18
; CHECK-NEXT:    vcvt.u32.f64 s0, d18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vadd.f64 d20, d16, d16
; CHECK-NEXT:    vadd.f64 d19, d19, d19
; CHECK-NEXT:    vadd.f64 d16, d17, d17
; CHECK-NEXT:    vcvt.u32.f64 s2, d20
; CHECK-NEXT:    vcvt.u32.f64 s4, d19
; CHECK-NEXT:    vcvt.u32.f64 s6, d16
; CHECK-NEXT:    vmov.32 d16[0], r0
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov.32 d17[0], r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov.32 d16[1], r0
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov.32 d17[1], r0
; CHECK-NEXT:    vmovn.i32 d16, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr

  %scale = fmul <4 x double> %in, <double 2.0, double 2.0, double 2.0, double 2.0>
  %conv = fptoui <4 x double> %scale to <4 x i16>
  ret <4 x i16> %conv
}
319
; fmul by 2.0 followed by fptoui from <2 x double> to <2 x i64>. The expected
; output doubles each lane with vadd.f64 and converts each lane with a call
; to __aeabi_d2ulz, with d8/d9 and r4 saved around the calls.
define <2 x i64> @fix_double_to_i64(<2 x double> %in) {
; CHECK-LABEL: fix_double_to_i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov d16, r2, r3
; CHECK-NEXT:    vadd.f64 d16, d16, d16
; CHECK-NEXT:    vmov r2, r3, d16
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vadd.f64 d8, d16, d16
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl __aeabi_d2ulz
; CHECK-NEXT:    mov r4, r1
; CHECK-NEXT:    vmov r2, r1, d8
; CHECK-NEXT:    vmov.32 d9[0], r0
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bl __aeabi_d2ulz
; CHECK-NEXT:    vmov.32 d8[0], r0
; CHECK-NEXT:    vmov.32 d9[1], r4
; CHECK-NEXT:    vmov.32 d8[1], r1
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    vmov r0, r1, d8
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, lr}
; CHECK-NEXT:    mov pc, lr
  %scale = fmul <2 x double> %in, <double 2.0, double 2.0>
  %conv = fptoui <2 x double> %scale to <2 x i64>
  ret <2 x i64> %conv
}
352
; One scalar fptosi (double -> i32) whose result has three uses: two stores
; and the return value. The expected output contains three vcvt.s32.f64
; instructions; the stored copies go out with vstr straight from an S
; register and only the returned copy is moved to r0.
define i32 @multi_sint(double %c, i32* nocapture %p, i32* nocapture %q) {
; CHECK-LABEL: multi_sint:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vcvt.s32.f64 s0, d16
; CHECK-NEXT:    vstr s0, [r2]
; CHECK-NEXT:    vcvt.s32.f64 s0, d16
; CHECK-NEXT:    vcvt.s32.f64 s2, d16
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vstr s0, [r3]
; CHECK-NEXT:    mov pc, lr
  %conv = fptosi double %c to i32
  store i32 %conv, i32* %p, align 4
  store i32 %conv, i32* %q, align 4
  ret i32 %conv
}
369
; One scalar fptoui (double -> i32) whose result has three uses: two stores
; and the return value. The expected output contains three vcvt.u32.f64
; instructions; the stored copies go out with vstr straight from an S
; register and only the returned copy is moved to r0.
define i32 @multi_uint(double %c, i32* nocapture %p, i32* nocapture %q) {
; CHECK-LABEL: multi_uint:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vcvt.u32.f64 s0, d16
; CHECK-NEXT:    vstr s0, [r2]
; CHECK-NEXT:    vcvt.u32.f64 s0, d16
; CHECK-NEXT:    vcvt.u32.f64 s2, d16
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vstr s0, [r3]
; CHECK-NEXT:    mov pc, lr
  %conv = fptoui double %c to i32
  store i32 %conv, i32* %p, align 4
  store i32 %conv, i32* %q, align 4
  ret i32 %conv
}
386
; Scalar fptosi (double -> i32) whose only use is a store. The expected
; output stores the converted value with vstr directly from s0, without a
; move to a core register.
define void @double_to_sint_store(double %c, i32* nocapture %p) {
; CHECK-LABEL: double_to_sint_store:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vcvt.s32.f64 s0, d16
; CHECK-NEXT:    vstr s0, [r2]
; CHECK-NEXT:    mov pc, lr
  %conv = fptosi double %c to i32
  store i32 %conv, i32* %p, align 4
  ret void
}
398
; Scalar fptoui (double -> i32) whose only use is a store. The expected
; output stores the converted value with vstr directly from s0, without a
; move to a core register.
define void @double_to_uint_store(double %c, i32* nocapture %p) {
; CHECK-LABEL: double_to_uint_store:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vcvt.u32.f64 s0, d16
; CHECK-NEXT:    vstr s0, [r2]
; CHECK-NEXT:    mov pc, lr
  %conv = fptoui double %c to i32
  store i32 %conv, i32* %p, align 4
  ret void
}
410
; Scalar fptosi (float -> i32) whose only use is a store. The expected output
; converts in place in s0 and stores it with vstr, without a move to a core
; register.
define void @float_to_sint_store(float %c, i32* nocapture %p) {
; CHECK-LABEL: float_to_sint_store:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.s32.f32 s0, s0
; CHECK-NEXT:    vstr s0, [r1]
; CHECK-NEXT:    mov pc, lr
  %conv = fptosi float %c to i32
  store i32 %conv, i32* %p, align 4
  ret void
}
422
; Scalar fptoui (float -> i32) whose only use is a store. The expected output
; converts in place in s0 and stores it with vstr, without a move to a core
; register.
define void @float_to_uint_store(float %c, i32* nocapture %p) {
; CHECK-LABEL: float_to_uint_store:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.u32.f32 s0, s0
; CHECK-NEXT:    vstr s0, [r1]
; CHECK-NEXT:    mov pc, lr
  %conv = fptoui float %c to i32
  store i32 %conv, i32* %p, align 4
  ret void
}
434