1; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
2
; f: converts a loaded <4 x i16> vector to <4 x float> as unsigned values and
; returns the scalar sum of lanes 0, 1 and 2. The generated code is expected
; to load with vldr, widen with vmovl.u16, convert with vcvt.f32.u32 and then
; add with vadd.f32, in that order.
; CHECK-LABEL: f:
define float @f(<4 x i16>* nocapture %in) {
; Load of the 64-bit source vector and its widening to 32-bit lanes.
; CHECK: vldr
; CHECK: vmovl.u16
  %vec = load <4 x i16>* %in

; Unsigned integer to floating-point conversion on the widened lanes.
; CHECK: vcvt.f32.u32
  %fvec = uitofp <4 x i16> %vec to <4 x float>
  %lane0 = extractelement <4 x float> %fvec, i32 0
  %lane1 = extractelement <4 x float> %fvec, i32 1
  %lane2 = extractelement <4 x float> %fvec, i32 2

; Scalar accumulation of the three extracted lanes (lane 3 is unused).
; CHECK: vadd.f32
  %sum01 = fadd float %lane0, %lane1
  %sum012 = fadd float %sum01, %lane2

  ret float %sum012
}
20
; g: same shape as a lane-sum over a converted vector, but starting from a
; <4 x i8> source: the vector is loaded, converted to <4 x float> as unsigned
; values, and lanes 0, 1 and 2 are summed. Two widening steps (vmovl.u8 then
; vmovl.u16) are expected before the vcvt.f32.u32 conversion.
; CHECK-LABEL: g:
define float @g(<4 x i8>* nocapture %in) {
; The load is expected to be a NEON vld1, and that matters: mixing VFP and
; NEON instructions is bad on some cores.
; CHECK: vld1
; CHECK: vmovl.u8
; CHECK: vmovl.u16
  %vec = load <4 x i8>* %in

; Unsigned integer to floating-point conversion on the widened lanes.
; CHECK: vcvt.f32.u32
  %fvec = uitofp <4 x i8> %vec to <4 x float>
  %lane0 = extractelement <4 x float> %fvec, i32 0
  %lane1 = extractelement <4 x float> %fvec, i32 1
  %lane2 = extractelement <4 x float> %fvec, i32 2

; Scalar accumulation of the three extracted lanes (lane 3 is unused).
; CHECK: vadd.f32
  %sum01 = fadd float %lane0, %lane1
  %sum012 = fadd float %sum01, %lane2

  ret float %sum012
}
41
; h: converts a <4 x float> argument to <4 x i8> with fptoui. The conversion
; pattern accepts either the u32 or the s32 form of vcvt; the result is then
; expected to be narrowed with vmovn.i32.
; CHECK-LABEL: h:
define <4 x i8> @h(<4 x float> %v) {
; CHECK: vcvt.{{[us]}}32.f32
; CHECK: vmovn.i32
  %narrow = fptoui <4 x float> %v to <4 x i8>
  ret <4 x i8> %narrow
}
49
; i: signed division of an all-zero <4 x i8> vector by a <4 x i8> vector
; loaded from %x. The expected code sign-extends the loaded lanes in two steps
; (vmovl.s8, vmovl.s16), uses the vrecpe / vrecps / vmul instructions, and
; narrows the result with vmovn.
; CHECK-LABEL: i:
define <4 x i8> @i(<4 x i8>* %x) {
; The load is expected to be a NEON vld1, and that matters: mixing VFP and
; NEON instructions is bad on some cores.
; CHECK: vld1
; CHECK: vmovl.s8
; CHECK: vmovl.s16
; CHECK: vrecpe
; CHECK: vrecps
; CHECK: vmul
; CHECK: vmovn
  %divisor = load <4 x i8>* %x, align 4
  %quot = sdiv <4 x i8> zeroinitializer, %divisor
  ret <4 x i8> %quot
}
; j: zero-extends a loaded <4 x i8> vector to <4 x i32>. The extension is
; expected to be done by the two widening moves alone (vmovl.u8, vmovl.u16).
; CHECK-LABEL: j:
define <4 x i32> @j(<4 x i8>* %in) nounwind {
; CHECK: vld1
; CHECK: vmovl.u8
; CHECK: vmovl.u16
; No vand may appear after the second widening move.
; CHECK-NOT: vand
  %bytes = load <4 x i8>* %in, align 4
  %wide = zext <4 x i8> %bytes to <4 x i32>
  ret <4 x i32> %wide
}
75
76