1; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
2
3; CHECK-LABEL: f:
define float @f(<4 x i16>* nocapture %in) {
  ; Loads a <4 x i16> vector, converts all lanes from unsigned integer to
  ; float, and returns the sum of lanes 0, 1 and 2. The widening is expected
  ; to be done by vmovl.u16, with no vand appearing before the conversion.
  ; CHECK: vld1
  ; CHECK: vmovl.u16
  ; CHECK-NOT: vand
  %vec = load <4 x i16>, <4 x i16>* %in
  ; CHECK: vcvt.f32.u32
  %fvec = uitofp <4 x i16> %vec to <4 x float>
  %lane0 = extractelement <4 x float> %fvec, i32 0
  %lane1 = extractelement <4 x float> %fvec, i32 1
  %lane2 = extractelement <4 x float> %fvec, i32 2

  ; CHECK: vadd.f32
  %sum01 = fadd float %lane0, %lane1
  %sum012 = fadd float %sum01, %lane2

  ret float %sum012
}
21
22; CHECK-LABEL: g:
define float @g(<4 x i16>* nocapture %in) {
  ; Loads a <4 x i16> vector and converts only lane 0 from unsigned integer
  ; to float.
  ; CHECK: vldr
  %vec = load <4 x i16>, <4 x i16>* %in

  ; Current codegen emits a vmov.16 followed by a uxth instruction.
  ; The uxth is redundant: the extension should be possible without
  ; generating cross-domain copies. Once that is fixed, the checks
  ; below should be updated accordingly.

  ; CHECK: uxth
  %lane0 = extractelement <4 x i16> %vec, i32 0
  ; CHECK: vcvt.f32.u32
  %res = uitofp i16 %lane0 to float
  ret float %res
}
38
; Make sure we generate zext from <4 x i8> to <4 x i32>.
40
41; CHECK-LABEL: h:
42; CHECK: vld1.32
43; CHECK: vmovl.u8 q8, d16
44; CHECK: vmovl.u16 q8, d16
45; CHECK: vmov r0, r1, d16
46; CHECK: vmov r2, r3, d17
define <4 x i32> @h(<4 x i8>* %in) {
  ; Loads a <4 x i8> vector, zero-extends each lane to i32 individually,
  ; and rebuilds the result as a <4 x i32> one lane at a time.
  %bytes = load <4 x i8>, <4 x i8>* %in, align 4

  ; Lane 0.
  %b0 = extractelement <4 x i8> %bytes, i32 0
  %w0 = zext i8 %b0 to i32
  %v0 = insertelement <4 x i32> undef, i32 %w0, i32 0

  ; Lane 1.
  %b1 = extractelement <4 x i8> %bytes, i32 1
  %w1 = zext i8 %b1 to i32
  %v1 = insertelement <4 x i32> %v0, i32 %w1, i32 1

  ; Lane 2.
  %b2 = extractelement <4 x i8> %bytes, i32 2
  %w2 = zext i8 %b2 to i32
  %v2 = insertelement <4 x i32> %v1, i32 %w2, i32 2

  ; Lane 3.
  %b3 = extractelement <4 x i8> %bytes, i32 3
  %w3 = zext i8 %b3 to i32
  %v3 = insertelement <4 x i32> %v2, i32 %w3, i32 3

  ret <4 x i32> %v3
}
63