; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; This tests icmp operations that do not map directly to NEON instructions.
; Not-equal (ne) operations are implemented by VCEQ/VMVN.  Less-than (lt/ult)
; and less-than-or-equal (le/ule) are implemented by swapping the arguments
; to VCGT and VCGE.  Test all the operand types for not-equal but only sample
; the other operations.
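;
; For example, "icmp ne" on <8 x i8> is expected to lower to a sequence like
; the following (a sketch; dD/dN/dM are placeholder registers, since the
; actual register allocation is not checked by these tests):
;   vceq.i8 dD, dN, dM   ; dD[i] = (dN[i] == dM[i]) ? 0xff : 0x00
;   vmvn    dD, dD       ; invert the mask to get not-equal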

define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vcnei8:
;CHECK: vceq.i8
;CHECK-NEXT: vmvn
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = icmp ne <8 x i8> %tmp1, %tmp2
	%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
	ret <8 x i8> %tmp4
}

define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vcnei16:
;CHECK: vceq.i16
;CHECK-NEXT: vmvn
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = icmp ne <4 x i16> %tmp1, %tmp2
	%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
	ret <4 x i16> %tmp4
}

define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vcnei32:
;CHECK: vceq.i32
;CHECK-NEXT: vmvn
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = icmp ne <2 x i32> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}

define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vcneQi8:
;CHECK: vceq.i8
;CHECK-NEXT: vmvn
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = icmp ne <16 x i8> %tmp1, %tmp2
	%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
	ret <16 x i8> %tmp4
}

define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vcneQi16:
;CHECK: vceq.i16
;CHECK-NEXT: vmvn
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = icmp ne <8 x i16> %tmp1, %tmp2
	%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
	ret <8 x i16> %tmp4
}

define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vcneQi32:
;CHECK: vceq.i32
;CHECK-NEXT: vmvn
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i32>, <4 x i32>* %B
	%tmp3 = icmp ne <4 x i32> %tmp1, %tmp2
	%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
	ret <4 x i32> %tmp4
}

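; The lt/le tests below have no NEON instructions of their own: the backend
; emits VCGT/VCGE with the operands swapped (e.g., "icmp slt %a, %b" becomes
; a vcgt with %b and %a reversed), so only the mnemonics are checked here.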
define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vcltQs8:
;CHECK: vcgt.s8
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = icmp slt <16 x i8> %tmp1, %tmp2
	%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
	ret <16 x i8> %tmp4
}

define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vcles16:
;CHECK: vcge.s16
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = icmp sle <4 x i16> %tmp1, %tmp2
	%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
	ret <4 x i16> %tmp4
}

define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vcltu16:
;CHECK: vcgt.u16
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = icmp ult <4 x i16> %tmp1, %tmp2
	%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
	ret <4 x i16> %tmp4
}

define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vcleQu32:
;CHECK: vcge.u32
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i32>, <4 x i32>* %B
	%tmp3 = icmp ule <4 x i32> %tmp1, %tmp2
	%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
	ret <4 x i32> %tmp4
}