1; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
2; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
3
4; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
5; WARN-NOT: warning
6
7; Since UQDEC{B|H|W|D|P} and UQINC{B|H|W|D|P} have identical semantics, the tests for
8;   * @llvm.aarch64.sve.uqinc{b|h|w|d|p}, and
9;   * @llvm.aarch64.sve.uqdec{b|h|w|d|p}
10; should also be identical (with the instruction name being adjusted). When
11; updating this file remember to make similar changes in the file testing the
12; other intrinsic.
13
14;
15; UQDECH (vector)
16;
17
define <vscale x 8 x i16> @uqdech(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqdech:
; CHECK: uqdech z0.h, pow2
; CHECK-NEXT: ret
  ; Pattern 0 (pow2), multiplier 1 (elided in the printed form).
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqdech.nxv8i16(<vscale x 8 x i16> %a, i32 0, i32 1)
  ret <vscale x 8 x i16> %res
}
26
27;
28; UQDECW (vector)
29;
30
define <vscale x 4 x i32> @uqdecw(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqdecw:
; CHECK: uqdecw z0.s, vl1, mul #2
; CHECK-NEXT: ret
  ; Pattern 1 (vl1), multiplier 2.
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uqdecw.nxv4i32(<vscale x 4 x i32> %a, i32 1, i32 2)
  ret <vscale x 4 x i32> %res
}
39
40;
41; UQDECD (vector)
42;
43
define <vscale x 2 x i64> @uqdecd(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqdecd:
; CHECK: uqdecd z0.d, vl2, mul #3
; CHECK-NEXT: ret
  ; Pattern 2 (vl2), multiplier 3.
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uqdecd.nxv2i64(<vscale x 2 x i64> %a, i32 2, i32 3)
  ret <vscale x 2 x i64> %res
}
52
53;
54; UQDECP (vector)
55;
56
define <vscale x 8 x i16> @uqdecp_b16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: uqdecp_b16:
; CHECK: uqdecp z0.h, p0
; CHECK-NEXT: ret
  ; Predicate-operand form (no pattern/multiplier immediates).
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqdecp.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %b)
  ret <vscale x 8 x i16> %res
}
65
define <vscale x 4 x i32> @uqdecp_b32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: uqdecp_b32:
; CHECK: uqdecp z0.s, p0
; CHECK-NEXT: ret
  ; Predicate-operand form (no pattern/multiplier immediates).
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uqdecp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %b)
  ret <vscale x 4 x i32> %res
}
74
define <vscale x 2 x i64> @uqdecp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: uqdecp_b64:
; CHECK: uqdecp z0.d, p0
; CHECK-NEXT: ret
  ; Predicate-operand form (no pattern/multiplier immediates).
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uqdecp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %b)
  ret <vscale x 2 x i64> %res
}
83
84;
85; UQDECB (scalar)
86;
87
define i32 @uqdecb_n32(i32 %a) {
; CHECK-LABEL: uqdecb_n32:
; CHECK: uqdecb w0, vl3, mul #4
; CHECK-NEXT: ret
  ; Pattern 3 (vl3), multiplier 4.
  %res = call i32 @llvm.aarch64.sve.uqdecb.n32(i32 %a, i32 3, i32 4)
  ret i32 %res
}
95
define i64 @uqdecb_n64(i64 %a) {
; CHECK-LABEL: uqdecb_n64:
; CHECK: uqdecb x0, vl4, mul #5
; CHECK-NEXT: ret
  ; Pattern 4 (vl4), multiplier 5.
  %res = call i64 @llvm.aarch64.sve.uqdecb.n64(i64 %a, i32 4, i32 5)
  ret i64 %res
}
103
104;
105; UQDECH (scalar)
106;
107
define i32 @uqdech_n32(i32 %a) {
; CHECK-LABEL: uqdech_n32:
; CHECK: uqdech w0, vl5, mul #6
; CHECK-NEXT: ret
  ; Pattern 5 (vl5), multiplier 6.
  %res = call i32 @llvm.aarch64.sve.uqdech.n32(i32 %a, i32 5, i32 6)
  ret i32 %res
}
115
define i64 @uqdech_n64(i64 %a) {
; CHECK-LABEL: uqdech_n64:
; CHECK: uqdech x0, vl6, mul #7
; CHECK-NEXT: ret
  ; Pattern 6 (vl6), multiplier 7.
  %res = call i64 @llvm.aarch64.sve.uqdech.n64(i64 %a, i32 6, i32 7)
  ret i64 %res
}
123
124;
125; UQDECW (scalar)
126;
127
define i32 @uqdecw_n32(i32 %a) {
; CHECK-LABEL: uqdecw_n32:
; CHECK: uqdecw w0, vl7, mul #8
; CHECK-NEXT: ret
  ; Pattern 7 (vl7), multiplier 8.
  %res = call i32 @llvm.aarch64.sve.uqdecw.n32(i32 %a, i32 7, i32 8)
  ret i32 %res
}
135
define i64 @uqdecw_n64(i64 %a) {
; CHECK-LABEL: uqdecw_n64:
; CHECK: uqdecw x0, vl8, mul #9
; CHECK-NEXT: ret
  ; Pattern 8 (vl8), multiplier 9.
  %res = call i64 @llvm.aarch64.sve.uqdecw.n64(i64 %a, i32 8, i32 9)
  ret i64 %res
}
143
144;
145; UQDECD (scalar)
146;
147
define i32 @uqdecd_n32(i32 %a) {
; CHECK-LABEL: uqdecd_n32:
; CHECK: uqdecd w0, vl16, mul #10
; CHECK-NEXT: ret
  ; Pattern 9 encodes vl16 (the pattern enum jumps vl8 -> vl16); multiplier 10.
  %res = call i32 @llvm.aarch64.sve.uqdecd.n32(i32 %a, i32 9, i32 10)
  ret i32 %res
}
155
define i64 @uqdecd_n64(i64 %a) {
; CHECK-LABEL: uqdecd_n64:
; CHECK: uqdecd x0, vl32, mul #11
; CHECK-NEXT: ret
  ; Pattern 10 encodes vl32; multiplier 11.
  %res = call i64 @llvm.aarch64.sve.uqdecd.n64(i64 %a, i32 10, i32 11)
  ret i64 %res
}
163
164;
165; UQDECP (scalar)
166;
167
define i32 @uqdecp_n32_b8(i32 %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uqdecp_n32_b8:
; CHECK: uqdecp w0, p0.b
; CHECK-NEXT: ret
  ; 32-bit scalar form, b-sized predicate elements.
  %res = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
  ret i32 %res
}
175
define i32 @uqdecp_n32_b16(i32 %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: uqdecp_n32_b16:
; CHECK: uqdecp w0, p0.h
; CHECK-NEXT: ret
  ; 32-bit scalar form, h-sized predicate elements.
  %res = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
  ret i32 %res
}
183
define i32 @uqdecp_n32_b32(i32 %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: uqdecp_n32_b32:
; CHECK: uqdecp w0, p0.s
; CHECK-NEXT: ret
  ; 32-bit scalar form, s-sized predicate elements.
  %res = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
  ret i32 %res
}
191
define i32 @uqdecp_n32_b64(i32 %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: uqdecp_n32_b64:
; CHECK: uqdecp w0, p0.d
; CHECK-NEXT: ret
  ; 32-bit scalar form, d-sized predicate elements.
  %res = call i32 @llvm.aarch64.sve.uqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
  ret i32 %res
}
199
define i64 @uqdecp_n64_b8(i64 %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uqdecp_n64_b8:
; CHECK: uqdecp x0, p0.b
; CHECK-NEXT: ret
  ; 64-bit scalar form, b-sized predicate elements.
  %res = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv16i1(i64 %a, <vscale x 16 x i1> %b)
  ret i64 %res
}
207
define i64 @uqdecp_n64_b16(i64 %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: uqdecp_n64_b16:
; CHECK: uqdecp x0, p0.h
; CHECK-NEXT: ret
  ; 64-bit scalar form, h-sized predicate elements.
  %res = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv8i1(i64 %a, <vscale x 8 x i1> %b)
  ret i64 %res
}
215
define i64 @uqdecp_n64_b32(i64 %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: uqdecp_n64_b32:
; CHECK: uqdecp x0, p0.s
; CHECK-NEXT: ret
  ; 64-bit scalar form, s-sized predicate elements.
  %res = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv4i1(i64 %a, <vscale x 4 x i1> %b)
  ret i64 %res
}
223
define i64 @uqdecp_n64_b64(i64 %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: uqdecp_n64_b64:
; CHECK: uqdecp x0, p0.d
; CHECK-NEXT: ret
  ; 64-bit scalar form, d-sized predicate elements.
  %res = call i64 @llvm.aarch64.sve.uqdecp.n64.nxv2i1(i64 %a, <vscale x 2 x i1> %b)
  ret i64 %res
}
231
232; uqdec{h|w|d}(vector, pattern, multiplier)
233declare <vscale x 8 x i16> @llvm.aarch64.sve.uqdech.nxv8i16(<vscale x 8 x i16>, i32, i32)
234declare <vscale x 4 x i32> @llvm.aarch64.sve.uqdecw.nxv4i32(<vscale x 4 x i32>, i32, i32)
235declare <vscale x 2 x i64> @llvm.aarch64.sve.uqdecd.nxv2i64(<vscale x 2 x i64>, i32, i32)
236
237; uqdec{b|h|w|d}(scalar, pattern, multiplier)
238declare i32 @llvm.aarch64.sve.uqdecb.n32(i32, i32, i32)
239declare i64 @llvm.aarch64.sve.uqdecb.n64(i64, i32, i32)
240declare i32 @llvm.aarch64.sve.uqdech.n32(i32, i32, i32)
241declare i64 @llvm.aarch64.sve.uqdech.n64(i64, i32, i32)
242declare i32 @llvm.aarch64.sve.uqdecw.n32(i32, i32, i32)
243declare i64 @llvm.aarch64.sve.uqdecw.n64(i64, i32, i32)
244declare i32 @llvm.aarch64.sve.uqdecd.n32(i32, i32, i32)
245declare i64 @llvm.aarch64.sve.uqdecd.n64(i64, i32, i32)
246
247; uqdecp(scalar, predicate)
248declare i32 @llvm.aarch64.sve.uqdecp.n32.nxv16i1(i32, <vscale x 16 x i1>)
249declare i32 @llvm.aarch64.sve.uqdecp.n32.nxv8i1(i32, <vscale x 8 x i1>)
250declare i32 @llvm.aarch64.sve.uqdecp.n32.nxv4i1(i32, <vscale x 4 x i1>)
251declare i32 @llvm.aarch64.sve.uqdecp.n32.nxv2i1(i32, <vscale x 2 x i1>)
252
253declare i64 @llvm.aarch64.sve.uqdecp.n64.nxv16i1(i64, <vscale x 16 x i1>)
254declare i64 @llvm.aarch64.sve.uqdecp.n64.nxv8i1(i64, <vscale x 8 x i1>)
255declare i64 @llvm.aarch64.sve.uqdecp.n64.nxv4i1(i64, <vscale x 4 x i1>)
256declare i64 @llvm.aarch64.sve.uqdecp.n64.nxv2i1(i64, <vscale x 2 x i1>)
257
258; uqdecp(vector, predicate)
259declare <vscale x 8 x i16> @llvm.aarch64.sve.uqdecp.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>)
260declare <vscale x 4 x i32> @llvm.aarch64.sve.uqdecp.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>)
261declare <vscale x 2 x i64> @llvm.aarch64.sve.uqdecp.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>)
262