; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

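; Vector signed shift right by immediate: a plain 'ashr' by a splat constant
; should select sshr for both 64-bit and 128-bit vector types.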
define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
; CHECK: test_vshr_n_s8
; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
; CHECK: test_vshr_n_s16
; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
; CHECK: test_vshr_n_s32
; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
; CHECK: test_vshrq_n_s8
; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
; CHECK: test_vshrq_n_s16
; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
; CHECK: test_vshrq_n_s32
; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
; CHECK: test_vshrq_n_s64
; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

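; Vector unsigned shift right by immediate: 'lshr' by a splat constant selects ushr.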
define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
; CHECK: test_vshr_n_u8
; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
; CHECK: test_vshr_n_u16
; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
; CHECK: test_vshr_n_u32
; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
; CHECK: test_vshrq_n_u8
; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
; CHECK: test_vshrq_n_u16
; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
; CHECK: test_vshrq_n_u32
; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
; CHECK: test_vshrq_n_u64
; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

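; Signed shift right and accumulate: an 'ashr' by a splat immediate followed by
; an 'add' into the other operand should fold into a single ssra.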
define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_s8
; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_s16
; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_s32
; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_s8
; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_s16
; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_s32
; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_s64
; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

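; Unsigned shift right and accumulate: 'lshr' plus 'add' folds into usra.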
define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_u8
; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_u16
; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_u32
; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_u8
; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_u16
; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_u32
; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_u64
; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

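; Shift right and narrow: a shift right by immediate followed by a truncation
; to the half-width element type selects shrn. The same instruction is used for
; signed and unsigned inputs, since no saturation or rounding is involved.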
define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vshrn_n_s16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vshrn_n_s32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vshrn_n_s64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vshrn_n_u16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vshrn_n_u32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vshrn_n_u64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

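; Shift right, narrow, and write the high half: the narrowed result is
; concatenated onto the existing low half (expressed as bitcasts plus a
; shufflevector), which should select shrn2.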
define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_s16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_s32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_s64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_u16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_u32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_u64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

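; Signed saturating shift right unsigned narrow, high half: the narrowing
; intrinsic result concatenated onto the low half should select sqshrun2.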
define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrun_high_n_s16
; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrun_high_n_s32
; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrun_high_n_s64
; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

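; Rounding shift right narrow, high half: rshrn2.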
define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vrshrn_high_n_s16
; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vrshrn_high_n_s32
; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vrshrn_high_n_s64
; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

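; Signed saturating rounding shift right unsigned narrow, high half: sqrshrun2.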
define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrun_high_n_s16
; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrun_high_n_s32
; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrun_high_n_s64
; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

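; Signed saturating shift right narrow, high half: sqshrn2.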
define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_s16
; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_s32
; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_s64
; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

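; Unsigned saturating shift right narrow, high half: uqshrn2.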
define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_u16
; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_u32
; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_u64
; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

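; Signed saturating rounding shift right narrow, high half: sqrshrn2.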
define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_s16
; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_s32
; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_s64
; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

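; Unsigned saturating rounding shift right narrow, high half: uqrshrn2.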
define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_u16
; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_u32
; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_u64
; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

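; Intrinsic declarations for the narrowing shifts and fixed-point conversions.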
declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32)

declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)

declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)

declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)

declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)

declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)

declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)

declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)

declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)

declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)

declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)

declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)

declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)

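; Scalar fixed-point conversions with a #64 fractional-bits immediate:
; fcvtzs/fcvtzu convert floating-point to signed/unsigned fixed-point, and
; scvtf/ucvtf convert fixed-point back to floating-point.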
define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_s64_f64
; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_u64_f64
; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_s64
; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_u64
; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)