; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECKBE

; Interleave %src1 (even result lanes) with %src2 (odd result lanes) and
; truncate every i32 lane to i16. The little-endian run expects a single
; vmovnt.i32; the big-endian run expects the same wrapped in vrev64s.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q2
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Same as the interleave-and-truncate pattern but with the operands swapped:
; %src2 feeds the even result lanes and %src1 the odd ones. The little-endian
; run expects vmovnt.i32 into q1 followed by a copy back to q0.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q3, q1
; CHECKBE-NEXT:    vmovnt.i32 q3, q2
; CHECKBE-NEXT:    vrev64.16 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Single-input variant: every lane of %src1 is duplicated into two adjacent
; result lanes and then truncated from i32 to i16. The little-endian run
; expects vmovnt.i32 with the same register as both operands.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc3(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_trunc3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q1
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> undef, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}


; Interleave %src1 (even result lanes) with %src2 (odd result lanes) and
; truncate every i16 lane to i8. The little-endian run expects a single
; vmovnt.i16.
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q2
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

; Operand-swapped i16 -> i8 interleave-and-truncate: %src2 feeds the even
; result lanes, %src1 the odd ones. The little-endian run expects vmovnt.i16
; into q1 followed by a copy back to q0.
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q3, q1
; CHECKBE-NEXT:    vmovnt.i16 q3, q2
; CHECKBE-NEXT:    vrev64.8 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

; Single-input i16 -> i8 variant: every lane of %src1 is duplicated into two
; adjacent result lanes before the truncate. The little-endian run expects
; vmovnt.i16 with the same register as both operands.
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc3(<8 x i16> %src1) {
; CHECK-LABEL: vmovn16_trunc3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q1
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}



; 64-bit lanes: result is { %src1[0], %src2[0] }. Both runs expect plain
; vmov.f32 s-register copies.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s2, s4
; CHECKBE-NEXT:    vmov.f32 s3, s5
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %out
}

; 64-bit lanes: result is { %src2[0], %src1[0] }. Both runs expect vmov.f32
; s-register copies into q1 and a final copy of q1 to q0.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s6, s0
; CHECKBE-NEXT:    vmov.f32 s7, s1
; CHECKBE-NEXT:    vmov q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %out
}

; 64-bit lanes: result is { %src1[0], %src2[1] }. Both runs expect two
; vmov.f32 s-register copies.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s2, s6
; CHECKBE-NEXT:    vmov.f32 s3, s7
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %out
}

; 64-bit lanes: result is { %src2[1], %src1[0] }. Both runs expect four
; vmov.f32 s-register copies and a final copy of q1 to q0.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s6
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s5, s7
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s4, s6
; CHECKBE-NEXT:    vmov.f32 s6, s0
; CHECKBE-NEXT:    vmov.f32 s5, s7
; CHECKBE-NEXT:    vmov.f32 s7, s1
; CHECKBE-NEXT:    vmov q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %out
}

; 64-bit lanes: result is { %src1[1], %src2[0] }. Both runs expect four
; vmov.f32 s-register copies performed in place in q0.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s2
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s1, s3
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s0, s2
; CHECKBE-NEXT:    vmov.f32 s2, s4
; CHECKBE-NEXT:    vmov.f32 s1, s3
; CHECKBE-NEXT:    vmov.f32 s3, s5
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %out
}

; 64-bit lanes: result is { %src2[0], %src1[1] }. Both runs expect two
; vmov.f32 s-register copies into q1 and a final copy of q1 to q0.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s6, s2
; CHECK-NEXT:    vmov.f32 s7, s3
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s6, s2
; CHECKBE-NEXT:    vmov.f32 s7, s3
; CHECKBE-NEXT:    vmov q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %out
}



; 32-bit lanes: even lanes of %src1 stay in the even result positions and the
; even lanes of %src2 go to the odd positions. The expected lowering is a pair
; of vmov.f32 s-register copies (wrapped in vrev64.32 for the big-endian run).
define arm_aapcs_vfpcc <4 x i32> @vmovn32_t1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s4
; CHECK-NEXT:    vmov.f32 s3, s6
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s5, s8
; CHECKBE-NEXT:    vmov.f32 s7, s10
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %out
}

; 32-bit lanes, operands swapped: even lanes of %src2 fill the even result
; positions and even lanes of %src1 fill the odd positions. The expected
; lowering is a pair of vmov.f32 s-register copies.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_t2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s5, s0
; CHECK-NEXT:    vmov.f32 s7, s2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q3, q1
; CHECKBE-NEXT:    vmov.f32 s13, s8
; CHECKBE-NEXT:    vmov.f32 s15, s10
; CHECKBE-NEXT:    vrev64.32 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x i32> %out
}

; 32-bit lanes: even lanes of %src1 stay in the even result positions and the
; odd lanes of %src2 stay in the odd positions. The expected lowering is a
; pair of vmov.f32 s-register copies.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s5
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s5, s9
; CHECKBE-NEXT:    vmov.f32 s7, s11
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %out
}

; 32-bit lanes: odd lanes of %src2 move to the even result positions and even
; lanes of %src1 move to the odd positions, so every lane changes position.
; The expected lowering is four vmov.f32 s-register copies.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s5
; CHECK-NEXT:    vmov.f32 s6, s7
; CHECK-NEXT:    vmov.f32 s5, s0
; CHECK-NEXT:    vmov.f32 s7, s2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    vmov.f32 s4, s1
; CHECKBE-NEXT:    vmov.f32 s5, s8
; CHECKBE-NEXT:    vmov.f32 s6, s3
; CHECKBE-NEXT:    vmov.f32 s7, s10
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
  ret <4 x i32> %out
}

; 32-bit lanes: odd lanes of %src1 move to the even result positions and even
; lanes of %src2 move to the odd positions. The expected lowering is four
; vmov.f32 s-register copies performed in place.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b3(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmov.f32 s1, s4
; CHECK-NEXT:    vmov.f32 s3, s6
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s4, s5
; CHECKBE-NEXT:    vmov.f32 s6, s7
; CHECKBE-NEXT:    vmov.f32 s5, s8
; CHECKBE-NEXT:    vmov.f32 s7, s10
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
  ret <4 x i32> %out
}

; 32-bit lanes: even lanes of %src2 stay in the even result positions and odd
; lanes of %src1 stay in the odd positions. The expected lowering is a pair of
; vmov.f32 s-register copies into the %src2 register.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b4(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s5, s1
; CHECK-NEXT:    vmov.f32 s7, s3
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q3, q1
; CHECKBE-NEXT:    vmov.f32 s13, s9
; CHECKBE-NEXT:    vmov.f32 s15, s11
; CHECKBE-NEXT:    vrev64.32 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %out
}

; Single-input 32-bit variant: each even lane of %src1 is duplicated into the
; odd position that follows it. The expected lowering is a pair of vmov.f32
; s-register copies.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_single_t(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_single_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s0
; CHECK-NEXT:    vmov.f32 s3, s2
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_single_t:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s5, s4
; CHECKBE-NEXT:    vmov.f32 s7, s6
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x i32> %out
}




; 16-bit lanes: even lanes of %src1 stay in the even result positions and the
; even lanes of %src2 go to the odd positions. The little-endian run expects a
; single vmovnt.i32 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_t1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q2
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i16> %out
}

; 16-bit lanes, operands swapped: even lanes of %src2 fill the even result
; positions and even lanes of %src1 fill the odd positions. The little-endian
; run expects vmovnt.i32 q1, q0 followed by a copy back to q0.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_t2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q3, q1
; CHECKBE-NEXT:    vmovnt.i32 q3, q2
; CHECKBE-NEXT:    vrev64.16 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
  ret <8 x i16> %out
}

; 16-bit lanes: even lanes of %src1 stay in the even result positions and odd
; lanes of %src2 stay in the odd positions. The little-endian run expects
; vmovnb.i32 q1, q0 followed by a copy back to q0.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q3, q1
; CHECKBE-NEXT:    vmovnb.i32 q3, q2
; CHECKBE-NEXT:    vrev64.16 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

; 16-bit lanes: odd lanes of %src2 move to the even result positions and even
; lanes of %src1 move to the odd positions. No vmovn is expected here; the
; recorded lowering is a vmovx.f16 / vins.f16 sequence per 32-bit register.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s5, s5
; CHECK-NEXT:    vmovx.f16 s4, s4
; CHECK-NEXT:    vmovx.f16 s6, s6
; CHECK-NEXT:    vmovx.f16 s7, s7
; CHECK-NEXT:    vins.f16 s5, s1
; CHECK-NEXT:    vins.f16 s4, s0
; CHECK-NEXT:    vins.f16 s6, s2
; CHECK-NEXT:    vins.f16 s7, s3
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    vmovx.f16 s5, s1
; CHECKBE-NEXT:    vmovx.f16 s4, s0
; CHECKBE-NEXT:    vmovx.f16 s6, s2
; CHECKBE-NEXT:    vmovx.f16 s7, s3
; CHECKBE-NEXT:    vins.f16 s5, s9
; CHECKBE-NEXT:    vins.f16 s4, s8
; CHECKBE-NEXT:    vins.f16 s6, s10
; CHECKBE-NEXT:    vins.f16 s7, s11
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 9, i32 0, i32 11, i32 2, i32 13, i32 4, i32 15, i32 6>
  ret <8 x i16> %out
}

; 16-bit lanes: odd lanes of %src1 move to the even result positions and even
; lanes of %src2 move to the odd positions. No vmovn is expected here; the
; recorded lowering is a vmovx.f16 / vins.f16 sequence per 32-bit register.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s1, s1
; CHECK-NEXT:    vmovx.f16 s0, s0
; CHECK-NEXT:    vmovx.f16 s2, s2
; CHECK-NEXT:    vmovx.f16 s3, s3
; CHECK-NEXT:    vins.f16 s1, s5
; CHECK-NEXT:    vins.f16 s0, s4
; CHECK-NEXT:    vins.f16 s2, s6
; CHECK-NEXT:    vins.f16 s3, s7
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovx.f16 s5, s5
; CHECKBE-NEXT:    vmovx.f16 s4, s4
; CHECKBE-NEXT:    vmovx.f16 s6, s6
; CHECKBE-NEXT:    vmovx.f16 s7, s7
; CHECKBE-NEXT:    vins.f16 s5, s9
; CHECKBE-NEXT:    vins.f16 s4, s8
; CHECKBE-NEXT:    vins.f16 s6, s10
; CHECKBE-NEXT:    vins.f16 s7, s11
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
  ret <8 x i16> %out
}

; 16-bit lanes: even lanes of %src2 fill the even result positions and odd
; lanes of %src1 stay in the odd positions. The little-endian run expects a
; single vmovnb.i32 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b4(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i32 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnb.i32 q1, q2
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x i16> %out
}

; Single-input 16-bit variant: each even lane of %src1 is duplicated into the
; odd position that follows it. The little-endian run expects vmovnt.i32 with
; the same register as both operands.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_single_t(<8 x i16> %src1) {
; CHECK-LABEL: vmovn16_single_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_single_t:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q1
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x i16> %out
}


; 8-bit lanes: even lanes of %src1 stay in the even result positions and the
; even lanes of %src2 go to the odd positions. The little-endian run expects a
; single vmovnt.i16 q0, q1.
; NOTE(review): despite the _b1 suffix the expected instruction is vmovnt; the
; 16-bit tests in this file use the _t names for this mask shape. The name is
; left unchanged so the check labels stay stable.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_b1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q1
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q2
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ret <16 x i8> %out
}

; 8-bit lanes, operands swapped: even lanes of %src2 fill the even result
; positions and even lanes of %src1 fill the odd positions. The little-endian
; run expects vmovnt.i16 q1, q0 followed by a copy back to q0.
; NOTE(review): despite the _b2 suffix the expected instruction is vmovnt; the
; name is left unchanged so the check labels stay stable.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_b2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q0
; CHECKBE-NEXT:    vrev64.8 q3, q1
; CHECKBE-NEXT:    vmovnt.i16 q3, q2
; CHECKBE-NEXT:    vrev64.8 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14>
  ret <16 x i8> %out
}

; 8-bit lanes: even lanes of %src1 stay in the even result positions and odd
; lanes of %src2 stay in the odd positions. The little-endian run expects
; vmovnb.i16 q1, q0 followed by a copy back to q0.
; NOTE(review): despite the _t1 suffix the expected instruction is vmovnb; the
; name is left unchanged so the check labels stay stable.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q0
; CHECKBE-NEXT:    vrev64.8 q3, q1
; CHECKBE-NEXT:    vmovnb.i16 q3, q2
; CHECKBE-NEXT:    vrev64.8 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i8> %out
}

; 8-bit lanes: odd lanes of %src2 move to the even result positions and even
; lanes of %src1 move to the odd positions. No vmovn is expected here; the
; recorded lowering extracts and inserts all sixteen bytes one at a time.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q2, q0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q2[0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q2[2]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q2[4]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q2[6]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q2[8]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q2[10]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q2[12]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q1[15]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q2[14]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q1
; CHECKBE-NEXT:    vrev64.8 q3, q0
; CHECKBE-NEXT:    vmov.u8 r0, q2[1]
; CHECKBE-NEXT:    vmov.8 q1[0], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[0]
; CHECKBE-NEXT:    vmov.8 q1[1], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[3]
; CHECKBE-NEXT:    vmov.8 q1[2], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[2]
; CHECKBE-NEXT:    vmov.8 q1[3], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[5]
; CHECKBE-NEXT:    vmov.8 q1[4], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[4]
; CHECKBE-NEXT:    vmov.8 q1[5], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[7]
; CHECKBE-NEXT:    vmov.8 q1[6], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[6]
; CHECKBE-NEXT:    vmov.8 q1[7], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[9]
; CHECKBE-NEXT:    vmov.8 q1[8], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[8]
; CHECKBE-NEXT:    vmov.8 q1[9], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[11]
; CHECKBE-NEXT:    vmov.8 q1[10], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[10]
; CHECKBE-NEXT:    vmov.8 q1[11], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[13]
; CHECKBE-NEXT:    vmov.8 q1[12], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[12]
; CHECKBE-NEXT:    vmov.8 q1[13], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[15]
; CHECKBE-NEXT:    vmov.8 q1[14], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[14]
; CHECKBE-NEXT:    vmov.8 q1[15], r0
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 17, i32 0, i32 19, i32 2, i32 21, i32 4, i32 23, i32 6, i32 25, i32 8, i32 27, i32 10, i32 29, i32 12, i32 31, i32 14>
  ret <16 x i8> %out
}

; 8-bit lanes: odd lanes of %src1 move to the even result positions and even
; lanes of %src2 move to the odd positions. No vmovn is expected here; the
; recorded lowering extracts and inserts all sixteen bytes one at a time.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t3(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r0, q0[1]
; CHECK-NEXT:    vmov q2, q0
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q2[3]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q2[5]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q2[7]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q2[9]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q2[11]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q2[13]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q2[15]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q1[14]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_t3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q3, q0
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    vmov.u8 r0, q3[1]
; CHECKBE-NEXT:    vmov.8 q2[0], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[0]
; CHECKBE-NEXT:    vmov.8 q2[1], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[3]
; CHECKBE-NEXT:    vmov.8 q2[2], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[2]
; CHECKBE-NEXT:    vmov.8 q2[3], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[5]
; CHECKBE-NEXT:    vmov.8 q2[4], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[4]
; CHECKBE-NEXT:    vmov.8 q2[5], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[7]
; CHECKBE-NEXT:    vmov.8 q2[6], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[6]
; CHECKBE-NEXT:    vmov.8 q2[7], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[9]
; CHECKBE-NEXT:    vmov.8 q2[8], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[8]
; CHECKBE-NEXT:    vmov.8 q2[9], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[11]
; CHECKBE-NEXT:    vmov.8 q2[10], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[10]
; CHECKBE-NEXT:    vmov.8 q2[11], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[13]
; CHECKBE-NEXT:    vmov.8 q2[12], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[12]
; CHECKBE-NEXT:    vmov.8 q2[13], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[15]
; CHECKBE-NEXT:    vmov.8 q2[14], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[14]
; CHECKBE-NEXT:    vmov.8 q2[15], r0
; CHECKBE-NEXT:    vrev64.8 q0, q2
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 1, i32 16, i32 3, i32 18, i32 5, i32 20, i32 7, i32 22, i32 9, i32 24, i32 11, i32 26, i32 13, i32 28, i32 15, i32 30>
  ret <16 x i8> %out
}

; 8-bit lanes: even lanes of %src2 fill the even result positions and odd
; lanes of %src1 stay in the odd positions. The little-endian run expects a
; single vmovnb.i16 q0, q1.
; NOTE(review): despite the _t4 suffix the expected instruction is vmovnb; the
; name is left unchanged so the check labels stay stable.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t4(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i16 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_t4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q1
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vmovnb.i16 q1, q2
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i8> %out
}

; Single-input 8-bit variant: each even lane of %src1 is duplicated into the
; odd position that follows it. The little-endian run expects vmovnt.i16 with
; the same register as both operands.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_single_t(<16 x i8> %src1) {
; CHECK-LABEL: vmovn8_single_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_single_t:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q1
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x i8> %out
}


; Interleave-and-truncate where the second shuffle operand is a vreinterpretq
; of undef and the first is a vreinterpretq of %a. Both runs expect no
; instructions other than the return.
define arm_aapcs_vfpcc <8 x i16> @vmovn32trunct_undef2(<8 x i16> %a) {
; CHECK-LABEL: vmovn32trunct_undef2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32trunct_undef2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
  %c2 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> undef)
  %strided.vec = shufflevector <4 x i32> %c1, <4 x i32> %c2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Interleave-and-truncate where the first shuffle operand is a vreinterpretq
; of undef and the second is a vreinterpretq of %a. The little-endian run
; expects vmovnt.i32 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @vmovn32trunct_undef1(<8 x i16> %a) {
; CHECK-LABEL: vmovn32trunct_undef1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32trunct_undef1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q1
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> undef)
  %c2 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
  %strided.vec = shufflevector <4 x i32> %c1, <4 x i32> %c2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; 16-bit shuffle taking even lanes from a vreinterpretq of %a and odd lanes
; from a vreinterpretq of undef. The little-endian run expects only the
; return; the big-endian run expects a vrev64.8 / vrev64.16 pair.
define arm_aapcs_vfpcc <8 x i16> @vmovn16b_undef2(<16 x i8> %a) {
; CHECK-LABEL: vmovn16b_undef2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16b_undef2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %out = shufflevector <8 x i16> %c1, <8 x i16> %c2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

; 16-bit shuffle taking even lanes from a vreinterpretq of undef and odd lanes
; from a vreinterpretq of %a. The little-endian run expects only the return;
; the big-endian run expects a vrev64.8 / vrev64.16 pair.
define arm_aapcs_vfpcc <8 x i16> @vmovn16b_undef1(<16 x i8> %a) {
; CHECK-LABEL: vmovn16b_undef1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16b_undef1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %out = shufflevector <8 x i16> %c1, <8 x i16> %c2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

; Negative case: the even result lanes come from the undef operand, and the
; odd result lanes read %src1 lanes 0, 1, 1, 2 rather than 0, 1, 2, 3. No
; vmovn is expected; the recorded lowering inserts the four odd 16-bit lanes
; individually with vmov.16.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_badlanes(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_badlanes:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov.16 q1[1], r0
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov.16 q1[3], r1
; CHECK-NEXT:    vmov.16 q1[5], r1
; CHECK-NEXT:    vmov.16 q1[7], r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_badlanes:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov r0, r1, d2
; CHECKBE-NEXT:    vmov.16 q2[1], r0
; CHECKBE-NEXT:    vmov r0, s6
; CHECKBE-NEXT:    vmov.16 q2[3], r1
; CHECKBE-NEXT:    vmov.16 q2[5], r1
; CHECKBE-NEXT:    vmov.16 q2[7], r0
; CHECKBE-NEXT:    vrev64.16 q0, q2
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> undef, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 1, i32 7, i32 2>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; i16 -> i8 interleave-and-truncate where the second shuffle operand is a
; vreinterpretq of undef and the first is a vreinterpretq of %a. Both runs
; expect no instructions other than the return.
define arm_aapcs_vfpcc <16 x i8> @vmovn16trunct_undef2(<16 x i8> %a) {
; CHECK-LABEL: vmovn16trunct_undef2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16trunct_undef2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %strided.vec = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

; i16 -> i8 interleave-and-truncate where the first shuffle operand is a
; vreinterpretq of undef and the second is a vreinterpretq of %a. The
; little-endian run expects vmovnt.i16 q0, q0.
define arm_aapcs_vfpcc <16 x i8> @vmovn16trunct_undef1(<16 x i8> %a) {
; CHECK-LABEL: vmovn16trunct_undef1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16trunct_undef1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q1
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %strided.vec = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

; MVE vreinterpretq intrinsics called by the *_undef1 / *_undef2 tests in this file.
declare <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16>)
declare <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8>)