; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; Lane 0 of <4 x i32> %x is zero-extended to i64 and inserted into lane 0 of an undef <2 x i64>.
define <2 x i64> @extract0_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract0_i32_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    zip1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 0
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 0 of <4 x i32> %x is zero-extended to i64 and inserted into lane 0 of an all-zero <2 x i64>.
define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract0_i32_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 0
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 1 of <4 x i32> %x is zero-extended to i64 and inserted into lane 0 of an undef <2 x i64>.
define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 1
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 1 of <4 x i32> %x is zero-extended to i64 and inserted into lane 0 of an all-zero <2 x i64>.
define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract1_i32_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 1
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 2 of <4 x i32> %x is zero-extended to i64 and inserted into lane 0 of an undef <2 x i64>.
define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract2_i32_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 2
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 2 of <4 x i32> %x is zero-extended to i64 and inserted into lane 0 of an all-zero <2 x i64>.
define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract2_i32_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[2]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 2
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 3 of <4 x i32> %x is zero-extended to i64 and inserted into lane 0 of an undef <2 x i64>.
define <2 x i64> @extract3_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract3_i32_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 3
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 3 of <4 x i32> %x is zero-extended to i64 and inserted into lane 0 of an all-zero <2 x i64>.
define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract3_i32_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[3]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 3
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 0 of <4 x i32> %x is zero-extended to i64 and inserted into lane 1 of an undef <2 x i64>.
define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract0_i32_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    zip1 v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 0
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 0 of <4 x i32> %x is zero-extended to i64 and inserted into lane 1 of an all-zero <2 x i64>.
define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract0_i32_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 0
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 1 of <4 x i32> %x is zero-extended to i64 and inserted into lane 1 of an undef <2 x i64>.
define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract1_i32_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 1
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 1 of <4 x i32> %x is zero-extended to i64 and inserted into lane 1 of an all-zero <2 x i64>.
define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract1_i32_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 1
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 2 of <4 x i32> %x is zero-extended to i64 and inserted into lane 1 of an undef <2 x i64>.
define <2 x i64> @extract2_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract2_i32_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.s[3], wzr
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 2
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 2 of <4 x i32> %x is zero-extended to i64 and inserted into lane 1 of an all-zero <2 x i64>.
define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract2_i32_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[2]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 2
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 3 of <4 x i32> %x is zero-extended to i64 and inserted into lane 1 of an undef <2 x i64>.
define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract3_i32_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 3
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 3 of <4 x i32> %x is zero-extended to i64 and inserted into lane 1 of an all-zero <2 x i64>.
define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract3_i32_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[3]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %e = extractelement <4 x i32> %x, i32 3
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 0 of <8 x i16> %x is zero-extended to i64 and inserted into lane 0 of an undef <2 x i64>.
define <2 x i64> @extract0_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract0_i16_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[0]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 0
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 0 of <8 x i16> %x is zero-extended to i64 and inserted into lane 0 of an all-zero <2 x i64>.
define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[0]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 0
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 1 of <8 x i16> %x is zero-extended to i64 and inserted into lane 0 of an undef <2 x i64>.
define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract1_i16_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 1
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 1 of <8 x i16> %x is zero-extended to i64 and inserted into lane 0 of an all-zero <2 x i64>.
define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 1
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 2 of <8 x i16> %x is zero-extended to i64 and inserted into lane 0 of an undef <2 x i64>.
define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract2_i16_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 2
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 2 of <8 x i16> %x is zero-extended to i64 and inserted into lane 0 of an all-zero <2 x i64>.
define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 2
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 3 of <8 x i16> %x is zero-extended to i64 and inserted into lane 0 of an undef <2 x i64>.
define <2 x i64> @extract3_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract3_i16_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 3
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 3 of <8 x i16> %x is zero-extended to i64 and inserted into lane 0 of an all-zero <2 x i64>.
define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 3
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
  ret <2 x i64> %r
}

; Lane 0 of <8 x i16> %x is zero-extended to i64 and inserted into lane 1 of an undef <2 x i64>.
define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract0_i16_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[0]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    dup v0.2d, x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 0
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 0 of <8 x i16> %x is zero-extended to i64 and inserted into lane 1 of an all-zero <2 x i64>.
define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[0]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 0
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 1 of <8 x i16> %x is zero-extended to i64 and inserted into lane 1 of an undef <2 x i64>.
define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract1_i16_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    dup v0.2d, x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 1
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 1 of <8 x i16> %x is zero-extended to i64 and inserted into lane 1 of an all-zero <2 x i64>.
define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 1
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 2 of <8 x i16> %x is zero-extended to i64 and inserted into lane 1 of an undef <2 x i64>.
define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract2_i16_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    dup v0.2d, x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 2
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 2 of <8 x i16> %x is zero-extended to i64 and inserted into lane 1 of an all-zero <2 x i64>.
define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 2
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 3 of <8 x i16> %x is zero-extended to i64 and inserted into lane 1 of an undef <2 x i64>.
define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract3_i16_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    dup v0.2d, x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 3
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 1
  ret <2 x i64> %r
}

; Lane 3 of <8 x i16> %x is zero-extended to i64 and inserted into lane 1 of an all-zero <2 x i64>.
define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %e = extractelement <8 x i16> %x, i32 3
  %z = zext i16 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
  ret <2 x i64> %r
}

; This would crash because we did not expect to create
; a shuffle for a vector where the source operand is
; not the same size as the result.
; TODO: Should we handle this pattern? I.e., is moving to/from
; registers the optimal code?

; Lane 2 of <4 x i16> %t0 is zero-extended to i32 and inserted into lane 0 of
; an undef <4 x i32>; the 64-bit source vector is narrower than the 128-bit result.
define <4 x i32> @larger_bv_than_source(<4 x i16> %t0) {
; CHECK-LABEL: larger_bv_than_source:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %t1 = extractelement <4 x i16> %t0, i32 2
  %vgetq_lane = zext i16 %t1 to i32
  %t2 = insertelement <4 x i32> undef, i32 %vgetq_lane, i64 0
  ret <4 x i32> %t2
}
