; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx2 | FileCheck %s
3
; Test that we correctly fold a shuffle that performs a swizzle of another
; shuffle node according to the rule:
;   shuffle (shuffle (x, undef, M0), undef, M1) -> shuffle (x, undef, M2)
;
; We only do this if the resulting mask is legal, to avoid introducing an
; illegal shuffle that is expanded into a sub-optimal sequence of instructions
; during the lowering stage.
11
; Check that we produce a single vector permute / shuffle in all cases.
13
; swizzle_1: two chained single-input shuffles of %v that use different masks.
;   inner mask M0 = <3,1,2,0,7,5,6,4>
;   outer mask M1 = <1,0,2,3,7,5,6,4>
;   folded mask M2[i] = M0[M1[i]] = <1,3,2,0,4,5,6,7>
; The assertions below expect a single vpermps whose index vector equals M2.
define <8 x i32> @swizzle_1(<8 x i32> %v) {
; CHECK-LABEL: swizzle_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [1,3,2,0,4,5,6,7]
; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 7, i32 5, i32 6, i32 4>
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 7, i32 5, i32 6, i32 4>
  ret <8 x i32> %2
}
24
; swizzle_2: the same single-input shuffle applied twice.
;   mask M = <6,7,4,5,0,1,2,3> (each application moves elements between the
;   low and high four-element halves, e.g. element 6 lands in position 0)
;   folded mask M2[i] = M[M[i]] = <2,3,0,1,6,7,4,5>
; M2 keeps every element inside its own half and uses the same pattern in both
; halves; the assertions below expect a single vpermilps with that pattern.
define <8 x i32> @swizzle_2(<8 x i32> %v) {
; CHECK-LABEL: swizzle_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; CHECK-NEXT:    retq
  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %2
}
34
; swizzle_3: the same single-input shuffle applied twice.
;   mask M = <4,5,6,7,2,3,0,1> (each application moves elements between the
;   low and high four-element halves, e.g. element 4 lands in position 0)
;   folded mask M2[i] = M[M[i]] = <2,3,0,1,6,7,4,5>
; M2 keeps every element inside its own half and uses the same pattern in both
; halves; the assertions below expect a single vpermilps with that pattern.
define <8 x i32> @swizzle_3(<8 x i32> %v) {
; CHECK-LABEL: swizzle_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; CHECK-NEXT:    retq
  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i32> %2
}
44
; swizzle_4: the same single-input shuffle applied twice.
;   mask M = <4,7,5,6,3,2,0,1>
;   folded mask M2[i] = M[M[i]] = <3,1,2,0,6,5,4,7>
; The assertions below expect a single vpermps whose index vector equals M2.
define <8 x i32> @swizzle_4(<8 x i32> %v) {
; CHECK-LABEL: swizzle_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [3,1,2,0,6,5,4,7]
; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 4, i32 7, i32 5, i32 6, i32 3, i32 2, i32 0, i32 1>
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 4, i32 7, i32 5, i32 6, i32 3, i32 2, i32 0, i32 1>
  ret <8 x i32> %2
}
55
; swizzle_5: the same single-input shuffle applied twice.
;   mask M = <7,4,6,5,0,2,1,3>
;   folded mask M2[i] = M[M[i]] = <3,0,1,2,7,6,4,5>
; The assertions below expect a single vpermps whose index vector equals M2.
define <8 x i32> @swizzle_5(<8 x i32> %v) {
; CHECK-LABEL: swizzle_5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [3,0,1,2,7,6,4,5]
; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 7, i32 4, i32 6, i32 5, i32 0, i32 2, i32 1, i32 3>
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 7, i32 4, i32 6, i32 5, i32 0, i32 2, i32 1, i32 3>
  ret <8 x i32> %2
}
66
; swizzle_6: the same single-input shuffle applied twice.
;   mask M = <2,1,3,0,4,7,6,5>
;   folded mask M2[i] = M[M[i]] = <3,1,0,2,4,5,6,7>
;   (the upper four elements of M swap 5 and 7, so applying M twice restores
;   them to their original positions)
; The assertions below expect a single vpermps whose index vector equals M2.
define <8 x i32> @swizzle_6(<8 x i32> %v) {
; CHECK-LABEL: swizzle_6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [3,1,0,2,4,5,6,7]
; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 0, i32 4, i32 7, i32 6, i32 5>
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 0, i32 4, i32 7, i32 6, i32 5>
  ret <8 x i32> %2
}
77
; swizzle_7: the same single-input shuffle applied twice.
;   mask M = <0,3,1,2,5,4,6,7>
;   folded mask M2[i] = M[M[i]] = <0,2,3,1,4,5,6,7>
;   (the upper four elements of M swap 4 and 5, so applying M twice restores
;   them to their original positions)
; The assertions below expect a single vpermps whose index vector equals M2.
define <8 x i32> @swizzle_7(<8 x i32> %v) {
; CHECK-LABEL: swizzle_7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps {{.*#+}} ymm1 = [0,2,3,1,4,5,6,7]
; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 1, i32 2, i32 5, i32 4, i32 6, i32 7>
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 1, i32 2, i32 5, i32 4, i32 6, i32 7>
  ret <8 x i32> %2
}
88
89