1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
4
5declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
6declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
7declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
8declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
9
10declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
11declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
12
13; Tests showing replacement of variable rotates with immediate splat versions.
14
; Rotate-left of <16 x i32> by splat constants 5/6/7 through the masked prolv
; intrinsic, covering merge-masked (passthru %x1), zero-masked, and unmasked
; (mask -1) forms. All three fold to immediate-form vprold (not variable
; vprolvd); the adds keep every result live in the output.
15define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
16; KNL-LABEL: test_splat_rol_v16i32:
17; KNL:       # %bb.0:
18; KNL-NEXT:    kmovw %edi, %k1
19; KNL-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
20; KNL-NEXT:    vprold $6, %zmm0, %zmm2 {%k1} {z}
21; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
22; KNL-NEXT:    vprold $7, %zmm0, %zmm0
23; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
24; KNL-NEXT:    retq
25;
26; SKX-LABEL: test_splat_rol_v16i32:
27; SKX:       # %bb.0:
28; SKX-NEXT:    kmovd %edi, %k1
29; SKX-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
30; SKX-NEXT:    vprold $6, %zmm0, %zmm2 {%k1} {z}
31; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
32; SKX-NEXT:    vprold $7, %zmm0, %zmm0
33; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
34; SKX-NEXT:    retq
35  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
36  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
37  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
38  %res3 = add <16 x i32> %res, %res1
39  %res4 = add <16 x i32> %res3, %res2
40  ret <16 x i32> %res4
41}
42
; Same pattern as the v16i32 case, for <8 x i64>: splat rotate-left amounts
; 5/6/7 through mask.prolv.q.512 (merge-masked, zero-masked, unmasked) all
; lower to immediate-form vprolq.
43define <8 x i64>@test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
44; KNL-LABEL: test_splat_rol_v8i64:
45; KNL:       # %bb.0:
46; KNL-NEXT:    kmovw %edi, %k1
47; KNL-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
48; KNL-NEXT:    vprolq $6, %zmm0, %zmm2 {%k1} {z}
49; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
50; KNL-NEXT:    vprolq $7, %zmm0, %zmm0
51; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
52; KNL-NEXT:    retq
53;
54; SKX-LABEL: test_splat_rol_v8i64:
55; SKX:       # %bb.0:
56; SKX-NEXT:    kmovd %edi, %k1
57; SKX-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
58; SKX-NEXT:    vprolq $6, %zmm0, %zmm2 {%k1} {z}
59; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
60; SKX-NEXT:    vprolq $7, %zmm0, %zmm0
61; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
62; SKX-NEXT:    retq
63  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
64  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
65  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
66  %res3 = add <8 x i64> %res, %res1
67  %res4 = add <8 x i64> %res3, %res2
68  ret <8 x i64> %res4
69}
70
; Rotate-right counterpart: splat amounts 5/6/7 through mask.prorv.d.512
; (merge-masked, zero-masked, unmasked) all lower to immediate-form vprord.
71define <16 x i32> @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
72; KNL-LABEL: test_splat_ror_v16i32:
73; KNL:       # %bb.0:
74; KNL-NEXT:    kmovw %edi, %k1
75; KNL-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
76; KNL-NEXT:    vprord $6, %zmm0, %zmm2 {%k1} {z}
77; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
78; KNL-NEXT:    vprord $7, %zmm0, %zmm0
79; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
80; KNL-NEXT:    retq
81;
82; SKX-LABEL: test_splat_ror_v16i32:
83; SKX:       # %bb.0:
84; SKX-NEXT:    kmovd %edi, %k1
85; SKX-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
86; SKX-NEXT:    vprord $6, %zmm0, %zmm2 {%k1} {z}
87; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
88; SKX-NEXT:    vprord $7, %zmm0, %zmm0
89; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
90; SKX-NEXT:    retq
91  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
92  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
93  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
94  %res3 = add <16 x i32> %res, %res1
95  %res4 = add <16 x i32> %res3, %res2
96  ret <16 x i32> %res4
97}
98
; Rotate-right of <8 x i64> by splat amounts 5/6/7 through mask.prorv.q.512
; (merge-masked, zero-masked, unmasked) — all lower to immediate-form vprorq.
99define <8 x i64>@test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
100; KNL-LABEL: test_splat_ror_v8i64:
101; KNL:       # %bb.0:
102; KNL-NEXT:    kmovw %edi, %k1
103; KNL-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
104; KNL-NEXT:    vprorq $6, %zmm0, %zmm2 {%k1} {z}
105; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
106; KNL-NEXT:    vprorq $7, %zmm0, %zmm0
107; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
108; KNL-NEXT:    retq
109;
110; SKX-LABEL: test_splat_ror_v8i64:
111; SKX:       # %bb.0:
112; SKX-NEXT:    kmovd %edi, %k1
113; SKX-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
114; SKX-NEXT:    vprorq $6, %zmm0, %zmm2 {%k1} {z}
115; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
116; SKX-NEXT:    vprorq $7, %zmm0, %zmm0
117; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
118; SKX-NEXT:    retq
119  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
120  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
121  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
122  %res3 = add <8 x i64> %res, %res1
123  %res4 = add <8 x i64> %res3, %res2
124  ret <8 x i64> %res4
125}
126
127; Tests showing replacement of out-of-bounds variable rotates with in-bounds immediate splat versions.
128
; Out-of-range splat rotate amounts are reduced modulo the 32-bit element
; width before conversion to the immediate form: 33 -> vprold $1,
; -1 (0xffffffff) -> vprold $31, 65534 -> vprold $30.
129define <16 x i32> @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
130; KNL-LABEL: test_splat_bounds_rol_v16i32:
131; KNL:       # %bb.0:
132; KNL-NEXT:    kmovw %edi, %k1
133; KNL-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
134; KNL-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
135; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
136; KNL-NEXT:    vprold $30, %zmm0, %zmm0
137; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
138; KNL-NEXT:    retq
139;
140; SKX-LABEL: test_splat_bounds_rol_v16i32:
141; SKX:       # %bb.0:
142; SKX-NEXT:    kmovd %edi, %k1
143; SKX-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
144; SKX-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
145; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
146; SKX-NEXT:    vprold $30, %zmm0, %zmm0
147; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
148; SKX-NEXT:    retq
149  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
150  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
151  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
152  %res3 = add <16 x i32> %res, %res1
153  %res4 = add <16 x i32> %res3, %res2
154  ret <16 x i32> %res4
155}
156
; 64-bit variant of the out-of-range splat rotate-left test; amounts reduce
; modulo 64: 65534 -> vprolq $62, 65 -> vprolq $1, -1 -> vprolq $63.
157define <8 x i64>@test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
158; KNL-LABEL: test_splat_bounds_rol_v8i64:
159; KNL:       # %bb.0:
160; KNL-NEXT:    kmovw %edi, %k1
161; KNL-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
162; KNL-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
163; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
164; KNL-NEXT:    vprolq $63, %zmm0, %zmm0
165; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
166; KNL-NEXT:    retq
167;
168; SKX-LABEL: test_splat_bounds_rol_v8i64:
169; SKX:       # %bb.0:
170; SKX-NEXT:    kmovd %edi, %k1
171; SKX-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
172; SKX-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
173; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
174; SKX-NEXT:    vprolq $63, %zmm0, %zmm0
175; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
176; SKX-NEXT:    retq
177  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
178  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
179  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
180  %res3 = add <8 x i64> %res, %res1
181  %res4 = add <8 x i64> %res3, %res2
182  ret <8 x i64> %res4
183}
184
; Rotate-right with out-of-range splat amounts, reduced modulo 32:
; 33 -> vprord $1, -1 -> vprord $31, 65534 -> vprord $30.
185define <16 x i32> @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
186; KNL-LABEL: test_splat_bounds_ror_v16i32:
187; KNL:       # %bb.0:
188; KNL-NEXT:    kmovw %edi, %k1
189; KNL-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
190; KNL-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
191; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
192; KNL-NEXT:    vprord $30, %zmm0, %zmm0
193; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
194; KNL-NEXT:    retq
195;
196; SKX-LABEL: test_splat_bounds_ror_v16i32:
197; SKX:       # %bb.0:
198; SKX-NEXT:    kmovd %edi, %k1
199; SKX-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
200; SKX-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
201; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
202; SKX-NEXT:    vprord $30, %zmm0, %zmm0
203; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
204; SKX-NEXT:    retq
205  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
206  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
207  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
208  %res3 = add <16 x i32> %res, %res1
209  %res4 = add <16 x i32> %res3, %res2
210  ret <16 x i32> %res4
211}
212
; 64-bit rotate-right with out-of-range splat amounts, reduced modulo 64:
; 65534 -> vprorq $62, 65 -> vprorq $1, -1 -> vprorq $63.
213define <8 x i64>@test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
214; KNL-LABEL: test_splat_bounds_ror_v8i64:
215; KNL:       # %bb.0:
216; KNL-NEXT:    kmovw %edi, %k1
217; KNL-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
218; KNL-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
219; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
220; KNL-NEXT:    vprorq $63, %zmm0, %zmm0
221; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
222; KNL-NEXT:    retq
223;
224; SKX-LABEL: test_splat_bounds_ror_v8i64:
225; SKX:       # %bb.0:
226; SKX-NEXT:    kmovd %edi, %k1
227; SKX-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
228; SKX-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
229; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
230; SKX-NEXT:    vprorq $63, %zmm0, %zmm0
231; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
232; SKX-NEXT:    retq
233  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
234  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
235  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
236  %res3 = add <8 x i64> %res, %res1
237  %res4 = add <8 x i64> %res3, %res2
238  ret <8 x i64> %res4
239}
240
241; Constant folding
242; We also test with a target shuffle so that this can't be constant folded upon creation; instead it
243; must wait until the target shuffle has been constant folded in combineX86ShufflesRecursively.
244
; All operands constant: the rotate of splat-1 by per-lane amounts (including
; out-of-range ones, reduced mod 64, e.g. 65 -> 1, 65534 -> 62) is fully
; constant folded into a single constant-pool load.
245define <8 x i64> @test_fold_rol_v8i64() {
246; CHECK-LABEL: test_fold_rol_v8i64:
247; CHECK:       # %bb.0:
248; CHECK-NEXT:    vmovaps {{.*#+}} zmm0 = [1,2,4,9223372036854775808,2,4611686018427387904,9223372036854775808,9223372036854775808]
249; CHECK-NEXT:    retq
250  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> <i64 0, i64 1, i64 2, i64 63, i64 65, i64 65534, i64 65535, i64 -1>, <8 x i64> zeroinitializer, i8 -1)
251  ret <8 x i64> %res
253
; Feeds the rotate from a vpermt2var target shuffle so the constant input
; cannot be folded at creation time (see comment above); the shuffle itself
; folds to a broadcast of 1 and the variable rotate survives as vprolvd with
; a constant-pool amount operand.
254define <16 x i32> @test_fold_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
255; CHECK-LABEL: test_fold_rol_v16i32:
256; CHECK:       # %bb.0:
257; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
258; CHECK-NEXT:    vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
259; CHECK-NEXT:    retq
260  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
261  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
262  ret <16 x i32> %res1
263}
264
; Rotate-right variant of the deferred-fold test: a vpermt2var shuffle (with
; an undef index lane) produces the splat-1 input, which folds to a
; vpbroadcastq; the rotate remains as vprorvq with a constant-pool operand.
265define <8 x i64> @test_fold_ror_v8i64() {
266; CHECK-LABEL: test_fold_ror_v8i64:
267; CHECK:       # %bb.0:
268; CHECK-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
269; CHECK-NEXT:    vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
270; CHECK-NEXT:    retq
271  %res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> zeroinitializer, i8 -1)
272  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %res0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> zeroinitializer, i8 -1)
273  ret <8 x i64> %res1
275
; 32-bit rotate-right variant of the deferred-fold test: the vpermt2var
; shuffle folds to a broadcast of 1 and the variable rotate survives as
; vprorvd with a constant-pool amount operand.
276define <16 x i32> @test_fold_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
277; CHECK-LABEL: test_fold_ror_v16i32:
278; CHECK:       # %bb.0:
279; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
280; CHECK-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
281; CHECK-NEXT:    retq
282  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
283  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
284  ret <16 x i32> %res1
285}
286