; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=cannonlake | FileCheck %s

; These test cases demonstrate cases where vpermt2/vpermi2 could benefit from being commuted.
;
; Every function below calls one masked vpermi2var/vpermt2var intrinsic and the
; assertions pin the opposite instruction form in the generated assembly:
;   * mask.vpermi2var  with an all-ones mask   -> vpermt2*
;   * mask.vpermt2var  with an all-ones mask   -> vpermi2*
;   * maskz.vpermt2var with a variable mask    -> vpermi2* {%k1} {z}
; Several tests take the last data operand from memory, either as a full vector
; load or as a scalar load splatted with insertelement + shufflevector; the
; assertions for those expect the load to appear as a memory operand or a
; {1toN} broadcast operand of the permute instruction.
;
; The assertion lines are generated output (see the NOTE on the first line);
; regenerate them with that script instead of editing them by hand.

declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; vpermi2var.d.512, all-ones mask, %x2 loaded from memory: expects vpermt2d with a (%rdi) operand.
define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2d (%rdi), %zmm1, %zmm0
; CHECK-NEXT:    retq
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

; vpermi2var.pd.512, all-ones mask, register operands: expects vpermt2pd.
define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

; vpermi2var.ps.512, all-ones mask, register operands: expects vpermt2ps.
define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; vpermi2var.q.512, all-ones mask, register operands: expects vpermt2q.
define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2q %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; maskz.vpermt2var.d.512, variable mask %x3, %x2 loaded from memory:
; expects zero-masked vpermi2d with a (%rdi) operand.
define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2d (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)

; maskz.vpermt2var.pd.512, variable mask %x3, %x2 is a splat of a scalar load:
; expects zero-masked vpermi2pd with a {1to8} broadcast operand.
define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2pd (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2s = load double, double* %x2ptr
  %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
  %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
  %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)

; maskz.vpermt2var.ps.512, variable mask %x3, register operands: expects zero-masked vpermi2ps.
define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2ps %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}


declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; maskz.vpermt2var.q.512, variable mask %x3, register operands: expects zero-masked vpermi2q.
define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2q %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; mask.vpermt2var.d.512, all-ones mask, register operands: expects vpermi2d.
define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2d %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; mask.vpermt2var.d.128, all-ones mask, register operands: expects vpermi2d on xmm registers.
define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; maskz.vpermt2var.d.128, variable mask %x3, register operands: expects zero-masked vpermi2d.
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

; maskz.vpermt2var.d.128, variable mask %x3, %x2 is a splat of a scalar load:
; expects zero-masked vpermi2d with a {1to4} broadcast operand.
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128_broadcast(<4 x i32> %x0, <4 x i32> %x1, i32* %x2ptr, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2d (%rdi){1to4}, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2s = load i32, i32* %x2ptr
  %x2ins = insertelement <4 x i32> undef, i32 %x2s, i32 0
  %x2 = shufflevector <4 x i32> %x2ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; mask.vpermt2var.d.256, all-ones mask, register operands: expects vpermi2d on ymm registers.
define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; maskz.vpermt2var.d.256, variable mask %x3, register operands: expects zero-masked vpermi2d.
define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)

; mask.vpermi2var.pd.128, all-ones mask, register operands: expects vpermt2pd on xmm registers.
define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)

; mask.vpermi2var.pd.256, all-ones mask, register operands: expects vpermt2pd on ymm registers.
define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)

; mask.vpermi2var.ps.128, all-ones mask, register operands: expects vpermt2ps on xmm registers.
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)

; mask.vpermi2var.ps.256, all-ones mask, register operands: expects vpermt2ps on ymm registers.
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

; mask.vpermi2var.ps.256, all-ones mask, %x2 loaded from memory: expects vpermt2ps with a (%rdi) operand.
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256_load(<8 x float> %x0, <8 x i32> %x1, <8 x float>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps (%rdi), %ymm1, %ymm0
; CHECK-NEXT:    retq
  %x2 = load <8 x float>, <8 x float>* %x2p
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

; mask.vpermi2var.ps.256, all-ones mask, %x2 is a splat of a scalar load:
; expects vpermt2ps with a {1to8} broadcast operand.
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256_broadcast(<8 x float> %x0, <8 x i32> %x1, float* %x2ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps (%rdi){1to8}, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %x2s = load float, float* %x2ptr
  %x2ins = insertelement <8 x float> undef, float %x2s, i32 0
  %x2 = shufflevector <8 x float> %x2ins, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; mask.vpermi2var.qi.128, all-ones mask, register operands: expects vpermt2b on xmm registers.
define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; mask.vpermi2var.qi.256, all-ones mask, register operands: expects vpermt2b on ymm registers.
define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; mask.vpermt2var.qi.128, all-ones mask, register operands: expects vpermi2b on xmm registers.
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

; mask.vpermt2var.qi.128, all-ones mask, %x2 loaded from memory: expects vpermi2b with a (%rdi) operand.
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128_load(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2b (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %x2 = load <16 x i8>, <16 x i8>* %x2p
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; mask.vpermt2var.qi.256, all-ones mask, register operands: expects vpermi2b on ymm registers.
define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; maskz.vpermt2var.qi.128, variable mask %x3, register operands: expects zero-masked vpermi2b.
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

; maskz.vpermt2var.qi.128, variable mask %x3, %x2 loaded from memory:
; expects zero-masked vpermi2b with a (%rdi) operand.
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128_load(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>* %x2p, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2b (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2 = load <16 x i8>, <16 x i8>* %x2p
  %res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; maskz.vpermt2var.qi.256, variable mask %x3, register operands: expects zero-masked vpermi2b.
define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}

; maskz.vpermt2var.qi.256, variable mask %x3, %x2 loaded from memory:
; expects zero-masked vpermi2b with a (%rdi) operand.
define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256_load(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>* %x2p, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2b (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2 = load <32 x i8>, <32 x i8>* %x2p
  %res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}