; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI

define <8 x i64> @var_shuffle_v8i64(<8 x i64> %v, <8 x i64> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
  %index0 = extractelement <8 x i64> %indices, i32 0
  %index1 = extractelement <8 x i64> %indices, i32 1
  %index2 = extractelement <8 x i64> %indices, i32 2
  %index3 = extractelement <8 x i64> %indices, i32 3
  %index4 = extractelement <8 x i64> %indices, i32 4
  %index5 = extractelement <8 x i64> %indices, i32 5
  %index6 = extractelement <8 x i64> %indices, i32 6
  %index7 = extractelement <8 x i64> %indices, i32 7
  %v0 = extractelement <8 x i64> %v, i64 %index0
  %v1 = extractelement <8 x i64> %v, i64 %index1
  %v2 = extractelement <8 x i64> %v, i64 %index2
  %v3 = extractelement <8 x i64> %v, i64 %index3
  %v4 = extractelement <8 x i64> %v, i64 %index4
  %v5 = extractelement <8 x i64> %v, i64 %index5
  %v6 = extractelement <8 x i64> %v, i64 %index6
  %v7 = extractelement <8 x i64> %v, i64 %index7
  %ret0 = insertelement <8 x i64> undef, i64 %v0, i32 0
  %ret1 = insertelement <8 x i64> %ret0, i64 %v1, i32 1
  %ret2 = insertelement <8 x i64> %ret1, i64 %v2, i32 2
  %ret3 = insertelement <8 x i64> %ret2, i64 %v3, i32 3
  %ret4 = insertelement <8 x i64> %ret3, i64 %v4, i32 4
  %ret5 = insertelement <8 x i64> %ret4, i64 %v5, i32 5
  %ret6 = insertelement <8 x i64> %ret5, i64 %v6, i32 6
  %ret7 = insertelement <8 x i64> %ret6, i64 %v7, i32 7
  ret <8 x i64> %ret7
}
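; AVX512F already provides full cross-lane dword permutes, so the variable
; v16i32 shuffle below folds to a single vpermps on every RUN configuration.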
define <16 x i32> @var_shuffle_v16i32(<16 x i32> %v, <16 x i32> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
  %index0 = extractelement <16 x i32> %indices, i32 0
  %index1 = extractelement <16 x i32> %indices, i32 1
  %index2 = extractelement <16 x i32> %indices, i32 2
  %index3 = extractelement <16 x i32> %indices, i32 3
  %index4 = extractelement <16 x i32> %indices, i32 4
  %index5 = extractelement <16 x i32> %indices, i32 5
  %index6 = extractelement <16 x i32> %indices, i32 6
  %index7 = extractelement <16 x i32> %indices, i32 7
  %index8 = extractelement <16 x i32> %indices, i32 8
  %index9 = extractelement <16 x i32> %indices, i32 9
  %index10 = extractelement <16 x i32> %indices, i32 10
  %index11 = extractelement <16 x i32> %indices, i32 11
  %index12 = extractelement <16 x i32> %indices, i32 12
  %index13 = extractelement <16 x i32> %indices, i32 13
  %index14 = extractelement <16 x i32> %indices, i32 14
  %index15 = extractelement <16 x i32> %indices, i32 15
  %v0 = extractelement <16 x i32> %v, i32 %index0
  %v1 = extractelement <16 x i32> %v, i32 %index1
  %v2 = extractelement <16 x i32> %v, i32 %index2
  %v3 = extractelement <16 x i32> %v, i32 %index3
  %v4 = extractelement <16 x i32> %v, i32 %index4
  %v5 = extractelement <16 x i32> %v, i32 %index5
  %v6 = extractelement <16 x i32> %v, i32 %index6
  %v7 = extractelement <16 x i32> %v, i32 %index7
  %v8 = extractelement <16 x i32> %v, i32 %index8
  %v9 = extractelement <16 x i32> %v, i32 %index9
  %v10 = extractelement <16 x i32> %v, i32 %index10
  %v11 = extractelement <16 x i32> %v, i32 %index11
  %v12 = extractelement <16 x i32> %v, i32 %index12
  %v13 = extractelement <16 x i32> %v, i32 %index13
  %v14 = extractelement <16 x i32> %v, i32 %index14
  %v15 = extractelement <16 x i32> %v, i32 %index15
  %ret0 = insertelement <16 x i32> undef, i32 %v0, i32 0
  %ret1 = insertelement <16 x i32> %ret0, i32 %v1, i32 1
  %ret2 = insertelement <16 x i32> %ret1, i32 %v2, i32 2
  %ret3 = insertelement <16 x i32> %ret2, i32 %v3, i32 3
  %ret4 = insertelement <16 x i32> %ret3, i32 %v4, i32 4
  %ret5 = insertelement <16 x i32> %ret4, i32 %v5, i32 5
  %ret6 = insertelement <16 x i32> %ret5, i32 %v6, i32 6
  %ret7 = insertelement <16 x i32> %ret6, i32 %v7, i32 7
  %ret8 = insertelement <16 x i32> %ret7, i32 %v8, i32 8
  %ret9 = insertelement <16 x i32> %ret8, i32 %v9, i32 9
  %ret10 = insertelement <16 x i32> %ret9, i32 %v10, i32 10
  %ret11 = insertelement <16 x i32> %ret10, i32 %v11, i32 11
  %ret12 = insertelement <16 x i32> %ret11, i32 %v12, i32 12
  %ret13 = insertelement <16 x i32> %ret12, i32 %v13, i32 13
  %ret14 = insertelement <16 x i32> %ret13, i32 %v14, i32 14
  %ret15 = insertelement <16 x i32> %ret14, i32 %v15, i32 15
  ret <16 x i32> %ret15
}
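; There is no 512-bit variable word permute without AVX512BW: the AVX512F
; lowering below spills %v to an aligned stack slot and gathers all 32 words
; with scalar extract/insert sequences, while AVX512BW and AVX512VBMI select
; a single vpermw.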
define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwind {
; AVX512F-LABEL: var_shuffle_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: andq $-64, %rsp
; AVX512F-NEXT: subq $128, %rsp
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; AVX512F-NEXT: vmovd %xmm4, %eax
; AVX512F-NEXT: vmovaps %zmm0, (%rsp)
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm0
; AVX512F-NEXT: vpextrw $1, %xmm4, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm0, %xmm0
; AVX512F-NEXT: vpextrw $2, %xmm4, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm0, %xmm0
; AVX512F-NEXT: vpextrw $3, %xmm4, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm0, %xmm0
; AVX512F-NEXT: vpextrw $4, %xmm4, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm0, %xmm0
; AVX512F-NEXT: vpextrw $5, %xmm4, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm0, %xmm0
; AVX512F-NEXT: vpextrw $6, %xmm4, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
; AVX512F-NEXT: vpextrw $7, %xmm4, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: vpextrw $1, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $2, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $3, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $4, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $5, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $6, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $7, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm3
; AVX512F-NEXT: vmovd %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: vpextrw $1, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $2, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $3, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $4, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $5, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vpinsrw $5, %eax, %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $6, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vpinsrw $6, %eax, %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $7, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vpinsrw $7, %eax, %xmm4, %xmm2
; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: vpextrw $1, %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $2, %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $3, %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $4, %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $5, %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $6, %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm4, %xmm4
; AVX512F-NEXT: vpextrw $7, %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: var_shuffle_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI-LABEL: var_shuffle_v32i16:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vpermw %zmm0, %zmm1, %zmm0
; AVX512VBMI-NEXT: retq
  %index0 = extractelement <32 x i16> %indices, i32 0
  %index1 = extractelement <32 x i16> %indices, i32 1
  %index2 = extractelement <32 x i16> %indices, i32 2
  %index3 = extractelement <32 x i16> %indices, i32 3
  %index4 = extractelement <32 x i16> %indices, i32 4
  %index5 = extractelement <32 x i16> %indices, i32 5
  %index6 = extractelement <32 x i16> %indices, i32 6
  %index7 = extractelement <32 x i16> %indices, i32 7
  %index8 = extractelement <32 x i16> %indices, i32 8
  %index9 = extractelement <32 x i16> %indices, i32 9
  %index10 = extractelement <32 x i16> %indices, i32 10
  %index11 = extractelement <32 x i16> %indices, i32 11
  %index12 = extractelement <32 x i16> %indices, i32 12
  %index13 = extractelement <32 x i16> %indices, i32 13
  %index14 = extractelement <32 x i16> %indices, i32 14
  %index15 = extractelement <32 x i16> %indices, i32 15
  %index16 = extractelement <32 x i16> %indices, i32 16
  %index17 = extractelement <32 x i16> %indices, i32 17
  %index18 = extractelement <32 x i16> %indices, i32 18
  %index19 = extractelement <32 x i16> %indices, i32 19
  %index20 = extractelement <32 x i16> %indices, i32 20
  %index21 = extractelement <32 x i16> %indices, i32 21
  %index22 = extractelement <32 x i16> %indices, i32 22
  %index23 = extractelement <32 x i16> %indices, i32 23
  %index24 = extractelement <32 x i16> %indices, i32 24
  %index25 = extractelement <32 x i16> %indices, i32 25
  %index26 = extractelement <32 x i16> %indices, i32 26
  %index27 = extractelement <32 x i16> %indices, i32 27
  %index28 = extractelement <32 x i16> %indices, i32 28
  %index29 = extractelement <32 x i16> %indices, i32 29
  %index30 = extractelement <32 x i16> %indices, i32 30
  %index31 = extractelement <32 x i16> %indices, i32 31
  %v0 = extractelement <32 x i16> %v, i16 %index0
  %v1 = extractelement <32 x i16> %v, i16 %index1
  %v2 = extractelement <32 x i16> %v, i16 %index2
  %v3 = extractelement <32 x i16> %v, i16 %index3
  %v4 = extractelement <32 x i16> %v, i16 %index4
  %v5 = extractelement <32 x i16> %v, i16 %index5
  %v6 = extractelement <32 x i16> %v, i16 %index6
  %v7 = extractelement <32 x i16> %v, i16 %index7
  %v8 = extractelement <32 x i16> %v, i16 %index8
  %v9 = extractelement <32 x i16> %v, i16 %index9
  %v10 = extractelement <32 x i16> %v, i16 %index10
  %v11 = extractelement <32 x i16> %v, i16 %index11
  %v12 = extractelement <32 x i16> %v, i16 %index12
  %v13 = extractelement <32 x i16> %v, i16 %index13
  %v14 = extractelement <32 x i16> %v, i16 %index14
  %v15 = extractelement <32 x i16> %v, i16 %index15
  %v16 = extractelement <32 x i16> %v, i16 %index16
  %v17 = extractelement <32 x i16> %v, i16 %index17
  %v18 = extractelement <32 x i16> %v, i16 %index18
  %v19 = extractelement <32 x i16> %v, i16 %index19
  %v20 = extractelement <32 x i16> %v, i16 %index20
  %v21 = extractelement <32 x i16> %v, i16 %index21
  %v22 = extractelement <32 x i16> %v, i16 %index22
  %v23 = extractelement <32 x i16> %v, i16 %index23
  %v24 = extractelement <32 x i16> %v, i16 %index24
  %v25 = extractelement <32 x i16> %v, i16 %index25
  %v26 = extractelement <32 x i16> %v, i16 %index26
  %v27 = extractelement <32 x i16> %v, i16 %index27
  %v28 = extractelement <32 x i16> %v, i16 %index28
  %v29 = extractelement <32 x i16> %v, i16 %index29
  %v30 = extractelement <32 x i16> %v, i16 %index30
  %v31 = extractelement <32 x i16> %v, i16 %index31
  %ret0 = insertelement <32 x i16> undef, i16 %v0, i32 0
  %ret1 = insertelement <32 x i16> %ret0, i16 %v1, i32 1
  %ret2 = insertelement <32 x i16> %ret1, i16 %v2, i32 2
  %ret3 = insertelement <32 x i16> %ret2, i16 %v3, i32 3
  %ret4 = insertelement <32 x i16> %ret3, i16 %v4, i32 4
  %ret5 = insertelement <32 x i16> %ret4, i16 %v5, i32 5
  %ret6 = insertelement <32 x i16> %ret5, i16 %v6, i32 6
  %ret7 = insertelement <32 x i16> %ret6, i16 %v7, i32 7
  %ret8 = insertelement <32 x i16> %ret7, i16 %v8, i32 8
  %ret9 = insertelement <32 x i16> %ret8, i16 %v9, i32 9
  %ret10 = insertelement <32 x i16> %ret9, i16 %v10, i32 10
  %ret11 = insertelement <32 x i16> %ret10, i16 %v11, i32 11
  %ret12 = insertelement <32 x i16> %ret11, i16 %v12, i32 12
  %ret13 = insertelement <32 x i16> %ret12, i16 %v13, i32 13
  %ret14 = insertelement <32 x i16> %ret13, i16 %v14, i32 14
  %ret15 = insertelement <32 x i16> %ret14, i16 %v15, i32 15
  %ret16 = insertelement <32 x i16> %ret15, i16 %v16, i32 16
  %ret17 = insertelement <32 x i16> %ret16, i16 %v17, i32 17
  %ret18 = insertelement <32 x i16> %ret17, i16 %v18, i32 18
  %ret19 = insertelement <32 x i16> %ret18, i16 %v19, i32 19
  %ret20 = insertelement <32 x i16> %ret19, i16 %v20, i32 20
  %ret21 = insertelement <32 x i16> %ret20, i16 %v21, i32 21
  %ret22 = insertelement <32 x i16> %ret21, i16 %v22, i32 22
  %ret23 = insertelement <32 x i16> %ret22, i16 %v23, i32 23
  %ret24 = insertelement <32 x i16> %ret23, i16 %v24, i32 24
  %ret25 = insertelement <32 x i16> %ret24, i16 %v25, i32 25
  %ret26 = insertelement <32 x i16> %ret25, i16 %v26, i32 26
  %ret27 = insertelement <32 x i16> %ret26, i16 %v27, i32 27
  %ret28 = insertelement <32 x i16> %ret27, i16 %v28, i32 28
  %ret29 = insertelement <32 x i16> %ret28, i16 %v29, i32 29
  %ret30 = insertelement <32 x i16> %ret29, i16 %v30, i32 30
  %ret31 = insertelement <32 x i16> %ret30, i16 %v31, i32 31
  ret <32 x i16> %ret31
}
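; Variable byte permutes need AVX512VBMI's vpermb; both AVX512F and AVX512BW
; fall back to the same stack-spill-and-gather sequence for v64i8.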
define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512F-LABEL: var_shuffle_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: andq $-64, %rsp
; AVX512F-NEXT: subq $128, %rsp
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; AVX512F-NEXT: vmovd %xmm4, %eax
; AVX512F-NEXT: vmovaps %zmm0, (%rsp)
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm0
; AVX512F-NEXT: vpextrb $1, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $2, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $3, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $4, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $5, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $6, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $7, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $8, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $9, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $10, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $10, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $11, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $12, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $13, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $14, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrb $15, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: vpextrb $1, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $1, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $2, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $2, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $3, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $3, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $4, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $4, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $5, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $5, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $6, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $6, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $7, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $7, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $8, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $8, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $9, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $9, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $10, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $10, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $11, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $11, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $12, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $12, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $13, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $14, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $15, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3
; AVX512F-NEXT: vmovd %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: vpextrb $1, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $1, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $2, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $2, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $3, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $3, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $4, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $4, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $5, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $5, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $6, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $6, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $7, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $7, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $8, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $8, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $9, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $9, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $10, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $10, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $11, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $11, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $12, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $13, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $14, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $15, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2
; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
; AVX512F-NEXT: vpextrb $1, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $1, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $2, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $2, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $3, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $3, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $4, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $4, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $5, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $5, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $6, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $6, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $7, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $7, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $8, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $8, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $9, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $9, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $10, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $10, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $11, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $12, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $13, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $14, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
; AVX512F-NEXT: vpextrb $15, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: var_shuffle_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: pushq %rbp
; AVX512BW-NEXT: movq %rsp, %rbp
; AVX512BW-NEXT: andq $-64, %rsp
; AVX512BW-NEXT: subq $128, %rsp
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; AVX512BW-NEXT: vmovd %xmm4, %eax
; AVX512BW-NEXT: vmovaps %zmm0, (%rsp)
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm0
; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $2, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $3, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $4, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $5, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $6, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $7, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $8, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $9, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $10, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $10, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $11, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $12, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $13, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $14, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $3, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $4, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $7, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $8, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $10, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $11, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $11, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $12, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $12, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $15, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3
; AVX512BW-NEXT: vmovd %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $10, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $11, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2
; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $10, (%rsp,%rax), %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm1
; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512BW-NEXT: movq %rbp, %rsp
; AVX512BW-NEXT: popq %rbp
; AVX512BW-NEXT: retq
;
; AVX512VBMI-LABEL: var_shuffle_v64i8:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0
; AVX512VBMI-NEXT: retq
  %index0 = extractelement <64 x i8> %indices, i32 0
  %index1 = extractelement <64 x i8> %indices, i32 1
  %index2 = extractelement <64 x i8> %indices, i32 2
  %index3 = extractelement <64 x i8> %indices, i32 3
  %index4 = extractelement <64 x i8> %indices, i32 4
  %index5 = extractelement <64 x i8> %indices, i32 5
  %index6 = extractelement <64 x i8> %indices, i32 6
  %index7 = extractelement <64 x i8> %indices, i32 7
  %index8 = extractelement <64 x i8> %indices, i32 8
  %index9 = extractelement <64 x i8> %indices, i32 9
  %index10 = extractelement <64 x i8> %indices, i32 10
  %index11 = extractelement <64 x i8> %indices, i32 11
  %index12 = extractelement <64 x i8> %indices, i32 12
  %index13 = extractelement <64 x i8> %indices, i32 13
  %index14 = extractelement <64 x i8> %indices, i32 14
  %index15 = extractelement <64 x i8> %indices, i32 15
  %index16 = extractelement <64 x i8> %indices, i32 16
  %index17 = extractelement <64 x i8> %indices, i32 17
  %index18 = extractelement <64 x i8> %indices, i32 18
  %index19 = extractelement <64 x i8> %indices, i32 19
  %index20 = extractelement <64 x i8> %indices, i32 20
  %index21 = extractelement <64 x i8> %indices, i32 21
  %index22 = extractelement <64 x i8> %indices, i32 22
  %index23 = extractelement <64 x i8> %indices, i32 23
  %index24 = extractelement <64 x i8> %indices, i32 24
  %index25 = extractelement <64 x i8> %indices, i32 25
  %index26 = extractelement <64 x i8> %indices, i32 26
  %index27 = extractelement <64 x i8> %indices, i32 27
  %index28 = extractelement <64 x i8> %indices, i32 28
  %index29 = extractelement <64 x i8> %indices, i32 29
  %index30 = extractelement <64 x i8> %indices, i32 30
  %index31 = extractelement <64 x i8> %indices, i32 31
  %index32 = extractelement <64 x i8> %indices, i32 32
  %index33 = extractelement <64 x i8> %indices, i32 33
  %index34 = extractelement <64 x i8> %indices, i32 34
  %index35 = extractelement <64 x i8> %indices, i32 35
  %index36 = extractelement <64 x i8> %indices, i32 36
  %index37 = extractelement <64 x i8> %indices, i32 37
  %index38 = extractelement <64 x i8> %indices, i32 38
  %index39 = extractelement <64 x i8> %indices, i32 39
  %index40 = extractelement <64 x i8> %indices, i32 40
  %index41 = extractelement <64 x i8> %indices, i32 41
  %index42 = extractelement <64 x i8> %indices, i32 42
  %index43 = extractelement <64 x i8> %indices, i32 43
  %index44 = extractelement <64 x i8> %indices, i32 44
  %index45 = extractelement <64 x i8> %indices, i32 45
  %index46 = extractelement <64 x i8> %indices, i32 46
  %index47 = extractelement <64 x i8> %indices, i32 47
  %index48 = extractelement <64 x i8> %indices, i32 48
  %index49 = extractelement <64 x i8> %indices, i32 49
  %index50 = extractelement <64 x i8> %indices, i32 50
  %index51 = extractelement <64 x i8> %indices, i32 51
  %index52 = extractelement <64 x i8> %indices, i32 52
  %index53 = extractelement <64 x i8> %indices, i32 53
  %index54 = extractelement <64 x i8> %indices, i32 54
  %index55 = extractelement <64 x i8> %indices, i32 55
  %index56 = extractelement <64 x i8> %indices, i32 56
  %index57 = extractelement <64 x i8> %indices, i32 57
  %index58 = extractelement <64 x i8> %indices, i32 58
  %index59 = extractelement <64 x i8> %indices, i32 59
  %index60 = extractelement <64 x i8> %indices, i32 60
  %index61 = extractelement <64 x i8> %indices, i32 61
  %index62 = extractelement <64 x i8> %indices, i32 62
  %index63 = extractelement <64 x i8> %indices, i32 63
  %v0 = extractelement <64 x i8> %v, i8 %index0
  %v1 = extractelement <64 x i8> %v, i8 %index1
  %v2 = extractelement <64 x i8> %v, i8 %index2
  %v3 = extractelement <64 x i8> %v, i8 %index3
  %v4 = extractelement <64 x i8> %v, i8 %index4
  %v5 = extractelement <64 x i8> %v, i8 %index5
  %v6 = extractelement <64 x i8> %v, i8 %index6
  %v7 = extractelement <64 x i8> %v, i8 %index7
  %v8 = extractelement <64 x i8> %v, i8 %index8
  %v9 = extractelement <64 x i8> %v, i8 %index9
  %v10 = extractelement <64 x i8> %v, i8 %index10
  %v11 = extractelement <64 x i8> %v, i8 %index11
  %v12 = extractelement <64 x i8> %v, i8 %index12
  %v13 = extractelement <64 x i8> %v, i8 %index13
  %v14 = extractelement <64 x i8> %v, i8 %index14
  %v15 = extractelement <64 x i8> %v, i8 %index15
  %v16 = extractelement <64 x i8> %v, i8 %index16
  %v17 = extractelement <64 x i8> %v, i8 %index17
  %v18 = extractelement <64 x i8> %v, i8 %index18
  %v19 = extractelement <64 x i8> %v, i8 %index19
  %v20 = extractelement <64 x i8> %v, i8 %index20
  %v21 = extractelement <64 x i8> %v, i8 %index21
  %v22 = extractelement <64 x i8> %v, i8 %index22
  %v23 = extractelement <64 x i8> %v, i8 %index23
  %v24 = extractelement <64 x i8> %v, i8 %index24
  %v25 = extractelement <64 x i8> %v, i8 %index25
  %v26 = extractelement <64 x i8> %v, i8 %index26
  %v27 = extractelement <64 x i8> %v, i8 %index27
  %v28 = extractelement <64 x i8> %v, i8 %index28
  %v29 = extractelement <64 x i8> %v, i8 %index29
  %v30 = extractelement <64 x i8> %v, i8 %index30
  %v31 = extractelement <64 x i8> %v, i8 %index31
  %v32 = extractelement <64 x i8> %v, i8 %index32
  %v33 = extractelement <64 x i8> %v, i8 %index33
  %v34 = extractelement <64 x i8> %v, i8 %index34
  %v35 = extractelement <64 x i8> %v, i8 %index35
  %v36 = extractelement <64 x i8> %v, i8 %index36
  %v37 = extractelement <64 x i8> %v, i8 %index37
  %v38 = extractelement <64 x i8> %v, i8 %index38
  %v39 = extractelement <64 x i8> %v, i8 %index39
  %v40 = extractelement <64 x i8> %v, i8 %index40
  %v41 = extractelement <64 x i8> %v, i8 %index41
  %v42 = extractelement <64 x i8> %v, i8 %index42
  %v43 = extractelement <64 x i8> %v, i8 %index43
  %v44 = extractelement <64 x i8> %v, i8 %index44
  %v45 = extractelement <64 x i8> %v, i8 %index45
  %v46 = extractelement <64 x i8> %v, i8 %index46
  %v47 = extractelement <64 x i8> %v, i8 %index47
  %v48 = extractelement <64 x i8> %v, i8 %index48
  %v49 = extractelement <64 x i8> %v, i8 %index49
  %v50 = extractelement <64 x i8> %v, i8 %index50
  %v51 = extractelement <64 x i8> %v, i8 %index51
  %v52 = extractelement <64 x i8> %v, i8 %index52
  %v53 = extractelement <64 x i8> %v, i8 %index53
  %v54 = extractelement <64 x i8> %v, i8 %index54
  %v55 = extractelement <64 x i8> %v, i8 %index55
  %v56 = extractelement <64 x i8> %v, i8 %index56
  %v57 = extractelement <64 x i8> %v, i8 %index57
  %v58 = extractelement <64 x i8> %v, i8 %index58
  %v59 = extractelement <64 x i8> %v, i8 %index59
  %v60 = extractelement <64 x i8> %v, i8 %index60
  %v61 = extractelement <64 x i8> %v, i8 %index61
  %v62 = extractelement <64 x i8> %v, i8 %index62
  %v63 = extractelement <64 x i8> %v, i8 %index63
  %ret0 = insertelement <64 x i8> undef, i8 %v0, i32 0
  %ret1 = insertelement <64 x i8> %ret0, i8 %v1, i32 1
  %ret2 = insertelement <64 x i8> %ret1, i8 %v2, i32 2
  %ret3 = insertelement <64 x i8> %ret2, i8 %v3, i32 3
  %ret4 = insertelement <64 x i8> %ret3, i8 %v4, i32 4
  %ret5 = insertelement <64 x i8> %ret4, i8 %v5, i32 5
  %ret6 = insertelement <64 x i8> %ret5, i8 %v6, i32 6
  %ret7 = insertelement <64 x i8> %ret6, i8 %v7, i32 7
  %ret8 = insertelement <64 x i8> %ret7, i8 %v8, i32 8
  %ret9 = insertelement <64 x i8> %ret8, i8 %v9, i32 9
  %ret10 = insertelement <64 x i8> %ret9, i8 %v10, i32 10
  %ret11 = insertelement <64 x i8> %ret10, i8 %v11, i32 11
  %ret12 = insertelement <64 x i8> %ret11, i8 %v12, i32 12
  %ret13 = insertelement <64 x i8> %ret12, i8 %v13, i32 13
  %ret14 = insertelement <64 x i8> %ret13, i8 %v14, i32 14
  %ret15 = insertelement <64 x i8> %ret14, i8 %v15, i32 15
  %ret16 = insertelement <64 x i8> %ret15, i8 %v16, i32 16
  %ret17 = insertelement <64 x i8> %ret16, i8 %v17, i32 17
  %ret18 = insertelement <64 x i8> %ret17, i8 %v18, i32 18
  %ret19 = insertelement <64 x i8> %ret18, i8 %v19, i32 19
  %ret20 = insertelement <64 x i8> %ret19, i8 %v20, i32 20
  %ret21 = insertelement <64 x i8> %ret20, i8 %v21, i32 21
  %ret22 = insertelement <64 x i8> %ret21, i8 %v22, i32 22
  %ret23 = insertelement <64 x i8> %ret22, i8 %v23, i32 23
  %ret24 = insertelement <64 x i8> %ret23, i8 %v24, i32 24
  %ret25 = insertelement <64 x i8> %ret24, i8 %v25, i32 25
  %ret26 = insertelement <64 x i8> %ret25, i8 %v26, i32 26
  %ret27 = insertelement <64 x i8> %ret26, i8 %v27, i32 27
  %ret28 = insertelement <64 x i8> %ret27, i8 %v28, i32 28
  %ret29 = insertelement <64 x i8> %ret28, i8 %v29, i32 29
  %ret30 = insertelement <64 x i8> %ret29, i8 %v30, i32 30
  %ret31 = insertelement <64 x i8> %ret30, i8 %v31, i32 31
  %ret32 = insertelement <64 x i8> %ret31, i8 %v32, i32 32
  %ret33 = insertelement <64 x i8> %ret32, i8 %v33, i32 33
  %ret34 = insertelement <64 x i8> %ret33, i8 %v34, i32 34
  %ret35 = insertelement <64 x i8> %ret34, i8 %v35, i32 35
  %ret36 = insertelement <64 x i8> %ret35, i8 %v36, i32 36
  %ret37 = insertelement <64 x i8> %ret36, i8 %v37, i32 37
  %ret38 = insertelement <64 x i8> %ret37, i8 %v38, i32 38
  %ret39 = insertelement <64 x i8> %ret38, i8 %v39, i32 39
  %ret40 = insertelement <64 x i8> %ret39, i8 %v40, i32 40
  %ret41 = insertelement <64 x i8> %ret40, i8 %v41, i32 41
  %ret42 = insertelement <64 x i8> %ret41, i8 %v42, i32 42
  %ret43 = insertelement <64 x i8> %ret42, i8 %v43, i32 43
  %ret44 = insertelement <64 x i8> %ret43, i8 %v44, i32 44
  %ret45 = insertelement <64 x i8> %ret44, i8 %v45, i32 45
  %ret46 = insertelement <64 x i8> %ret45, i8 %v46, i32 46
  %ret47 = insertelement <64 x i8> %ret46, i8 %v47, i32 47
  %ret48 = insertelement <64 x i8> %ret47, i8 %v48, i32 48
  %ret49 = insertelement <64 x i8> %ret48, i8 %v49, i32 49
  %ret50 = insertelement <64 x i8> %ret49, i8 %v50, i32 50
  %ret51 = insertelement <64 x i8> %ret50, i8 %v51, i32 51
  %ret52 = insertelement <64 x i8> %ret51, i8 %v52, i32 52
  %ret53 = insertelement <64 x i8> %ret52, i8 %v53, i32 53
  %ret54 = insertelement <64 x i8> %ret53, i8 %v54, i32 54
  %ret55 = insertelement <64 x i8> %ret54, i8 %v55, i32 55
  %ret56 = insertelement <64 x i8> %ret55, i8 %v56, i32 56
  %ret57 = insertelement <64 x i8> %ret56, i8 %v57, i32 57
  %ret58 = insertelement <64 x i8> %ret57, i8 %v58, i32 58
  %ret59 = insertelement <64 x i8> %ret58, i8 %v59, i32 59
  %ret60 = insertelement <64 x i8> %ret59, i8 %v60, i32 60
  %ret61 = insertelement <64 x i8> %ret60, i8 %v61, i32 61
  %ret62 = insertelement <64 x i8> %ret61, i8 %v62, i32 62
  %ret63 = insertelement <64 x i8> %ret62, i8 %v63, i32 63
  ret <64 x i8> %ret63
}
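; The floating-point variants lower the same way as the integer ones:
; v8f64 becomes a single vpermpd.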
define <8 x double> @var_shuffle_v8f64(<8 x double> %v, <8 x i64> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
  %index0 = extractelement <8 x i64> %indices, i32 0
  %index1 = extractelement <8 x i64> %indices, i32 1
  %index2 = extractelement <8 x i64> %indices, i32 2
  %index3 = extractelement <8 x i64> %indices, i32 3
  %index4 = extractelement <8 x i64> %indices, i32 4
  %index5 = extractelement <8 x i64> %indices, i32 5
  %index6 = extractelement <8 x i64> %indices, i32 6
  %index7 = extractelement <8 x i64> %indices, i32 7
  %v0 = extractelement <8 x double> %v, i64 %index0
  %v1 = extractelement <8 x double> %v, i64 %index1
  %v2 = extractelement <8 x double> %v, i64 %index2
  %v3 = extractelement <8 x double> %v, i64 %index3
  %v4 = extractelement <8 x double> %v, i64 %index4
  %v5 = extractelement <8 x double> %v, i64 %index5
  %v6 = extractelement <8 x double> %v, i64 %index6
  %v7 = extractelement <8 x double> %v, i64 %index7
  %ret0 = insertelement <8 x double> undef, double %v0, i32 0
  %ret1 = insertelement <8 x double> %ret0, double %v1, i32 1
  %ret2 = insertelement <8 x double> %ret1, double %v2, i32 2
  %ret3 = insertelement <8 x double> %ret2, double %v3, i32 3
  %ret4 = insertelement <8 x double> %ret3, double %v4, i32 4
  %ret5 = insertelement <8 x double> %ret4, double %v5, i32 5
  %ret6 = insertelement <8 x double> %ret5, double %v6, i32 6
  %ret7 = insertelement <8 x double> %ret6, double %v7, i32 7
  ret <8 x double> %ret7
}
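; Likewise, the variable v16f32 shuffle is a single vpermps.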
define <16 x float> @var_shuffle_v16f32(<16 x float> %v, <16 x i32> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
  %index0 = extractelement <16 x i32> %indices, i32 0
  %index1 = extractelement <16 x i32> %indices, i32 1
  %index2 = extractelement <16 x i32> %indices, i32 2
  %index3 = extractelement <16 x i32> %indices, i32 3
  %index4 = extractelement <16 x i32> %indices, i32 4
  %index5 = extractelement <16 x i32> %indices, i32 5
  %index6 = extractelement <16 x i32> %indices, i32 6
  %index7 = extractelement <16 x i32> %indices, i32 7
  %index8 = extractelement <16 x i32> %indices, i32 8
  %index9 = extractelement <16 x i32> %indices, i32 9
  %index10 = extractelement <16 x i32> %indices, i32 10
  %index11 = extractelement <16 x i32> %indices, i32 11
  %index12 = extractelement <16 x i32> %indices, i32 12
  %index13 = extractelement <16 x i32> %indices, i32 13
  %index14 = extractelement <16 x i32> %indices, i32 14
  %index15 = extractelement <16 x i32> %indices, i32 15
  %v0 = extractelement <16 x float> %v, i32 %index0
  %v1 = extractelement <16 x float> %v, i32 %index1
  %v2 = extractelement <16 x float> %v, i32 %index2
  %v3 = extractelement <16 x float> %v, i32 %index3
  %v4 = extractelement <16 x float> %v, i32 %index4
  %v5 = extractelement <16 x float> %v, i32 %index5
  %v6 = extractelement <16 x float> %v, i32 %index6
  %v7 = extractelement <16 x float> %v, i32 %index7
  %v8 = extractelement <16 x float> %v, i32 %index8
  %v9 = extractelement <16 x float> %v, i32 %index9
  %v10 = extractelement <16 x float> %v, i32 %index10
  %v11 = extractelement <16 x float> %v, i32 %index11
  %v12 = extractelement <16 x float> %v, i32 %index12
  %v13 = extractelement <16 x float> %v, i32 %index13
  %v14 = extractelement <16 x float> %v, i32 %index14
  %v15 = extractelement <16 x float> %v, i32 %index15
  %ret0 = insertelement <16 x float> undef, float %v0, i32 0
  %ret1 = insertelement <16 x float> %ret0, float %v1, i32 1
  %ret2 = insertelement <16 x float> %ret1, float %v2, i32 2
  %ret3 = insertelement <16 x float> %ret2, float %v3, i32 3
  %ret4 = insertelement <16 x float> %ret3, float %v4, i32 4
  %ret5 = insertelement <16 x float> %ret4, float %v5, i32 5
  %ret6 = insertelement <16 x float> %ret5, float %v6, i32 6
  %ret7 = insertelement <16 x float> %ret6, float %v7, i32 7
  %ret8 = insertelement <16 x float> %ret7, float %v8, i32 8
  %ret9 = insertelement <16 x float> %ret8, float %v9, i32 9
  %ret10 = insertelement <16 x float> %ret9, float %v10, i32 10
  %ret11 = insertelement <16 x float> %ret10, float %v11, i32 11
  %ret12 = insertelement <16 x float> %ret11, float %v12, i32 12
  %ret13 = insertelement <16 x float> %ret12, float %v13, i32 13
  %ret14 = insertelement <16 x float> %ret13, float %v14, i32 14
  %ret15 = insertelement <16 x float> %ret14, float %v15, i32 15
  ret <16 x float> %ret15
}
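; Here the shuffle indices are computed at runtime (%b broadcast and offset by
; constant-pool vectors via vpaddd), and the gathered bytes are sign-extended
; and converted to float, so even AVX512BW goes through the stack-spill
; sequence before vpmovsxbd/vcvtdq2ps.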
define void @var_cvt_shuffle_v64f32_v64i8_idx(<64 x float>* %dst, <64 x i8> %src, i32 %b) nounwind {
; AVX512F-LABEL: var_cvt_shuffle_v64f32_v64i8_idx:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: andq $-64, %rsp
; AVX512F-NEXT: subq $128, %rsp
; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT: vpbroadcastd %esi, %zmm2
; AVX512F-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm1
; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: vmovaps %zmm0, (%rsp)
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm0
; AVX512F-NEXT: vpextrd $1, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrd $2, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrd $3, %xmm1, %eax
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrd $1, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrd $2, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrd $3, %xmm3, %eax
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm4
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrd $1, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm5
; AVX512F-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm3
; AVX512F-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512F-NEXT: andl $63, %esi
; AVX512F-NEXT: vpinsrb $10, (%rsp,%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpextrd $3, %xmm4, %eax
; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm1
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrd $1, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrd $2, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vpextrd $3, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm8
; AVX512F-NEXT: vmovd %xmm5, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm1
; AVX512F-NEXT: vpextrd $1, %xmm5, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $1, (%rsp,%rax), %xmm1, %xmm1
; AVX512F-NEXT: vpextrd $2, %xmm5, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $2, (%rsp,%rax), %xmm1, %xmm1
; AVX512F-NEXT: vpextrd $3, %xmm5, %eax
; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm4
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $3, (%rsp,%rax), %xmm1, %xmm1
; AVX512F-NEXT: vmovd %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $4, (%rsp,%rax), %xmm1, %xmm1
; AVX512F-NEXT: vpextrd $1, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $5, (%rsp,%rax), %xmm1, %xmm1
; AVX512F-NEXT: vpextrd $2, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $6, (%rsp,%rax), %xmm1, %xmm1
; AVX512F-NEXT: vpextrd $3, %xmm4, %eax
; AVX512F-NEXT: vextracti32x4 $2, %zmm5, %xmm4
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $7, (%rsp,%rax), %xmm1, %xmm1
; AVX512F-NEXT: vmovd %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $8, (%rsp,%rax), %xmm1, %xmm1
; AVX512F-NEXT: vpextrd $1, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $9, (%rsp,%rax), %xmm1, %xmm1
; AVX512F-NEXT: vpextrd $2, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $10, (%rsp,%rax), %xmm1, %xmm6
; AVX512F-NEXT: vpextrd $3, %xmm4, %eax
; AVX512F-NEXT: vextracti32x4 $3, %zmm5, %xmm1
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $11, (%rsp,%rax), %xmm6, %xmm4
; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $12, (%rsp,%rax), %xmm4, %xmm4
; AVX512F-NEXT: vmovd %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm5
; AVX512F-NEXT: vpextrd $1, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $1, (%rsp,%rax), %xmm5, %xmm5
; AVX512F-NEXT: vpextrd $2, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $2, (%rsp,%rax), %xmm5, %xmm5
; AVX512F-NEXT: vpextrd $3, %xmm3, %eax
; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm6
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $3, (%rsp,%rax), %xmm5, %xmm5
; AVX512F-NEXT: vmovd %xmm6, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $4, (%rsp,%rax), %xmm5, %xmm5
; AVX512F-NEXT: vpextrd $1, %xmm6, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $5, (%rsp,%rax), %xmm5, %xmm5
; AVX512F-NEXT: vpextrd $2, %xmm6, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $6, (%rsp,%rax), %xmm5, %xmm7
; AVX512F-NEXT: vpextrd $3, %xmm6, %eax
; AVX512F-NEXT: vextracti32x4 $2, %zmm3, %xmm5
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $7, (%rsp,%rax), %xmm7, %xmm6
; AVX512F-NEXT: vmovd %xmm5, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $8, (%rsp,%rax), %xmm6, %xmm6
; AVX512F-NEXT: vpextrd $1, %xmm5, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $9, (%rsp,%rax), %xmm6, %xmm6
; AVX512F-NEXT: vpextrd $2, %xmm5, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $10, (%rsp,%rax), %xmm6, %xmm6
; AVX512F-NEXT: vmovd %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm7
; AVX512F-NEXT: vpextrd $1, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $1, (%rsp,%rax), %xmm7, %xmm7
; AVX512F-NEXT: vpextrd $2, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $2, (%rsp,%rax), %xmm7, %xmm7
; AVX512F-NEXT: vpextrd $3, %xmm2, %eax
; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm0
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $3, (%rsp,%rax), %xmm7, %xmm7
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $4, (%rsp,%rax), %xmm7, %xmm7
; AVX512F-NEXT: vpextrd $1, %xmm0, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $5, (%rsp,%rax), %xmm7, %xmm7
; AVX512F-NEXT: vpextrd $2, %xmm0, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $6, (%rsp,%rax), %xmm7, %xmm7
; AVX512F-NEXT: vpextrd $3, %xmm0, %eax
; AVX512F-NEXT: vextracti32x4 $2, %zmm2, %xmm0
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $7, (%rsp,%rax), %xmm7, %xmm7
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $8, (%rsp,%rax), %xmm7, %xmm7
; AVX512F-NEXT: vpextrd $1, %xmm0, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $9, (%rsp,%rax), %xmm7, %xmm7
; AVX512F-NEXT: vpextrd $2, %xmm0, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $10, %eax, %xmm7, %xmm7
; AVX512F-NEXT: vpextrd $3, %xmm0, %eax
; AVX512F-NEXT: vextracti32x4 $3, %zmm2, %xmm0
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm7, %xmm2
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vpextrd $1, %xmm0, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vpextrd $2, %xmm0, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vpextrd $3, %xmm0, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
; AVX512F-NEXT: vpextrd $3, %xmm5, %eax
; AVX512F-NEXT: vextracti32x4 $3, %zmm3, %xmm2
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $11, %eax, %xmm6, %xmm3
; AVX512F-NEXT: vmovd %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; AVX512F-NEXT: vpextrd $1, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; AVX512F-NEXT: vpextrd $2, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; AVX512F-NEXT: vpextrd $3, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
; AVX512F-NEXT: vpextrd $1, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3
; AVX512F-NEXT: vpextrd $2, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; AVX512F-NEXT: vpextrd $3, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm3, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vcvtdq2ps %zmm0, %zmm0
; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
; AVX512F-NEXT: vcvtdq2ps %zmm2, %zmm2
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vcvtdq2ps %zmm1, %zmm1
; AVX512F-NEXT: vpmovsxbd %xmm8, %zmm3
; AVX512F-NEXT: vcvtdq2ps %zmm3, %zmm3
; AVX512F-NEXT: vmovaps %zmm3, 192(%rdi)
; AVX512F-NEXT: vmovaps %zmm1, 128(%rdi)
; AVX512F-NEXT: vmovaps %zmm2, 64(%rdi)
; AVX512F-NEXT: vmovaps %zmm0, (%rdi)
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: var_cvt_shuffle_v64f32_v64i8_idx:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: pushq %rbp
; AVX512BW-NEXT: movq %rsp, %rbp
; AVX512BW-NEXT: andq $-64, %rsp
; AVX512BW-NEXT: subq $128, %rsp
; AVX512BW-NEXT: # kill: def $esi killed $esi def $rsi
; AVX512BW-NEXT: vpbroadcastd %esi, %zmm2
; AVX512BW-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm1
; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: vmovaps %zmm0, (%rsp)
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm0
; AVX512BW-NEXT: vpextrd $1, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrd $2, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrd $3, %xmm1, %eax
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrd $1, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrd $2, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrd $3, %xmm3, %eax
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrd $1, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm5
; AVX512BW-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm3
; AVX512BW-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT: andl $63, %esi
; AVX512BW-NEXT: vpinsrb $10, (%rsp,%rsi), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrd $3, %xmm4, %eax
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm1
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $11, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $12, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrd $1, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $13, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrd $2, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $14, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vpextrd $3, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm8
; AVX512BW-NEXT: vmovd %xmm5, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm1
; AVX512BW-NEXT: vpextrd $1, %xmm5, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm1, %xmm1
; AVX512BW-NEXT: vpextrd $2, %xmm5, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm1, %xmm1
; AVX512BW-NEXT: vpextrd $3, %xmm5, %eax
; AVX512BW-NEXT: vextracti128 $1, %ymm5, %xmm4
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm1, %xmm1
; AVX512BW-NEXT: vmovd %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm1, %xmm1
; AVX512BW-NEXT: vpextrd $1, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm1, %xmm1
; AVX512BW-NEXT: vpextrd $2, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm1, %xmm1
; AVX512BW-NEXT: vpextrd $3, %xmm4, %eax
; AVX512BW-NEXT: vextracti32x4 $2, %zmm5, %xmm4
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm1, %xmm1
; AVX512BW-NEXT: vmovd %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm1, %xmm1
; AVX512BW-NEXT: vpextrd $1, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm1, %xmm1
; AVX512BW-NEXT: vpextrd
$2, %xmm4, %eax 1411; AVX512BW-NEXT: andl $63, %eax 1412; AVX512BW-NEXT: vpinsrb $10, (%rsp,%rax), %xmm1, %xmm6 1413; AVX512BW-NEXT: vpextrd $3, %xmm4, %eax 1414; AVX512BW-NEXT: vextracti32x4 $3, %zmm5, %xmm1 1415; AVX512BW-NEXT: andl $63, %eax 1416; AVX512BW-NEXT: vpinsrb $11, (%rsp,%rax), %xmm6, %xmm4 1417; AVX512BW-NEXT: vmovd %xmm1, %eax 1418; AVX512BW-NEXT: andl $63, %eax 1419; AVX512BW-NEXT: vpinsrb $12, (%rsp,%rax), %xmm4, %xmm4 1420; AVX512BW-NEXT: vmovd %xmm3, %eax 1421; AVX512BW-NEXT: andl $63, %eax 1422; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1423; AVX512BW-NEXT: vmovd %eax, %xmm5 1424; AVX512BW-NEXT: vpextrd $1, %xmm3, %eax 1425; AVX512BW-NEXT: andl $63, %eax 1426; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm5, %xmm5 1427; AVX512BW-NEXT: vpextrd $2, %xmm3, %eax 1428; AVX512BW-NEXT: andl $63, %eax 1429; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm5, %xmm5 1430; AVX512BW-NEXT: vpextrd $3, %xmm3, %eax 1431; AVX512BW-NEXT: vextracti128 $1, %ymm3, %xmm6 1432; AVX512BW-NEXT: andl $63, %eax 1433; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm5, %xmm5 1434; AVX512BW-NEXT: vmovd %xmm6, %eax 1435; AVX512BW-NEXT: andl $63, %eax 1436; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm5, %xmm5 1437; AVX512BW-NEXT: vpextrd $1, %xmm6, %eax 1438; AVX512BW-NEXT: andl $63, %eax 1439; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm5, %xmm5 1440; AVX512BW-NEXT: vpextrd $2, %xmm6, %eax 1441; AVX512BW-NEXT: andl $63, %eax 1442; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm5, %xmm7 1443; AVX512BW-NEXT: vpextrd $3, %xmm6, %eax 1444; AVX512BW-NEXT: vextracti32x4 $2, %zmm3, %xmm5 1445; AVX512BW-NEXT: andl $63, %eax 1446; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm7, %xmm6 1447; AVX512BW-NEXT: vmovd %xmm5, %eax 1448; AVX512BW-NEXT: andl $63, %eax 1449; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm6, %xmm6 1450; AVX512BW-NEXT: vpextrd $1, %xmm5, %eax 1451; AVX512BW-NEXT: andl $63, %eax 1452; AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm6, %xmm6 1453; AVX512BW-NEXT: vpextrd $2, %xmm5, %eax 1454; AVX512BW-NEXT: andl $63, %eax 1455; AVX512BW-NEXT: vpinsrb $10, (%rsp,%rax), %xmm6, %xmm6 1456; AVX512BW-NEXT: vmovd %xmm2, %eax 1457; AVX512BW-NEXT: andl $63, %eax 1458; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1459; AVX512BW-NEXT: vmovd %eax, %xmm7 1460; AVX512BW-NEXT: vpextrd $1, %xmm2, %eax 1461; AVX512BW-NEXT: andl $63, %eax 1462; AVX512BW-NEXT: vpinsrb $1, (%rsp,%rax), %xmm7, %xmm7 1463; AVX512BW-NEXT: vpextrd $2, %xmm2, %eax 1464; AVX512BW-NEXT: andl $63, %eax 1465; AVX512BW-NEXT: vpinsrb $2, (%rsp,%rax), %xmm7, %xmm7 1466; AVX512BW-NEXT: vpextrd $3, %xmm2, %eax 1467; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm0 1468; AVX512BW-NEXT: andl $63, %eax 1469; AVX512BW-NEXT: vpinsrb $3, (%rsp,%rax), %xmm7, %xmm7 1470; AVX512BW-NEXT: vmovd %xmm0, %eax 1471; AVX512BW-NEXT: andl $63, %eax 1472; AVX512BW-NEXT: vpinsrb $4, (%rsp,%rax), %xmm7, %xmm7 1473; AVX512BW-NEXT: vpextrd $1, %xmm0, %eax 1474; AVX512BW-NEXT: andl $63, %eax 1475; AVX512BW-NEXT: vpinsrb $5, (%rsp,%rax), %xmm7, %xmm7 1476; AVX512BW-NEXT: vpextrd $2, %xmm0, %eax 1477; AVX512BW-NEXT: andl $63, %eax 1478; AVX512BW-NEXT: vpinsrb $6, (%rsp,%rax), %xmm7, %xmm7 1479; AVX512BW-NEXT: vpextrd $3, %xmm0, %eax 1480; AVX512BW-NEXT: vextracti32x4 $2, %zmm2, %xmm0 1481; AVX512BW-NEXT: andl $63, %eax 1482; AVX512BW-NEXT: vpinsrb $7, (%rsp,%rax), %xmm7, %xmm7 1483; AVX512BW-NEXT: vmovd %xmm0, %eax 1484; AVX512BW-NEXT: andl $63, %eax 1485; AVX512BW-NEXT: vpinsrb $8, (%rsp,%rax), %xmm7, %xmm7 1486; AVX512BW-NEXT: vpextrd $1, %xmm0, %eax 1487; AVX512BW-NEXT: andl $63, %eax 1488; 
AVX512BW-NEXT: vpinsrb $9, (%rsp,%rax), %xmm7, %xmm7 1489; AVX512BW-NEXT: vpextrd $2, %xmm0, %eax 1490; AVX512BW-NEXT: andl $63, %eax 1491; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1492; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm7, %xmm7 1493; AVX512BW-NEXT: vpextrd $3, %xmm0, %eax 1494; AVX512BW-NEXT: vextracti32x4 $3, %zmm2, %xmm0 1495; AVX512BW-NEXT: andl $63, %eax 1496; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1497; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm7, %xmm2 1498; AVX512BW-NEXT: vmovd %xmm0, %eax 1499; AVX512BW-NEXT: andl $63, %eax 1500; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1501; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 1502; AVX512BW-NEXT: vpextrd $1, %xmm0, %eax 1503; AVX512BW-NEXT: andl $63, %eax 1504; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1505; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 1506; AVX512BW-NEXT: vpextrd $2, %xmm0, %eax 1507; AVX512BW-NEXT: andl $63, %eax 1508; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1509; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 1510; AVX512BW-NEXT: vpextrd $3, %xmm0, %eax 1511; AVX512BW-NEXT: andl $63, %eax 1512; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1513; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 1514; AVX512BW-NEXT: vpextrd $3, %xmm5, %eax 1515; AVX512BW-NEXT: vextracti32x4 $3, %zmm3, %xmm2 1516; AVX512BW-NEXT: andl $63, %eax 1517; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1518; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm6, %xmm3 1519; AVX512BW-NEXT: vmovd %xmm2, %eax 1520; AVX512BW-NEXT: andl $63, %eax 1521; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1522; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 1523; AVX512BW-NEXT: vpextrd $1, %xmm2, %eax 1524; AVX512BW-NEXT: andl $63, %eax 1525; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1526; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 1527; AVX512BW-NEXT: vpextrd $2, %xmm2, %eax 1528; AVX512BW-NEXT: andl $63, %eax 1529; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1530; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 1531; AVX512BW-NEXT: vpextrd $3, %xmm2, %eax 1532; AVX512BW-NEXT: andl $63, %eax 1533; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1534; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 1535; AVX512BW-NEXT: vpextrd $1, %xmm1, %eax 1536; AVX512BW-NEXT: andl $63, %eax 1537; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1538; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3 1539; AVX512BW-NEXT: vpextrd $2, %xmm1, %eax 1540; AVX512BW-NEXT: andl $63, %eax 1541; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1542; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 1543; AVX512BW-NEXT: vpextrd $3, %xmm1, %eax 1544; AVX512BW-NEXT: andl $63, %eax 1545; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax 1546; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm1 1547; AVX512BW-NEXT: vpmovsxbd %xmm0, %zmm0 1548; AVX512BW-NEXT: vcvtdq2ps %zmm0, %zmm0 1549; AVX512BW-NEXT: vpmovsxbd %xmm2, %zmm2 1550; AVX512BW-NEXT: vcvtdq2ps %zmm2, %zmm2 1551; AVX512BW-NEXT: vpmovsxbd %xmm1, %zmm1 1552; AVX512BW-NEXT: vcvtdq2ps %zmm1, %zmm1 1553; AVX512BW-NEXT: vpmovsxbd %xmm8, %zmm3 1554; AVX512BW-NEXT: vcvtdq2ps %zmm3, %zmm3 1555; AVX512BW-NEXT: vmovaps %zmm3, 192(%rdi) 1556; AVX512BW-NEXT: vmovaps %zmm1, 128(%rdi) 1557; AVX512BW-NEXT: vmovaps %zmm2, 64(%rdi) 1558; AVX512BW-NEXT: vmovaps %zmm0, (%rdi) 1559; AVX512BW-NEXT: movq %rbp, %rsp 1560; AVX512BW-NEXT: popq %rbp 1561; AVX512BW-NEXT: vzeroupper 1562; AVX512BW-NEXT: retq 1563; 1564; AVX512VBMI-LABEL: var_cvt_shuffle_v64f32_v64i8_idx: 1565; AVX512VBMI: # %bb.0: 1566; AVX512VBMI-NEXT: pushq %rbp 1567; AVX512VBMI-NEXT: movq %rsp, %rbp 1568; AVX512VBMI-NEXT: andq $-64, %rsp 1569; AVX512VBMI-NEXT: subq 
$128, %rsp 1570; AVX512VBMI-NEXT: # kill: def $esi killed $esi def $rsi 1571; AVX512VBMI-NEXT: vpbroadcastd %esi, %zmm1 1572; AVX512VBMI-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm2 1573; AVX512VBMI-NEXT: vmovd %xmm2, %eax 1574; AVX512VBMI-NEXT: vmovdqa64 %zmm0, (%rsp) 1575; AVX512VBMI-NEXT: andl $63, %eax 1576; AVX512VBMI-NEXT: movzbl (%rsp,%rax), %eax 1577; AVX512VBMI-NEXT: vmovd %eax, %xmm3 1578; AVX512VBMI-NEXT: vpextrd $1, %xmm2, %eax 1579; AVX512VBMI-NEXT: andl $63, %eax 1580; AVX512VBMI-NEXT: vpinsrb $1, (%rsp,%rax), %xmm3, %xmm3 1581; AVX512VBMI-NEXT: vpextrd $2, %xmm2, %eax 1582; AVX512VBMI-NEXT: andl $63, %eax 1583; AVX512VBMI-NEXT: vpinsrb $2, (%rsp,%rax), %xmm3, %xmm3 1584; AVX512VBMI-NEXT: vpextrd $3, %xmm2, %eax 1585; AVX512VBMI-NEXT: vextracti128 $1, %ymm2, %xmm4 1586; AVX512VBMI-NEXT: andl $63, %eax 1587; AVX512VBMI-NEXT: vpinsrb $3, (%rsp,%rax), %xmm3, %xmm3 1588; AVX512VBMI-NEXT: vmovd %xmm4, %eax 1589; AVX512VBMI-NEXT: andl $63, %eax 1590; AVX512VBMI-NEXT: vpinsrb $4, (%rsp,%rax), %xmm3, %xmm3 1591; AVX512VBMI-NEXT: vpextrd $1, %xmm4, %eax 1592; AVX512VBMI-NEXT: andl $63, %eax 1593; AVX512VBMI-NEXT: vpinsrb $5, (%rsp,%rax), %xmm3, %xmm3 1594; AVX512VBMI-NEXT: vpextrd $2, %xmm4, %eax 1595; AVX512VBMI-NEXT: andl $63, %eax 1596; AVX512VBMI-NEXT: vpinsrb $6, (%rsp,%rax), %xmm3, %xmm3 1597; AVX512VBMI-NEXT: vpextrd $3, %xmm4, %eax 1598; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm2, %xmm4 1599; AVX512VBMI-NEXT: andl $63, %eax 1600; AVX512VBMI-NEXT: vpinsrb $7, (%rsp,%rax), %xmm3, %xmm3 1601; AVX512VBMI-NEXT: vmovd %xmm4, %eax 1602; AVX512VBMI-NEXT: andl $63, %eax 1603; AVX512VBMI-NEXT: vpinsrb $8, (%rsp,%rax), %xmm3, %xmm3 1604; AVX512VBMI-NEXT: vpextrd $1, %xmm4, %eax 1605; AVX512VBMI-NEXT: andl $63, %eax 1606; AVX512VBMI-NEXT: vpinsrb $9, (%rsp,%rax), %xmm3, %xmm3 1607; AVX512VBMI-NEXT: andl $63, %esi 1608; AVX512VBMI-NEXT: vpinsrb $10, (%rsp,%rsi), %xmm3, %xmm3 1609; AVX512VBMI-NEXT: vpextrd $3, %xmm4, %eax 1610; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm2, %xmm2 1611; AVX512VBMI-NEXT: andl $63, %eax 1612; AVX512VBMI-NEXT: vpinsrb $11, (%rsp,%rax), %xmm3, %xmm3 1613; AVX512VBMI-NEXT: vmovd %xmm2, %eax 1614; AVX512VBMI-NEXT: andl $63, %eax 1615; AVX512VBMI-NEXT: vpinsrb $12, (%rsp,%rax), %xmm3, %xmm3 1616; AVX512VBMI-NEXT: vpextrd $1, %xmm2, %eax 1617; AVX512VBMI-NEXT: andl $63, %eax 1618; AVX512VBMI-NEXT: vpinsrb $13, (%rsp,%rax), %xmm3, %xmm3 1619; AVX512VBMI-NEXT: vpextrd $2, %xmm2, %eax 1620; AVX512VBMI-NEXT: andl $63, %eax 1621; AVX512VBMI-NEXT: vpinsrb $14, (%rsp,%rax), %xmm3, %xmm3 1622; AVX512VBMI-NEXT: vpextrd $3, %xmm2, %eax 1623; AVX512VBMI-NEXT: andl $63, %eax 1624; AVX512VBMI-NEXT: vpinsrb $15, (%rsp,%rax), %xmm3, %xmm2 1625; AVX512VBMI-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm3 1626; AVX512VBMI-NEXT: vpmovdb %zmm3, %xmm3 1627; AVX512VBMI-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm4 1628; AVX512VBMI-NEXT: vpmovdb %zmm4, %xmm4 1629; AVX512VBMI-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 1630; AVX512VBMI-NEXT: vpmovdb %zmm1, %xmm1 1631; AVX512VBMI-NEXT: vpmovsxbd %xmm2, %zmm2 1632; AVX512VBMI-NEXT: vcvtdq2ps %zmm2, %zmm2 1633; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm1 1634; AVX512VBMI-NEXT: vpmovsxbd %xmm1, %zmm1 1635; AVX512VBMI-NEXT: vcvtdq2ps %zmm1, %zmm1 1636; AVX512VBMI-NEXT: vpermb %zmm0, %zmm4, %zmm4 1637; AVX512VBMI-NEXT: vpmovsxbd %xmm4, %zmm4 1638; AVX512VBMI-NEXT: vcvtdq2ps %zmm4, %zmm4 1639; AVX512VBMI-NEXT: vpermb %zmm0, %zmm3, %zmm0 1640; AVX512VBMI-NEXT: vpmovsxbd %xmm0, %zmm0 1641; AVX512VBMI-NEXT: 
vcvtdq2ps %zmm0, %zmm0 1642; AVX512VBMI-NEXT: vmovaps %zmm0, 128(%rdi) 1643; AVX512VBMI-NEXT: vmovaps %zmm4, 64(%rdi) 1644; AVX512VBMI-NEXT: vmovaps %zmm1, (%rdi) 1645; AVX512VBMI-NEXT: vmovaps %zmm2, 192(%rdi) 1646; AVX512VBMI-NEXT: movq %rbp, %rsp 1647; AVX512VBMI-NEXT: popq %rbp 1648; AVX512VBMI-NEXT: vzeroupper 1649; AVX512VBMI-NEXT: retq 1650 %b_broadcast_init = insertelement <64 x i32> undef, i32 %b, i32 0 1651 %b_broadcast = shufflevector <64 x i32> %b_broadcast_init, <64 x i32> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0> 1652 %sub_add__b_broadcast_ = add <64 x i32> %b_broadcast, <i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5> 1653 %index_0.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 0 1654 %index_1.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 1 1655 %index_2.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 2 1656 %index_3.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 3 1657 %index_4.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 4 1658 %index_5.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 5 1659 %index_6.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 6 1660 %index_7.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 7 1661 %index_8.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 8 1662 %index_9.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 9 1663 %index_10.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 10 1664 %index_11.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 11 1665 %index_12.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 12 1666 %index_13.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 13 1667 %index_14.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 14 1668 %index_15.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 15 1669 %index_16.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 16 1670 %index_17.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 17 1671 %index_18.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 18 1672 %index_19.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 19 1673 %index_20.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 20 1674 %index_21.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 21 1675 %index_22.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 22 1676 %index_23.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 23 1677 %index_24.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 24 1678 %index_25.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 25 1679 %index_26.i.i 
= extractelement <64 x i32> %sub_add__b_broadcast_, i32 26 1680 %index_27.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 27 1681 %index_28.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 28 1682 %index_29.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 29 1683 %index_30.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 30 1684 %index_31.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 31 1685 %index_32.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 32 1686 %index_33.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 33 1687 %index_34.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 34 1688 %index_35.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 35 1689 %index_36.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 36 1690 %index_37.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 37 1691 %index_38.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 38 1692 %index_39.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 39 1693 %index_40.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 40 1694 %index_41.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 41 1695 %index_42.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 42 1696 %index_43.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 43 1697 %index_44.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 44 1698 %index_45.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 45 1699 %index_46.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 46 1700 %index_47.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 47 1701 %index_48.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 48 1702 %index_49.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 49 1703 %index_50.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 50 1704 %index_51.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 51 1705 %index_52.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 52 1706 %index_53.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 53 1707 %index_54.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 54 1708 %index_55.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 55 1709 %index_56.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 56 1710 %index_57.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 57 1711 %index_59.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 59 1712 %index_60.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 60 1713 %index_61.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 61 1714 %index_62.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 62 1715 %index_63.i.i = extractelement <64 x i32> %sub_add__b_broadcast_, i32 63 1716 %v_0.i.i = extractelement <64 x i8> %src, i32 %index_0.i.i 1717 %v_1.i.i = extractelement <64 x i8> %src, i32 %index_1.i.i 1718 %v_2.i.i = extractelement <64 x i8> %src, i32 %index_2.i.i 1719 %v_3.i.i = extractelement <64 x i8> %src, i32 %index_3.i.i 1720 %v_4.i.i = extractelement <64 x i8> %src, i32 %index_4.i.i 1721 %v_5.i.i = extractelement <64 x i8> %src, i32 %index_5.i.i 1722 %v_6.i.i = extractelement <64 x i8> %src, i32 %index_6.i.i 1723 %v_7.i.i = extractelement <64 x i8> %src, i32 %index_7.i.i 1724 %v_8.i.i = extractelement <64 x i8> %src, i32 %index_8.i.i 1725 %v_9.i.i = extractelement <64 x i8> %src, i32 %index_9.i.i 1726 %v_10.i.i = extractelement <64 x i8> %src, 
i32 %index_10.i.i 1727 %v_11.i.i = extractelement <64 x i8> %src, i32 %index_11.i.i 1728 %v_12.i.i = extractelement <64 x i8> %src, i32 %index_12.i.i 1729 %v_13.i.i = extractelement <64 x i8> %src, i32 %index_13.i.i 1730 %v_14.i.i = extractelement <64 x i8> %src, i32 %index_14.i.i 1731 %v_15.i.i = extractelement <64 x i8> %src, i32 %index_15.i.i 1732 %v_16.i.i = extractelement <64 x i8> %src, i32 %index_16.i.i 1733 %v_17.i.i = extractelement <64 x i8> %src, i32 %index_17.i.i 1734 %v_18.i.i = extractelement <64 x i8> %src, i32 %index_18.i.i 1735 %v_19.i.i = extractelement <64 x i8> %src, i32 %index_19.i.i 1736 %v_20.i.i = extractelement <64 x i8> %src, i32 %index_20.i.i 1737 %v_21.i.i = extractelement <64 x i8> %src, i32 %index_21.i.i 1738 %v_22.i.i = extractelement <64 x i8> %src, i32 %index_22.i.i 1739 %v_23.i.i = extractelement <64 x i8> %src, i32 %index_23.i.i 1740 %v_24.i.i = extractelement <64 x i8> %src, i32 %index_24.i.i 1741 %v_25.i.i = extractelement <64 x i8> %src, i32 %index_25.i.i 1742 %v_26.i.i = extractelement <64 x i8> %src, i32 %index_26.i.i 1743 %v_27.i.i = extractelement <64 x i8> %src, i32 %index_27.i.i 1744 %v_28.i.i = extractelement <64 x i8> %src, i32 %index_28.i.i 1745 %v_29.i.i = extractelement <64 x i8> %src, i32 %index_29.i.i 1746 %v_30.i.i = extractelement <64 x i8> %src, i32 %index_30.i.i 1747 %v_31.i.i = extractelement <64 x i8> %src, i32 %index_31.i.i 1748 %v_32.i.i = extractelement <64 x i8> %src, i32 %index_32.i.i 1749 %v_33.i.i = extractelement <64 x i8> %src, i32 %index_33.i.i 1750 %v_34.i.i = extractelement <64 x i8> %src, i32 %index_34.i.i 1751 %v_35.i.i = extractelement <64 x i8> %src, i32 %index_35.i.i 1752 %v_36.i.i = extractelement <64 x i8> %src, i32 %index_36.i.i 1753 %v_37.i.i = extractelement <64 x i8> %src, i32 %index_37.i.i 1754 %v_38.i.i = extractelement <64 x i8> %src, i32 %index_38.i.i 1755 %v_39.i.i = extractelement <64 x i8> %src, i32 %index_39.i.i 1756 %v_40.i.i = extractelement <64 x i8> %src, i32 %index_40.i.i 1757 %v_41.i.i = extractelement <64 x i8> %src, i32 %index_41.i.i 1758 %v_42.i.i = extractelement <64 x i8> %src, i32 %index_42.i.i 1759 %v_43.i.i = extractelement <64 x i8> %src, i32 %index_43.i.i 1760 %v_44.i.i = extractelement <64 x i8> %src, i32 %index_44.i.i 1761 %v_45.i.i = extractelement <64 x i8> %src, i32 %index_45.i.i 1762 %v_46.i.i = extractelement <64 x i8> %src, i32 %index_46.i.i 1763 %v_47.i.i = extractelement <64 x i8> %src, i32 %index_47.i.i 1764 %v_48.i.i = extractelement <64 x i8> %src, i32 %index_48.i.i 1765 %v_49.i.i = extractelement <64 x i8> %src, i32 %index_49.i.i 1766 %v_50.i.i = extractelement <64 x i8> %src, i32 %index_50.i.i 1767 %v_51.i.i = extractelement <64 x i8> %src, i32 %index_51.i.i 1768 %v_52.i.i = extractelement <64 x i8> %src, i32 %index_52.i.i 1769 %v_53.i.i = extractelement <64 x i8> %src, i32 %index_53.i.i 1770 %v_54.i.i = extractelement <64 x i8> %src, i32 %index_54.i.i 1771 %v_55.i.i = extractelement <64 x i8> %src, i32 %index_55.i.i 1772 %v_56.i.i = extractelement <64 x i8> %src, i32 %index_56.i.i 1773 %v_57.i.i = extractelement <64 x i8> %src, i32 %index_57.i.i 1774 %v_58.i.i = extractelement <64 x i8> %src, i32 %b 1775 %v_59.i.i = extractelement <64 x i8> %src, i32 %index_59.i.i 1776 %v_60.i.i = extractelement <64 x i8> %src, i32 %index_60.i.i 1777 %v_61.i.i = extractelement <64 x i8> %src, i32 %index_61.i.i 1778 %v_62.i.i = extractelement <64 x i8> %src, i32 %index_62.i.i 1779 %v_63.i.i = extractelement <64 x i8> %src, i32 %index_63.i.i 1780 %dst_0.i.i = insertelement <64 x i8> undef, 
i8 %v_0.i.i, i32 0 1781 %dst_1.i.i = insertelement <64 x i8> %dst_0.i.i, i8 %v_1.i.i, i32 1 1782 %dst_2.i.i = insertelement <64 x i8> %dst_1.i.i, i8 %v_2.i.i, i32 2 1783 %dst_3.i.i = insertelement <64 x i8> %dst_2.i.i, i8 %v_3.i.i, i32 3 1784 %dst_4.i.i = insertelement <64 x i8> %dst_3.i.i, i8 %v_4.i.i, i32 4 1785 %dst_5.i.i = insertelement <64 x i8> %dst_4.i.i, i8 %v_5.i.i, i32 5 1786 %dst_6.i.i = insertelement <64 x i8> %dst_5.i.i, i8 %v_6.i.i, i32 6 1787 %dst_7.i.i = insertelement <64 x i8> %dst_6.i.i, i8 %v_7.i.i, i32 7 1788 %dst_8.i.i = insertelement <64 x i8> %dst_7.i.i, i8 %v_8.i.i, i32 8 1789 %dst_9.i.i = insertelement <64 x i8> %dst_8.i.i, i8 %v_9.i.i, i32 9 1790 %dst_10.i.i = insertelement <64 x i8> %dst_9.i.i, i8 %v_10.i.i, i32 10 1791 %dst_11.i.i = insertelement <64 x i8> %dst_10.i.i, i8 %v_11.i.i, i32 11 1792 %dst_12.i.i = insertelement <64 x i8> %dst_11.i.i, i8 %v_12.i.i, i32 12 1793 %dst_13.i.i = insertelement <64 x i8> %dst_12.i.i, i8 %v_13.i.i, i32 13 1794 %dst_14.i.i = insertelement <64 x i8> %dst_13.i.i, i8 %v_14.i.i, i32 14 1795 %dst_15.i.i = insertelement <64 x i8> %dst_14.i.i, i8 %v_15.i.i, i32 15 1796 %dst_16.i.i = insertelement <64 x i8> %dst_15.i.i, i8 %v_16.i.i, i32 16 1797 %dst_17.i.i = insertelement <64 x i8> %dst_16.i.i, i8 %v_17.i.i, i32 17 1798 %dst_18.i.i = insertelement <64 x i8> %dst_17.i.i, i8 %v_18.i.i, i32 18 1799 %dst_19.i.i = insertelement <64 x i8> %dst_18.i.i, i8 %v_19.i.i, i32 19 1800 %dst_20.i.i = insertelement <64 x i8> %dst_19.i.i, i8 %v_20.i.i, i32 20 1801 %dst_21.i.i = insertelement <64 x i8> %dst_20.i.i, i8 %v_21.i.i, i32 21 1802 %dst_22.i.i = insertelement <64 x i8> %dst_21.i.i, i8 %v_22.i.i, i32 22 1803 %dst_23.i.i = insertelement <64 x i8> %dst_22.i.i, i8 %v_23.i.i, i32 23 1804 %dst_24.i.i = insertelement <64 x i8> %dst_23.i.i, i8 %v_24.i.i, i32 24 1805 %dst_25.i.i = insertelement <64 x i8> %dst_24.i.i, i8 %v_25.i.i, i32 25 1806 %dst_26.i.i = insertelement <64 x i8> %dst_25.i.i, i8 %v_26.i.i, i32 26 1807 %dst_27.i.i = insertelement <64 x i8> %dst_26.i.i, i8 %v_27.i.i, i32 27 1808 %dst_28.i.i = insertelement <64 x i8> %dst_27.i.i, i8 %v_28.i.i, i32 28 1809 %dst_29.i.i = insertelement <64 x i8> %dst_28.i.i, i8 %v_29.i.i, i32 29 1810 %dst_30.i.i = insertelement <64 x i8> %dst_29.i.i, i8 %v_30.i.i, i32 30 1811 %dst_31.i.i = insertelement <64 x i8> %dst_30.i.i, i8 %v_31.i.i, i32 31 1812 %dst_32.i.i = insertelement <64 x i8> %dst_31.i.i, i8 %v_32.i.i, i32 32 1813 %dst_33.i.i = insertelement <64 x i8> %dst_32.i.i, i8 %v_33.i.i, i32 33 1814 %dst_34.i.i = insertelement <64 x i8> %dst_33.i.i, i8 %v_34.i.i, i32 34 1815 %dst_35.i.i = insertelement <64 x i8> %dst_34.i.i, i8 %v_35.i.i, i32 35 1816 %dst_36.i.i = insertelement <64 x i8> %dst_35.i.i, i8 %v_36.i.i, i32 36 1817 %dst_37.i.i = insertelement <64 x i8> %dst_36.i.i, i8 %v_37.i.i, i32 37 1818 %dst_38.i.i = insertelement <64 x i8> %dst_37.i.i, i8 %v_38.i.i, i32 38 1819 %dst_39.i.i = insertelement <64 x i8> %dst_38.i.i, i8 %v_39.i.i, i32 39 1820 %dst_40.i.i = insertelement <64 x i8> %dst_39.i.i, i8 %v_40.i.i, i32 40 1821 %dst_41.i.i = insertelement <64 x i8> %dst_40.i.i, i8 %v_41.i.i, i32 41 1822 %dst_42.i.i = insertelement <64 x i8> %dst_41.i.i, i8 %v_42.i.i, i32 42 1823 %dst_43.i.i = insertelement <64 x i8> %dst_42.i.i, i8 %v_43.i.i, i32 43 1824 %dst_44.i.i = insertelement <64 x i8> %dst_43.i.i, i8 %v_44.i.i, i32 44 1825 %dst_45.i.i = insertelement <64 x i8> %dst_44.i.i, i8 %v_45.i.i, i32 45 1826 %dst_46.i.i = insertelement <64 x i8> %dst_45.i.i, i8 %v_46.i.i, i32 46 1827 %dst_47.i.i = 
insertelement <64 x i8> %dst_46.i.i, i8 %v_47.i.i, i32 47 1828 %dst_48.i.i = insertelement <64 x i8> %dst_47.i.i, i8 %v_48.i.i, i32 48 1829 %dst_49.i.i = insertelement <64 x i8> %dst_48.i.i, i8 %v_49.i.i, i32 49 1830 %dst_50.i.i = insertelement <64 x i8> %dst_49.i.i, i8 %v_50.i.i, i32 50 1831 %dst_51.i.i = insertelement <64 x i8> %dst_50.i.i, i8 %v_51.i.i, i32 51 1832 %dst_52.i.i = insertelement <64 x i8> %dst_51.i.i, i8 %v_52.i.i, i32 52 1833 %dst_53.i.i = insertelement <64 x i8> %dst_52.i.i, i8 %v_53.i.i, i32 53 1834 %dst_54.i.i = insertelement <64 x i8> %dst_53.i.i, i8 %v_54.i.i, i32 54 1835 %dst_55.i.i = insertelement <64 x i8> %dst_54.i.i, i8 %v_55.i.i, i32 55 1836 %dst_56.i.i = insertelement <64 x i8> %dst_55.i.i, i8 %v_56.i.i, i32 56 1837 %dst_57.i.i = insertelement <64 x i8> %dst_56.i.i, i8 %v_57.i.i, i32 57 1838 %dst_58.i.i = insertelement <64 x i8> %dst_57.i.i, i8 %v_58.i.i, i32 58 1839 %dst_59.i.i = insertelement <64 x i8> %dst_58.i.i, i8 %v_59.i.i, i32 59 1840 %dst_60.i.i = insertelement <64 x i8> %dst_59.i.i, i8 %v_60.i.i, i32 60 1841 %dst_61.i.i = insertelement <64 x i8> %dst_60.i.i, i8 %v_61.i.i, i32 61 1842 %dst_62.i.i = insertelement <64 x i8> %dst_61.i.i, i8 %v_62.i.i, i32 62 1843 %dst_63.i.i = insertelement <64 x i8> %dst_62.i.i, i8 %v_63.i.i, i32 63 1844 %shuf_load_to_float = sitofp <64 x i8> %dst_63.i.i to <64 x float> 1845 store <64 x float> %shuf_load_to_float, <64 x float>* %dst 1846 ret void 1847} 1848