; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,NOBW,NOVBMI,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,NOVBMI,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512BW,VBMI

define <8 x i64> @var_shuffle_v8i64(<8 x i64> %v, <8 x i64> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %index0 = extractelement <8 x i64> %indices, i32 0
  %index1 = extractelement <8 x i64> %indices, i32 1
  %index2 = extractelement <8 x i64> %indices, i32 2
  %index3 = extractelement <8 x i64> %indices, i32 3
  %index4 = extractelement <8 x i64> %indices, i32 4
  %index5 = extractelement <8 x i64> %indices, i32 5
  %index6 = extractelement <8 x i64> %indices, i32 6
  %index7 = extractelement <8 x i64> %indices, i32 7
  %v0 = extractelement <8 x i64> %v, i64 %index0
  %v1 = extractelement <8 x i64> %v, i64 %index1
  %v2 = extractelement <8 x i64> %v, i64 %index2
  %v3 = extractelement <8 x i64> %v, i64 %index3
  %v4 = extractelement <8 x i64> %v, i64 %index4
  %v5 = extractelement <8 x i64> %v, i64 %index5
  %v6 = extractelement <8 x i64> %v, i64 %index6
  %v7 = extractelement <8 x i64> %v, i64 %index7
  %ret0 = insertelement <8 x i64> undef, i64 %v0, i32 0
  %ret1 = insertelement <8 x i64> %ret0, i64 %v1, i32 1
  %ret2 = insertelement <8 x i64> %ret1, i64 %v2, i32 2
  %ret3 = insertelement <8 x i64> %ret2, i64 %v3, i32 3
  %ret4 = insertelement <8 x i64> %ret3, i64 %v4, i32 4
  %ret5 = insertelement <8 x i64> %ret4, i64 %v5, i32 5
  %ret6 = insertelement <8 x i64> %ret5, i64 %v6, i32 6
  %ret7 = insertelement <8 x i64> %ret6, i64 %v7, i32 7
  ret <8 x i64> %ret7
}

define <16 x i32> @var_shuffle_v16i32(<16 x i32> %v, <16 x i32> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %index0 = extractelement <16 x i32> %indices, i32 0
  %index1 = extractelement <16 x i32> %indices, i32 1
  %index2 = extractelement <16 x i32> %indices, i32 2
  %index3 = extractelement <16 x i32> %indices, i32 3
  %index4 = extractelement <16 x i32> %indices, i32 4
  %index5 = extractelement <16 x i32> %indices, i32 5
  %index6 = extractelement <16 x i32> %indices, i32 6
  %index7 = extractelement <16 x i32> %indices, i32 7
  %index8 = extractelement <16 x i32> %indices, i32 8
  %index9 = extractelement <16 x i32> %indices, i32 9
  %index10 = extractelement <16 x i32> %indices, i32 10
  %index11 = extractelement <16 x i32> %indices, i32 11
  %index12 = extractelement <16 x i32> %indices, i32 12
  %index13 = extractelement <16 x i32> %indices, i32 13
  %index14 = extractelement <16 x i32> %indices, i32 14
  %index15 = extractelement <16 x i32> %indices, i32 15
  %v0 = extractelement <16 x i32> %v, i32 %index0
  %v1 = extractelement <16 x i32> %v, i32 %index1
  %v2 = extractelement <16 x i32> %v, i32 %index2
  %v3 = extractelement <16 x i32> %v, i32 %index3
  %v4 = extractelement <16 x i32> %v, i32 %index4
  %v5 = extractelement <16 x i32> %v, i32 %index5
  %v6 = extractelement <16 x i32> %v, i32 %index6
  %v7 = extractelement <16 x i32> %v, i32 %index7
  %v8 = extractelement <16 x i32> %v, i32 %index8
  %v9 = extractelement <16 x i32> %v, i32 %index9
  %v10 = extractelement <16 x i32> %v, i32 %index10
  %v11 = extractelement <16 x i32> %v, i32 %index11
  %v12 = extractelement <16 x i32> %v, i32 %index12
  %v13 = extractelement <16 x i32> %v, i32 %index13
  %v14 = extractelement <16 x i32> %v, i32 %index14
  %v15 = extractelement <16 x i32> %v, i32 %index15
  %ret0 = insertelement <16 x i32> undef, i32 %v0, i32 0
  %ret1 = insertelement <16 x i32> %ret0, i32 %v1, i32 1
  %ret2 = insertelement <16 x i32> %ret1, i32 %v2, i32 2
  %ret3 = insertelement <16 x i32> %ret2, i32 %v3, i32 3
  %ret4 = insertelement <16 x i32> %ret3, i32 %v4, i32 4
  %ret5 = insertelement <16 x i32> %ret4, i32 %v5, i32 5
  %ret6 = insertelement <16 x i32> %ret5, i32 %v6, i32 6
  %ret7 = insertelement <16 x i32> %ret6, i32 %v7, i32 7
  %ret8 = insertelement <16 x i32> %ret7, i32 %v8, i32 8
  %ret9 = insertelement <16 x i32> %ret8, i32 %v9, i32 9
  %ret10 = insertelement <16 x i32> %ret9, i32 %v10, i32 10
  %ret11 = insertelement <16 x i32> %ret10, i32 %v11, i32 11
  %ret12 = insertelement <16 x i32> %ret11, i32 %v12, i32 12
  %ret13 = insertelement <16 x i32> %ret12, i32 %v13, i32 13
  %ret14 = insertelement <16 x i32> %ret13, i32 %v14, i32 14
  %ret15 = insertelement <16 x i32> %ret14, i32 %v15, i32 15
  ret <16 x i32> %ret15
}

define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwind {
; NOBW-LABEL: var_shuffle_v32i16:
; NOBW:       # %bb.0:
; NOBW-NEXT:    pushq %rbp
; NOBW-NEXT:    movq %rsp, %rbp
; NOBW-NEXT:    andq $-64, %rsp
; NOBW-NEXT:    subq $2112, %rsp # imm = 0x840
; NOBW-NEXT:    vextracti128 $1, %ymm2, %xmm4
; NOBW-NEXT:    vmovd %xmm4, %eax
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, (%rsp)
; NOBW-NEXT:    movzwl 1472(%rsp,%rax,2), %eax
; NOBW-NEXT:    vmovd %eax, %xmm0
; NOBW-NEXT:    vpextrw $1, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $1, 1408(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $2, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $2, 1344(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $3, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $3, 1280(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $4, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $4, 1216(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $5, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $5, 1152(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $6, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $6, 1088(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $7, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $7, 1024(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vmovd %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    movzwl 1984(%rsp,%rax,2), %eax
; NOBW-NEXT:    vmovd %eax, %xmm1
; NOBW-NEXT:    vpextrw $1, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $1, 1920(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $2, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $2, 1856(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $3, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $3, 1792(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $4, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $4, 1728(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $5, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $5, 1664(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $6, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $6, 1600(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $7, %xmm2, %eax
; NOBW-NEXT:    vextracti128 $1, %ymm3, %xmm2
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $7, 1536(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vmovd %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    movzwl 448(%rsp,%rax,2), %eax
; NOBW-NEXT:    vmovd %eax, %xmm4
; NOBW-NEXT:    vpextrw $1, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $1, 384(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $2, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $2, 320(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $3, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $3, 256(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $4, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $4, 192(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $5, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $5, 128(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $6, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $6, 64(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $7, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm2
; NOBW-NEXT:    vmovd %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    movzwl 960(%rsp,%rax,2), %eax
; NOBW-NEXT:    vmovd %eax, %xmm4
; NOBW-NEXT:    vpextrw $1, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $1, 896(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $2, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $2, 832(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $3, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $3, 768(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $4, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $4, 704(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $5, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $5, 640(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $6, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $6, 576(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $7, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $7, 512(%rsp,%rax,2), %xmm4, %xmm3
; NOBW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; NOBW-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm1
; NOBW-NEXT:    movq %rbp, %rsp
; NOBW-NEXT:    popq %rbp
; NOBW-NEXT:    retq
;
; AVX512BW-LABEL: var_shuffle_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpermw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
  %index0 = extractelement <32 x i16> %indices, i32 0
  %index1 = extractelement <32 x i16> %indices, i32 1
  %index2 = extractelement <32 x i16> %indices, i32 2
  %index3 = extractelement <32 x i16> %indices, i32 3
  %index4 = extractelement <32 x i16> %indices, i32 4
  %index5 = extractelement <32 x i16> %indices, i32 5
  %index6 = extractelement <32 x i16> %indices, i32 6
  %index7 = extractelement <32 x i16> %indices, i32 7
  %index8 = extractelement <32 x i16> %indices, i32 8
  %index9 = extractelement <32 x i16> %indices, i32 9
  %index10 = extractelement <32 x i16> %indices, i32 10
  %index11 = extractelement <32 x i16> %indices, i32 11
  %index12 = extractelement <32 x i16> %indices, i32 12
  %index13 = extractelement <32 x i16> %indices, i32 13
  %index14 = extractelement <32 x i16> %indices, i32 14
  %index15 = extractelement <32 x i16> %indices, i32 15
  %index16 = extractelement <32 x i16> %indices, i32 16
  %index17 = extractelement <32 x i16> %indices, i32 17
  %index18 = extractelement <32 x i16> %indices, i32 18
  %index19 = extractelement <32 x i16> %indices, i32 19
  %index20 = extractelement <32 x i16> %indices, i32 20
  %index21 = extractelement <32 x i16> %indices, i32 21
  %index22 = extractelement <32 x i16> %indices, i32 22
  %index23 = extractelement <32 x i16> %indices, i32 23
  %index24 = extractelement <32 x i16> %indices, i32 24
  %index25 = extractelement <32 x i16> %indices, i32 25
  %index26 = extractelement <32 x i16> %indices, i32 26
  %index27 = extractelement <32 x i16> %indices, i32 27
  %index28 = extractelement <32 x i16> %indices, i32 28
  %index29 = extractelement <32 x i16> %indices, i32 29
  %index30 = extractelement <32 x i16> %indices, i32 30
  %index31 = extractelement <32 x i16> %indices, i32 31
  %v0 = extractelement <32 x i16> %v, i16 %index0
  %v1 = extractelement <32 x i16> %v, i16 %index1
  %v2 = extractelement <32 x i16> %v, i16 %index2
  %v3 = extractelement <32 x i16> %v, i16 %index3
  %v4 = extractelement <32 x i16> %v, i16 %index4
  %v5 = extractelement <32 x i16> %v, i16 %index5
  %v6 = extractelement <32 x i16> %v, i16 %index6
  %v7 = extractelement <32 x i16> %v, i16 %index7
  %v8 = extractelement <32 x i16> %v, i16 %index8
  %v9 = extractelement <32 x i16> %v, i16 %index9
  %v10 = extractelement <32 x i16> %v, i16 %index10
  %v11 = extractelement <32 x i16> %v, i16 %index11
  %v12 = extractelement <32 x i16> %v, i16 %index12
  %v13 = extractelement <32 x i16> %v, i16 %index13
  %v14 = extractelement <32 x i16> %v, i16 %index14
  %v15 = extractelement <32 x i16> %v, i16 %index15
  %v16 = extractelement <32 x i16> %v, i16 %index16
  %v17 = extractelement <32 x i16> %v, i16 %index17
  %v18 = extractelement <32 x i16> %v, i16 %index18
  %v19 = extractelement <32 x i16> %v, i16 %index19
  %v20 = extractelement <32 x i16> %v, i16 %index20
  %v21 = extractelement <32 x i16> %v, i16 %index21
  %v22 = extractelement <32 x i16> %v, i16 %index22
  %v23 = extractelement <32 x i16> %v, i16 %index23
  %v24 = extractelement <32 x i16> %v, i16 %index24
  %v25 = extractelement <32 x i16> %v, i16 %index25
  %v26 = extractelement <32 x i16> %v, i16 %index26
  %v27 = extractelement <32 x i16> %v, i16 %index27
  %v28 = extractelement <32 x i16> %v, i16 %index28
  %v29 = extractelement <32 x i16> %v, i16 %index29
  %v30 = extractelement <32 x i16> %v, i16 %index30
  %v31 = extractelement <32 x i16> %v, i16 %index31
  %ret0 = insertelement <32 x i16> undef, i16 %v0, i32 0
  %ret1 = insertelement <32 x i16> %ret0, i16 %v1, i32 1
  %ret2 = insertelement <32 x i16> %ret1, i16 %v2, i32 2
  %ret3 = insertelement <32 x i16> %ret2, i16 %v3, i32 3
  %ret4 = insertelement <32 x i16> %ret3, i16 %v4, i32 4
  %ret5 = insertelement <32 x i16> %ret4, i16 %v5, i32 5
  %ret6 = insertelement <32 x i16> %ret5, i16 %v6, i32 6
  %ret7 = insertelement <32 x i16> %ret6, i16 %v7, i32 7
  %ret8 = insertelement <32 x i16> %ret7, i16 %v8, i32 8
  %ret9 = insertelement <32 x i16> %ret8, i16 %v9, i32 9
  %ret10 = insertelement <32 x i16> %ret9, i16 %v10, i32 10
  %ret11 = insertelement <32 x i16> %ret10, i16 %v11, i32 11
  %ret12 = insertelement <32 x i16> %ret11, i16 %v12, i32 12
  %ret13 = insertelement <32 x i16> %ret12, i16 %v13, i32 13
  %ret14 = insertelement <32 x i16> %ret13, i16 %v14, i32 14
  %ret15 = insertelement <32 x i16> %ret14, i16 %v15, i32 15
  %ret16 = insertelement <32 x i16> %ret15, i16 %v16, i32 16
  %ret17 = insertelement <32 x i16> %ret16, i16 %v17, i32 17
  %ret18 = insertelement <32 x i16> %ret17, i16 %v18, i32 18
  %ret19 = insertelement <32 x i16> %ret18, i16 %v19, i32 19
  %ret20 = insertelement <32 x i16> %ret19, i16 %v20, i32 20
  %ret21 = insertelement <32 x i16> %ret20, i16 %v21, i32 21
  %ret22 = insertelement <32 x i16> %ret21, i16 %v22, i32 22
  %ret23 = insertelement <32 x i16> %ret22, i16 %v23, i32 23
  %ret24 = insertelement <32 x i16> %ret23, i16 %v24, i32 24
  %ret25 = insertelement <32 x i16> %ret24, i16 %v25, i32 25
  %ret26 = insertelement <32 x i16> %ret25, i16 %v26, i32 26
  %ret27 = insertelement <32 x i16> %ret26, i16 %v27, i32 27
  %ret28 = insertelement <32 x i16> %ret27, i16 %v28, i32 28
  %ret29 = insertelement <32 x i16> %ret28, i16 %v29, i32 29
  %ret30 = insertelement <32 x i16> %ret29, i16 %v30, i32 30
  %ret31 = insertelement <32 x i16> %ret30, i16 %v31, i32 31
  ret <32 x i16> %ret31
}

define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; NOBW-LABEL: var_shuffle_v64i8:
; NOBW:       # %bb.0:
; NOBW-NEXT:    pushq %rbp
; NOBW-NEXT:    movq %rsp, %rbp
; NOBW-NEXT:    andq $-64, %rsp
; NOBW-NEXT:    subq $4160, %rsp # imm = 0x1040
; NOBW-NEXT:    vextracti128 $1, %ymm2, %xmm4
; NOBW-NEXT:    vpextrb $0, %xmm4, %eax
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:
vmovaps %ymm1, {{[0-9]+}}(%rsp) 410; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 411; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 412; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 413; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 414; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 415; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 416; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 417; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 418; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 419; NOBW-NEXT: andl $63, %eax 420; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 421; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 422; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 423; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 424; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 425; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 426; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 427; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 428; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 429; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 430; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 431; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 432; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 433; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 434; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 435; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 436; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 437; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 438; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 439; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 440; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 441; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 442; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 443; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 444; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 445; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 446; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 447; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 448; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 449; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 450; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 451; NOBW-NEXT: 
vmovaps %ymm0, {{[0-9]+}}(%rsp) 452; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 453; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 454; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 455; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 456; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 457; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 458; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 459; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 460; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 461; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 462; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 463; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 464; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 465; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 466; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 467; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 468; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 469; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 470; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 471; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 472; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 473; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 474; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 475; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 476; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 477; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 478; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 479; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 480; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 481; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 482; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 483; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 484; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 485; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 486; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 487; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 488; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 489; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 490; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 491; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 492; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 
493; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 494; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 495; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 496; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 497; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 498; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 499; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 500; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 501; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 502; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 503; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 504; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 505; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 506; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 507; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 508; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 509; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 510; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 511; NOBW-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) 512; NOBW-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) 513; NOBW-NEXT: vmovaps %ymm0, (%rsp) 514; NOBW-NEXT: movzbl 3008(%rsp,%rax), %eax 515; NOBW-NEXT: vmovd %eax, %xmm0 516; NOBW-NEXT: vpextrb $1, %xmm4, %eax 517; NOBW-NEXT: andl $63, %eax 518; NOBW-NEXT: vpinsrb $1, 2944(%rsp,%rax), %xmm0, %xmm0 519; NOBW-NEXT: vpextrb $2, %xmm4, %eax 520; NOBW-NEXT: andl $63, %eax 521; NOBW-NEXT: vpinsrb $2, 2880(%rsp,%rax), %xmm0, %xmm0 522; NOBW-NEXT: vpextrb $3, %xmm4, %eax 523; NOBW-NEXT: andl $63, %eax 524; NOBW-NEXT: vpinsrb $3, 2816(%rsp,%rax), %xmm0, %xmm0 525; NOBW-NEXT: vpextrb $4, %xmm4, %eax 526; NOBW-NEXT: andl $63, %eax 527; NOBW-NEXT: vpinsrb $4, 2752(%rsp,%rax), %xmm0, %xmm0 528; NOBW-NEXT: vpextrb $5, %xmm4, %eax 529; NOBW-NEXT: andl $63, %eax 530; NOBW-NEXT: vpinsrb $5, 2688(%rsp,%rax), %xmm0, %xmm0 531; NOBW-NEXT: vpextrb $6, %xmm4, %eax 532; NOBW-NEXT: andl $63, %eax 533; NOBW-NEXT: vpinsrb $6, 2624(%rsp,%rax), %xmm0, %xmm0 534; NOBW-NEXT: vpextrb $7, %xmm4, %eax 535; NOBW-NEXT: andl $63, %eax 536; NOBW-NEXT: vpinsrb $7, 2560(%rsp,%rax), %xmm0, %xmm0 537; NOBW-NEXT: 
vpextrb $8, %xmm4, %eax 538; NOBW-NEXT: andl $63, %eax 539; NOBW-NEXT: vpinsrb $8, 2496(%rsp,%rax), %xmm0, %xmm0 540; NOBW-NEXT: vpextrb $9, %xmm4, %eax 541; NOBW-NEXT: andl $63, %eax 542; NOBW-NEXT: vpinsrb $9, 2432(%rsp,%rax), %xmm0, %xmm0 543; NOBW-NEXT: vpextrb $10, %xmm4, %eax 544; NOBW-NEXT: andl $63, %eax 545; NOBW-NEXT: vpinsrb $10, 2368(%rsp,%rax), %xmm0, %xmm0 546; NOBW-NEXT: vpextrb $11, %xmm4, %eax 547; NOBW-NEXT: andl $63, %eax 548; NOBW-NEXT: vpinsrb $11, 2304(%rsp,%rax), %xmm0, %xmm0 549; NOBW-NEXT: vpextrb $12, %xmm4, %eax 550; NOBW-NEXT: andl $63, %eax 551; NOBW-NEXT: vpinsrb $12, 2240(%rsp,%rax), %xmm0, %xmm0 552; NOBW-NEXT: vpextrb $13, %xmm4, %eax 553; NOBW-NEXT: andl $63, %eax 554; NOBW-NEXT: vpinsrb $13, 2176(%rsp,%rax), %xmm0, %xmm0 555; NOBW-NEXT: vpextrb $14, %xmm4, %eax 556; NOBW-NEXT: andl $63, %eax 557; NOBW-NEXT: vpinsrb $14, 2112(%rsp,%rax), %xmm0, %xmm0 558; NOBW-NEXT: vpextrb $15, %xmm4, %eax 559; NOBW-NEXT: andl $63, %eax 560; NOBW-NEXT: vpinsrb $15, 2048(%rsp,%rax), %xmm0, %xmm0 561; NOBW-NEXT: vpextrb $0, %xmm2, %eax 562; NOBW-NEXT: andl $63, %eax 563; NOBW-NEXT: movzbl 4032(%rsp,%rax), %eax 564; NOBW-NEXT: vmovd %eax, %xmm1 565; NOBW-NEXT: vpextrb $1, %xmm2, %eax 566; NOBW-NEXT: andl $63, %eax 567; NOBW-NEXT: vpinsrb $1, 3968(%rsp,%rax), %xmm1, %xmm1 568; NOBW-NEXT: vpextrb $2, %xmm2, %eax 569; NOBW-NEXT: andl $63, %eax 570; NOBW-NEXT: vpinsrb $2, 3904(%rsp,%rax), %xmm1, %xmm1 571; NOBW-NEXT: vpextrb $3, %xmm2, %eax 572; NOBW-NEXT: andl $63, %eax 573; NOBW-NEXT: vpinsrb $3, 3840(%rsp,%rax), %xmm1, %xmm1 574; NOBW-NEXT: vpextrb $4, %xmm2, %eax 575; NOBW-NEXT: andl $63, %eax 576; NOBW-NEXT: vpinsrb $4, 3776(%rsp,%rax), %xmm1, %xmm1 577; NOBW-NEXT: vpextrb $5, %xmm2, %eax 578; NOBW-NEXT: andl $63, %eax 579; NOBW-NEXT: vpinsrb $5, 3712(%rsp,%rax), %xmm1, %xmm1 580; NOBW-NEXT: vpextrb $6, %xmm2, %eax 581; NOBW-NEXT: andl $63, %eax 582; NOBW-NEXT: vpinsrb $6, 3648(%rsp,%rax), %xmm1, %xmm1 583; NOBW-NEXT: vpextrb $7, %xmm2, %eax 584; 
NOBW-NEXT: andl $63, %eax 585; NOBW-NEXT: vpinsrb $7, 3584(%rsp,%rax), %xmm1, %xmm1 586; NOBW-NEXT: vpextrb $8, %xmm2, %eax 587; NOBW-NEXT: andl $63, %eax 588; NOBW-NEXT: vpinsrb $8, 3520(%rsp,%rax), %xmm1, %xmm1 589; NOBW-NEXT: vpextrb $9, %xmm2, %eax 590; NOBW-NEXT: andl $63, %eax 591; NOBW-NEXT: vpinsrb $9, 3456(%rsp,%rax), %xmm1, %xmm1 592; NOBW-NEXT: vpextrb $10, %xmm2, %eax 593; NOBW-NEXT: andl $63, %eax 594; NOBW-NEXT: vpinsrb $10, 3392(%rsp,%rax), %xmm1, %xmm1 595; NOBW-NEXT: vpextrb $11, %xmm2, %eax 596; NOBW-NEXT: andl $63, %eax 597; NOBW-NEXT: vpinsrb $11, 3328(%rsp,%rax), %xmm1, %xmm1 598; NOBW-NEXT: vpextrb $12, %xmm2, %eax 599; NOBW-NEXT: andl $63, %eax 600; NOBW-NEXT: vpinsrb $12, 3264(%rsp,%rax), %xmm1, %xmm1 601; NOBW-NEXT: vpextrb $13, %xmm2, %eax 602; NOBW-NEXT: andl $63, %eax 603; NOBW-NEXT: vpinsrb $13, 3200(%rsp,%rax), %xmm1, %xmm1 604; NOBW-NEXT: vpextrb $14, %xmm2, %eax 605; NOBW-NEXT: andl $63, %eax 606; NOBW-NEXT: vpinsrb $14, 3136(%rsp,%rax), %xmm1, %xmm1 607; NOBW-NEXT: vpextrb $15, %xmm2, %eax 608; NOBW-NEXT: vextracti128 $1, %ymm3, %xmm2 609; NOBW-NEXT: andl $63, %eax 610; NOBW-NEXT: vpinsrb $15, 3072(%rsp,%rax), %xmm1, %xmm1 611; NOBW-NEXT: vpextrb $0, %xmm2, %eax 612; NOBW-NEXT: andl $63, %eax 613; NOBW-NEXT: movzbl 960(%rsp,%rax), %eax 614; NOBW-NEXT: vmovd %eax, %xmm4 615; NOBW-NEXT: vpextrb $1, %xmm2, %eax 616; NOBW-NEXT: andl $63, %eax 617; NOBW-NEXT: vpinsrb $1, 896(%rsp,%rax), %xmm4, %xmm4 618; NOBW-NEXT: vpextrb $2, %xmm2, %eax 619; NOBW-NEXT: andl $63, %eax 620; NOBW-NEXT: vpinsrb $2, 832(%rsp,%rax), %xmm4, %xmm4 621; NOBW-NEXT: vpextrb $3, %xmm2, %eax 622; NOBW-NEXT: andl $63, %eax 623; NOBW-NEXT: vpinsrb $3, 768(%rsp,%rax), %xmm4, %xmm4 624; NOBW-NEXT: vpextrb $4, %xmm2, %eax 625; NOBW-NEXT: andl $63, %eax 626; NOBW-NEXT: vpinsrb $4, 704(%rsp,%rax), %xmm4, %xmm4 627; NOBW-NEXT: vpextrb $5, %xmm2, %eax 628; NOBW-NEXT: andl $63, %eax 629; NOBW-NEXT: vpinsrb $5, 640(%rsp,%rax), %xmm4, %xmm4 630; NOBW-NEXT: vpextrb $6, %xmm2, 
%eax 631; NOBW-NEXT: andl $63, %eax 632; NOBW-NEXT: vpinsrb $6, 576(%rsp,%rax), %xmm4, %xmm4 633; NOBW-NEXT: vpextrb $7, %xmm2, %eax 634; NOBW-NEXT: andl $63, %eax 635; NOBW-NEXT: vpinsrb $7, 512(%rsp,%rax), %xmm4, %xmm4 636; NOBW-NEXT: vpextrb $8, %xmm2, %eax 637; NOBW-NEXT: andl $63, %eax 638; NOBW-NEXT: vpinsrb $8, 448(%rsp,%rax), %xmm4, %xmm4 639; NOBW-NEXT: vpextrb $9, %xmm2, %eax 640; NOBW-NEXT: andl $63, %eax 641; NOBW-NEXT: vpinsrb $9, 384(%rsp,%rax), %xmm4, %xmm4 642; NOBW-NEXT: vpextrb $10, %xmm2, %eax 643; NOBW-NEXT: andl $63, %eax 644; NOBW-NEXT: vpinsrb $10, 320(%rsp,%rax), %xmm4, %xmm4 645; NOBW-NEXT: vpextrb $11, %xmm2, %eax 646; NOBW-NEXT: andl $63, %eax 647; NOBW-NEXT: vpinsrb $11, 256(%rsp,%rax), %xmm4, %xmm4 648; NOBW-NEXT: vpextrb $12, %xmm2, %eax 649; NOBW-NEXT: andl $63, %eax 650; NOBW-NEXT: vpinsrb $12, 192(%rsp,%rax), %xmm4, %xmm4 651; NOBW-NEXT: vpextrb $13, %xmm2, %eax 652; NOBW-NEXT: andl $63, %eax 653; NOBW-NEXT: vpinsrb $13, 128(%rsp,%rax), %xmm4, %xmm4 654; NOBW-NEXT: vpextrb $14, %xmm2, %eax 655; NOBW-NEXT: andl $63, %eax 656; NOBW-NEXT: vpinsrb $14, 64(%rsp,%rax), %xmm4, %xmm4 657; NOBW-NEXT: vpextrb $15, %xmm2, %eax 658; NOBW-NEXT: andl $63, %eax 659; NOBW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm4, %xmm2 660; NOBW-NEXT: vpextrb $0, %xmm3, %eax 661; NOBW-NEXT: andl $63, %eax 662; NOBW-NEXT: movzbl 1984(%rsp,%rax), %eax 663; NOBW-NEXT: vmovd %eax, %xmm4 664; NOBW-NEXT: vpextrb $1, %xmm3, %eax 665; NOBW-NEXT: andl $63, %eax 666; NOBW-NEXT: vpinsrb $1, 1920(%rsp,%rax), %xmm4, %xmm4 667; NOBW-NEXT: vpextrb $2, %xmm3, %eax 668; NOBW-NEXT: andl $63, %eax 669; NOBW-NEXT: vpinsrb $2, 1856(%rsp,%rax), %xmm4, %xmm4 670; NOBW-NEXT: vpextrb $3, %xmm3, %eax 671; NOBW-NEXT: andl $63, %eax 672; NOBW-NEXT: vpinsrb $3, 1792(%rsp,%rax), %xmm4, %xmm4 673; NOBW-NEXT: vpextrb $4, %xmm3, %eax 674; NOBW-NEXT: andl $63, %eax 675; NOBW-NEXT: vpinsrb $4, 1728(%rsp,%rax), %xmm4, %xmm4 676; NOBW-NEXT: vpextrb $5, %xmm3, %eax 677; NOBW-NEXT: andl $63, %eax 678; 
NOBW-NEXT: vpinsrb $5, 1664(%rsp,%rax), %xmm4, %xmm4 679; NOBW-NEXT: vpextrb $6, %xmm3, %eax 680; NOBW-NEXT: andl $63, %eax 681; NOBW-NEXT: vpinsrb $6, 1600(%rsp,%rax), %xmm4, %xmm4 682; NOBW-NEXT: vpextrb $7, %xmm3, %eax 683; NOBW-NEXT: andl $63, %eax 684; NOBW-NEXT: vpinsrb $7, 1536(%rsp,%rax), %xmm4, %xmm4 685; NOBW-NEXT: vpextrb $8, %xmm3, %eax 686; NOBW-NEXT: andl $63, %eax 687; NOBW-NEXT: vpinsrb $8, 1472(%rsp,%rax), %xmm4, %xmm4 688; NOBW-NEXT: vpextrb $9, %xmm3, %eax 689; NOBW-NEXT: andl $63, %eax 690; NOBW-NEXT: vpinsrb $9, 1408(%rsp,%rax), %xmm4, %xmm4 691; NOBW-NEXT: vpextrb $10, %xmm3, %eax 692; NOBW-NEXT: andl $63, %eax 693; NOBW-NEXT: vpinsrb $10, 1344(%rsp,%rax), %xmm4, %xmm4 694; NOBW-NEXT: vpextrb $11, %xmm3, %eax 695; NOBW-NEXT: andl $63, %eax 696; NOBW-NEXT: vpinsrb $11, 1280(%rsp,%rax), %xmm4, %xmm4 697; NOBW-NEXT: vpextrb $12, %xmm3, %eax 698; NOBW-NEXT: andl $63, %eax 699; NOBW-NEXT: vpinsrb $12, 1216(%rsp,%rax), %xmm4, %xmm4 700; NOBW-NEXT: vpextrb $13, %xmm3, %eax 701; NOBW-NEXT: andl $63, %eax 702; NOBW-NEXT: vpinsrb $13, 1152(%rsp,%rax), %xmm4, %xmm4 703; NOBW-NEXT: vpextrb $14, %xmm3, %eax 704; NOBW-NEXT: andl $63, %eax 705; NOBW-NEXT: vpinsrb $14, 1088(%rsp,%rax), %xmm4, %xmm4 706; NOBW-NEXT: vpextrb $15, %xmm3, %eax 707; NOBW-NEXT: andl $63, %eax 708; NOBW-NEXT: vpinsrb $15, 1024(%rsp,%rax), %xmm4, %xmm3 709; NOBW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 710; NOBW-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm1 711; NOBW-NEXT: movq %rbp, %rsp 712; NOBW-NEXT: popq %rbp 713; NOBW-NEXT: retq 714; 715; VBMI-LABEL: var_shuffle_v64i8: 716; VBMI: # %bb.0: 717; VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 718; VBMI-NEXT: retq 719 %index0 = extractelement <64 x i8> %indices, i32 0 720 %index1 = extractelement <64 x i8> %indices, i32 1 721 %index2 = extractelement <64 x i8> %indices, i32 2 722 %index3 = extractelement <64 x i8> %indices, i32 3 723 %index4 = extractelement <64 x i8> %indices, i32 4 724 %index5 = extractelement <64 x i8> %indices, i32 5 725 
%index6 = extractelement <64 x i8> %indices, i32 6 726 %index7 = extractelement <64 x i8> %indices, i32 7 727 %index8 = extractelement <64 x i8> %indices, i32 8 728 %index9 = extractelement <64 x i8> %indices, i32 9 729 %index10 = extractelement <64 x i8> %indices, i32 10 730 %index11 = extractelement <64 x i8> %indices, i32 11 731 %index12 = extractelement <64 x i8> %indices, i32 12 732 %index13 = extractelement <64 x i8> %indices, i32 13 733 %index14 = extractelement <64 x i8> %indices, i32 14 734 %index15 = extractelement <64 x i8> %indices, i32 15 735 %index16 = extractelement <64 x i8> %indices, i32 16 736 %index17 = extractelement <64 x i8> %indices, i32 17 737 %index18 = extractelement <64 x i8> %indices, i32 18 738 %index19 = extractelement <64 x i8> %indices, i32 19 739 %index20 = extractelement <64 x i8> %indices, i32 20 740 %index21 = extractelement <64 x i8> %indices, i32 21 741 %index22 = extractelement <64 x i8> %indices, i32 22 742 %index23 = extractelement <64 x i8> %indices, i32 23 743 %index24 = extractelement <64 x i8> %indices, i32 24 744 %index25 = extractelement <64 x i8> %indices, i32 25 745 %index26 = extractelement <64 x i8> %indices, i32 26 746 %index27 = extractelement <64 x i8> %indices, i32 27 747 %index28 = extractelement <64 x i8> %indices, i32 28 748 %index29 = extractelement <64 x i8> %indices, i32 29 749 %index30 = extractelement <64 x i8> %indices, i32 30 750 %index31 = extractelement <64 x i8> %indices, i32 31 751 %index32 = extractelement <64 x i8> %indices, i32 32 752 %index33 = extractelement <64 x i8> %indices, i32 33 753 %index34 = extractelement <64 x i8> %indices, i32 34 754 %index35 = extractelement <64 x i8> %indices, i32 35 755 %index36 = extractelement <64 x i8> %indices, i32 36 756 %index37 = extractelement <64 x i8> %indices, i32 37 757 %index38 = extractelement <64 x i8> %indices, i32 38 758 %index39 = extractelement <64 x i8> %indices, i32 39 759 %index40 = extractelement <64 x i8> %indices, i32 40 760 %index41 = 
extractelement <64 x i8> %indices, i32 41 761 %index42 = extractelement <64 x i8> %indices, i32 42 762 %index43 = extractelement <64 x i8> %indices, i32 43 763 %index44 = extractelement <64 x i8> %indices, i32 44 764 %index45 = extractelement <64 x i8> %indices, i32 45 765 %index46 = extractelement <64 x i8> %indices, i32 46 766 %index47 = extractelement <64 x i8> %indices, i32 47 767 %index48 = extractelement <64 x i8> %indices, i32 48 768 %index49 = extractelement <64 x i8> %indices, i32 49 769 %index50 = extractelement <64 x i8> %indices, i32 50 770 %index51 = extractelement <64 x i8> %indices, i32 51 771 %index52 = extractelement <64 x i8> %indices, i32 52 772 %index53 = extractelement <64 x i8> %indices, i32 53 773 %index54 = extractelement <64 x i8> %indices, i32 54 774 %index55 = extractelement <64 x i8> %indices, i32 55 775 %index56 = extractelement <64 x i8> %indices, i32 56 776 %index57 = extractelement <64 x i8> %indices, i32 57 777 %index58 = extractelement <64 x i8> %indices, i32 58 778 %index59 = extractelement <64 x i8> %indices, i32 59 779 %index60 = extractelement <64 x i8> %indices, i32 60 780 %index61 = extractelement <64 x i8> %indices, i32 61 781 %index62 = extractelement <64 x i8> %indices, i32 62 782 %index63 = extractelement <64 x i8> %indices, i32 63 783 %v0 = extractelement <64 x i8> %v, i8 %index0 784 %v1 = extractelement <64 x i8> %v, i8 %index1 785 %v2 = extractelement <64 x i8> %v, i8 %index2 786 %v3 = extractelement <64 x i8> %v, i8 %index3 787 %v4 = extractelement <64 x i8> %v, i8 %index4 788 %v5 = extractelement <64 x i8> %v, i8 %index5 789 %v6 = extractelement <64 x i8> %v, i8 %index6 790 %v7 = extractelement <64 x i8> %v, i8 %index7 791 %v8 = extractelement <64 x i8> %v, i8 %index8 792 %v9 = extractelement <64 x i8> %v, i8 %index9 793 %v10 = extractelement <64 x i8> %v, i8 %index10 794 %v11 = extractelement <64 x i8> %v, i8 %index11 795 %v12 = extractelement <64 x i8> %v, i8 %index12 796 %v13 = extractelement <64 x i8> %v, i8 
%index13 797 %v14 = extractelement <64 x i8> %v, i8 %index14 798 %v15 = extractelement <64 x i8> %v, i8 %index15 799 %v16 = extractelement <64 x i8> %v, i8 %index16 800 %v17 = extractelement <64 x i8> %v, i8 %index17 801 %v18 = extractelement <64 x i8> %v, i8 %index18 802 %v19 = extractelement <64 x i8> %v, i8 %index19 803 %v20 = extractelement <64 x i8> %v, i8 %index20 804 %v21 = extractelement <64 x i8> %v, i8 %index21 805 %v22 = extractelement <64 x i8> %v, i8 %index22 806 %v23 = extractelement <64 x i8> %v, i8 %index23 807 %v24 = extractelement <64 x i8> %v, i8 %index24 808 %v25 = extractelement <64 x i8> %v, i8 %index25 809 %v26 = extractelement <64 x i8> %v, i8 %index26 810 %v27 = extractelement <64 x i8> %v, i8 %index27 811 %v28 = extractelement <64 x i8> %v, i8 %index28 812 %v29 = extractelement <64 x i8> %v, i8 %index29 813 %v30 = extractelement <64 x i8> %v, i8 %index30 814 %v31 = extractelement <64 x i8> %v, i8 %index31 815 %v32 = extractelement <64 x i8> %v, i8 %index32 816 %v33 = extractelement <64 x i8> %v, i8 %index33 817 %v34 = extractelement <64 x i8> %v, i8 %index34 818 %v35 = extractelement <64 x i8> %v, i8 %index35 819 %v36 = extractelement <64 x i8> %v, i8 %index36 820 %v37 = extractelement <64 x i8> %v, i8 %index37 821 %v38 = extractelement <64 x i8> %v, i8 %index38 822 %v39 = extractelement <64 x i8> %v, i8 %index39 823 %v40 = extractelement <64 x i8> %v, i8 %index40 824 %v41 = extractelement <64 x i8> %v, i8 %index41 825 %v42 = extractelement <64 x i8> %v, i8 %index42 826 %v43 = extractelement <64 x i8> %v, i8 %index43 827 %v44 = extractelement <64 x i8> %v, i8 %index44 828 %v45 = extractelement <64 x i8> %v, i8 %index45 829 %v46 = extractelement <64 x i8> %v, i8 %index46 830 %v47 = extractelement <64 x i8> %v, i8 %index47 831 %v48 = extractelement <64 x i8> %v, i8 %index48 832 %v49 = extractelement <64 x i8> %v, i8 %index49 833 %v50 = extractelement <64 x i8> %v, i8 %index50 834 %v51 = extractelement <64 x i8> %v, i8 %index51 835 %v52 = 
extractelement <64 x i8> %v, i8 %index52 836 %v53 = extractelement <64 x i8> %v, i8 %index53 837 %v54 = extractelement <64 x i8> %v, i8 %index54 838 %v55 = extractelement <64 x i8> %v, i8 %index55 839 %v56 = extractelement <64 x i8> %v, i8 %index56 840 %v57 = extractelement <64 x i8> %v, i8 %index57 841 %v58 = extractelement <64 x i8> %v, i8 %index58 842 %v59 = extractelement <64 x i8> %v, i8 %index59 843 %v60 = extractelement <64 x i8> %v, i8 %index60 844 %v61 = extractelement <64 x i8> %v, i8 %index61 845 %v62 = extractelement <64 x i8> %v, i8 %index62 846 %v63 = extractelement <64 x i8> %v, i8 %index63 847 %ret0 = insertelement <64 x i8> undef, i8 %v0, i32 0 848 %ret1 = insertelement <64 x i8> %ret0, i8 %v1, i32 1 849 %ret2 = insertelement <64 x i8> %ret1, i8 %v2, i32 2 850 %ret3 = insertelement <64 x i8> %ret2, i8 %v3, i32 3 851 %ret4 = insertelement <64 x i8> %ret3, i8 %v4, i32 4 852 %ret5 = insertelement <64 x i8> %ret4, i8 %v5, i32 5 853 %ret6 = insertelement <64 x i8> %ret5, i8 %v6, i32 6 854 %ret7 = insertelement <64 x i8> %ret6, i8 %v7, i32 7 855 %ret8 = insertelement <64 x i8> %ret7, i8 %v8, i32 8 856 %ret9 = insertelement <64 x i8> %ret8, i8 %v9, i32 9 857 %ret10 = insertelement <64 x i8> %ret9, i8 %v10, i32 10 858 %ret11 = insertelement <64 x i8> %ret10, i8 %v11, i32 11 859 %ret12 = insertelement <64 x i8> %ret11, i8 %v12, i32 12 860 %ret13 = insertelement <64 x i8> %ret12, i8 %v13, i32 13 861 %ret14 = insertelement <64 x i8> %ret13, i8 %v14, i32 14 862 %ret15 = insertelement <64 x i8> %ret14, i8 %v15, i32 15 863 %ret16 = insertelement <64 x i8> %ret15, i8 %v16, i32 16 864 %ret17 = insertelement <64 x i8> %ret16, i8 %v17, i32 17 865 %ret18 = insertelement <64 x i8> %ret17, i8 %v18, i32 18 866 %ret19 = insertelement <64 x i8> %ret18, i8 %v19, i32 19 867 %ret20 = insertelement <64 x i8> %ret19, i8 %v20, i32 20 868 %ret21 = insertelement <64 x i8> %ret20, i8 %v21, i32 21 869 %ret22 = insertelement <64 x i8> %ret21, i8 %v22, i32 22 870 %ret23 = 
insertelement <64 x i8> %ret22, i8 %v23, i32 23 871 %ret24 = insertelement <64 x i8> %ret23, i8 %v24, i32 24 872 %ret25 = insertelement <64 x i8> %ret24, i8 %v25, i32 25 873 %ret26 = insertelement <64 x i8> %ret25, i8 %v26, i32 26 874 %ret27 = insertelement <64 x i8> %ret26, i8 %v27, i32 27 875 %ret28 = insertelement <64 x i8> %ret27, i8 %v28, i32 28 876 %ret29 = insertelement <64 x i8> %ret28, i8 %v29, i32 29 877 %ret30 = insertelement <64 x i8> %ret29, i8 %v30, i32 30 878 %ret31 = insertelement <64 x i8> %ret30, i8 %v31, i32 31 879 %ret32 = insertelement <64 x i8> %ret31, i8 %v32, i32 32 880 %ret33 = insertelement <64 x i8> %ret32, i8 %v33, i32 33 881 %ret34 = insertelement <64 x i8> %ret33, i8 %v34, i32 34 882 %ret35 = insertelement <64 x i8> %ret34, i8 %v35, i32 35 883 %ret36 = insertelement <64 x i8> %ret35, i8 %v36, i32 36 884 %ret37 = insertelement <64 x i8> %ret36, i8 %v37, i32 37 885 %ret38 = insertelement <64 x i8> %ret37, i8 %v38, i32 38 886 %ret39 = insertelement <64 x i8> %ret38, i8 %v39, i32 39 887 %ret40 = insertelement <64 x i8> %ret39, i8 %v40, i32 40 888 %ret41 = insertelement <64 x i8> %ret40, i8 %v41, i32 41 889 %ret42 = insertelement <64 x i8> %ret41, i8 %v42, i32 42 890 %ret43 = insertelement <64 x i8> %ret42, i8 %v43, i32 43 891 %ret44 = insertelement <64 x i8> %ret43, i8 %v44, i32 44 892 %ret45 = insertelement <64 x i8> %ret44, i8 %v45, i32 45 893 %ret46 = insertelement <64 x i8> %ret45, i8 %v46, i32 46 894 %ret47 = insertelement <64 x i8> %ret46, i8 %v47, i32 47 895 %ret48 = insertelement <64 x i8> %ret47, i8 %v48, i32 48 896 %ret49 = insertelement <64 x i8> %ret48, i8 %v49, i32 49 897 %ret50 = insertelement <64 x i8> %ret49, i8 %v50, i32 50 898 %ret51 = insertelement <64 x i8> %ret50, i8 %v51, i32 51 899 %ret52 = insertelement <64 x i8> %ret51, i8 %v52, i32 52 900 %ret53 = insertelement <64 x i8> %ret52, i8 %v53, i32 53 901 %ret54 = insertelement <64 x i8> %ret53, i8 %v54, i32 54 902 %ret55 = insertelement <64 x i8> %ret54, i8 %v55, i32 55 
  %ret56 = insertelement <64 x i8> %ret55, i8 %v56, i32 56
  %ret57 = insertelement <64 x i8> %ret56, i8 %v57, i32 57
  %ret58 = insertelement <64 x i8> %ret57, i8 %v58, i32 58
  %ret59 = insertelement <64 x i8> %ret58, i8 %v59, i32 59
  %ret60 = insertelement <64 x i8> %ret59, i8 %v60, i32 60
  %ret61 = insertelement <64 x i8> %ret60, i8 %v61, i32 61
  %ret62 = insertelement <64 x i8> %ret61, i8 %v62, i32 62
  %ret63 = insertelement <64 x i8> %ret62, i8 %v63, i32 63
  ret <64 x i8> %ret63
}

; Fully scalarized variable shuffle of <8 x double>: each i64 index is
; extracted, used to extract a double from %v, and the results are
; reassembled with insertelement. All targets recognize the pattern and
; lower it to a single vpermpd (see AVX512 check lines).
define <8 x double> @var_shuffle_v8f64(<8 x double> %v, <8 x i64> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %index0 = extractelement <8 x i64> %indices, i32 0
  %index1 = extractelement <8 x i64> %indices, i32 1
  %index2 = extractelement <8 x i64> %indices, i32 2
  %index3 = extractelement <8 x i64> %indices, i32 3
  %index4 = extractelement <8 x i64> %indices, i32 4
  %index5 = extractelement <8 x i64> %indices, i32 5
  %index6 = extractelement <8 x i64> %indices, i32 6
  %index7 = extractelement <8 x i64> %indices, i32 7
  %v0 = extractelement <8 x double> %v, i64 %index0
  %v1 = extractelement <8 x double> %v, i64 %index1
  %v2 = extractelement <8 x double> %v, i64 %index2
  %v3 = extractelement <8 x double> %v, i64 %index3
  %v4 = extractelement <8 x double> %v, i64 %index4
  %v5 = extractelement <8 x double> %v, i64 %index5
  %v6 = extractelement <8 x double> %v, i64 %index6
  %v7 = extractelement <8 x double> %v, i64 %index7
  %ret0 = insertelement <8 x double> undef, double %v0, i32 0
  %ret1 = insertelement <8 x double> %ret0, double %v1, i32 1
  %ret2 = insertelement <8 x double> %ret1, double %v2, i32 2
  %ret3 = insertelement <8 x double> %ret2, double %v3, i32 3
  %ret4 = insertelement <8 x double> %ret3, double %v4, i32 4
  %ret5 = insertelement <8 x double> %ret4, double %v5, i32 5
  %ret6 = insertelement <8 x double> %ret5, double %v6, i32 6
  %ret7 = insertelement <8 x double> %ret6, double %v7, i32 7
  ret <8 x double> %ret7
}

; Fully scalarized variable shuffle of <16 x float> by i32 indices, same
; extract/extract/insert pattern as above; lowers to a single vpermps on
; all AVX512 configurations (see AVX512 check lines).
define <16 x float> @var_shuffle_v16f32(<16 x float> %v, <16 x i32> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %index0 = extractelement <16 x i32> %indices, i32 0
  %index1 = extractelement <16 x i32> %indices, i32 1
  %index2 = extractelement <16 x i32> %indices, i32 2
  %index3 = extractelement <16 x i32> %indices, i32 3
  %index4 = extractelement <16 x i32> %indices, i32 4
  %index5 = extractelement <16 x i32> %indices, i32 5
  %index6 = extractelement <16 x i32> %indices, i32 6
  %index7 = extractelement <16 x i32> %indices, i32 7
  %index8 = extractelement <16 x i32> %indices, i32 8
  %index9 = extractelement <16 x i32> %indices, i32 9
  %index10 = extractelement <16 x i32> %indices, i32 10
  %index11 = extractelement <16 x i32> %indices, i32 11
  %index12 = extractelement <16 x i32> %indices, i32 12
  %index13 = extractelement <16 x i32> %indices, i32 13
  %index14 = extractelement <16 x i32> %indices, i32 14
  %index15 = extractelement <16 x i32> %indices, i32 15
  %v0 = extractelement <16 x float> %v, i32 %index0
  %v1 = extractelement <16 x float> %v, i32 %index1
  %v2 = extractelement <16 x float> %v, i32 %index2
  %v3 = extractelement <16 x float> %v, i32 %index3
  %v4 = extractelement <16 x float> %v, i32 %index4
  %v5 = extractelement <16 x float> %v, i32 %index5
  %v6 = extractelement <16 x float> %v, i32 %index6
  %v7 = extractelement <16 x float> %v, i32 %index7
  %v8 = extractelement <16 x float> %v, i32 %index8
  %v9 = extractelement <16 x float> %v, i32 %index9
  %v10 = extractelement <16 x float> %v, i32 %index10
  %v11 = extractelement <16 x float> %v, i32 %index11
  %v12 = extractelement <16 x float> %v, i32 %index12
  %v13 = extractelement <16 x float> %v, i32 %index13
  %v14 = extractelement <16 x float> %v, i32 %index14
  %v15 = extractelement <16 x float> %v, i32 %index15
  %ret0 = insertelement <16 x float> undef, float %v0, i32 0
  %ret1 = insertelement <16 x float> %ret0, float %v1, i32 1
  %ret2 = insertelement <16 x float> %ret1, float %v2, i32 2
  %ret3 = insertelement <16 x float> %ret2, float %v3, i32 3
  %ret4 = insertelement <16 x float> %ret3, float %v4, i32 4
  %ret5 = insertelement <16 x float> %ret4, float %v5, i32 5
  %ret6 = insertelement <16 x float> %ret5, float %v6, i32 6
  %ret7 = insertelement <16 x float> %ret6, float %v7, i32 7
  %ret8 = insertelement <16 x float> %ret7, float %v8, i32 8
  %ret9 = insertelement <16 x float> %ret8, float %v9, i32 9
  %ret10 = insertelement <16 x float> %ret9, float %v10, i32 10
  %ret11 = insertelement <16 x float> %ret10, float %v11, i32 11
  %ret12 = insertelement <16 x float> %ret11, float %v12, i32 12
  %ret13 = insertelement <16 x float> %ret12, float %v13, i32 13
  %ret14 = insertelement <16 x float> %ret13, float %v14, i32 14
  %ret15 = insertelement <16 x float> %ret14, float %v15, i32 15
  ret <16 x float> %ret15
}