; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, %b
  ret <16 x i32> %shift
}

define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm2, %zmm3, %zmm2
; AVX512DQ-NEXT:    vpmovdw %zmm2, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, %b
  ret <32 x i16> %shift
}

define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpsllw $4, %ymm2, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm5
; AVX512DQ-NEXT:    vpsllw $5, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm5, %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $2, %ymm2, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm5, %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm5, %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpand %ymm6, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT:    vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = shl <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT:    # ymm4 = mem[0,1,0,1]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT:    # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpslld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}