1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW 10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512DQVL 11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL 12; 13; Just one 32-bit run to make sure we do reasonable things for i64 shifts. 
14; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE 15 16; 17; Variable Shifts 18; 19 20define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { 21; SSE2-LABEL: var_shift_v2i64: 22; SSE2: # %bb.0: 23; SSE2-NEXT: movdqa %xmm0, %xmm2 24; SSE2-NEXT: psllq %xmm1, %xmm2 25; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 26; SSE2-NEXT: psllq %xmm1, %xmm0 27; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] 28; SSE2-NEXT: retq 29; 30; SSE41-LABEL: var_shift_v2i64: 31; SSE41: # %bb.0: 32; SSE41-NEXT: movdqa %xmm0, %xmm2 33; SSE41-NEXT: psllq %xmm1, %xmm2 34; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 35; SSE41-NEXT: psllq %xmm1, %xmm0 36; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] 37; SSE41-NEXT: retq 38; 39; AVX1-LABEL: var_shift_v2i64: 40; AVX1: # %bb.0: 41; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm2 42; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 43; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 44; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] 45; AVX1-NEXT: retq 46; 47; AVX2-LABEL: var_shift_v2i64: 48; AVX2: # %bb.0: 49; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 50; AVX2-NEXT: retq 51; 52; XOPAVX1-LABEL: var_shift_v2i64: 53; XOPAVX1: # %bb.0: 54; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0 55; XOPAVX1-NEXT: retq 56; 57; XOPAVX2-LABEL: var_shift_v2i64: 58; XOPAVX2: # %bb.0: 59; XOPAVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 60; XOPAVX2-NEXT: retq 61; 62; AVX512-LABEL: var_shift_v2i64: 63; AVX512: # %bb.0: 64; AVX512-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 65; AVX512-NEXT: retq 66; 67; AVX512VL-LABEL: var_shift_v2i64: 68; AVX512VL: # %bb.0: 69; AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 70; AVX512VL-NEXT: retq 71; 72; X86-SSE-LABEL: var_shift_v2i64: 73; X86-SSE: # %bb.0: 74; X86-SSE-NEXT: movdqa %xmm0, %xmm2 75; X86-SSE-NEXT: psllq %xmm1, %xmm2 76; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 77; X86-SSE-NEXT: psllq %xmm1, %xmm0 78; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = 
xmm2[0],xmm0[1] 79; X86-SSE-NEXT: retl 80 %shift = shl <2 x i64> %a, %b 81 ret <2 x i64> %shift 82} 83 84define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { 85; SSE2-LABEL: var_shift_v4i32: 86; SSE2: # %bb.0: 87; SSE2-NEXT: pslld $23, %xmm1 88; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 89; SSE2-NEXT: cvttps2dq %xmm1, %xmm1 90; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 91; SSE2-NEXT: pmuludq %xmm1, %xmm0 92; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 93; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 94; SSE2-NEXT: pmuludq %xmm2, %xmm1 95; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 96; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 97; SSE2-NEXT: retq 98; 99; SSE41-LABEL: var_shift_v4i32: 100; SSE41: # %bb.0: 101; SSE41-NEXT: pslld $23, %xmm1 102; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 103; SSE41-NEXT: cvttps2dq %xmm1, %xmm1 104; SSE41-NEXT: pmulld %xmm1, %xmm0 105; SSE41-NEXT: retq 106; 107; AVX1-LABEL: var_shift_v4i32: 108; AVX1: # %bb.0: 109; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 110; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 111; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 112; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 113; AVX1-NEXT: retq 114; 115; AVX2-LABEL: var_shift_v4i32: 116; AVX2: # %bb.0: 117; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 118; AVX2-NEXT: retq 119; 120; XOPAVX1-LABEL: var_shift_v4i32: 121; XOPAVX1: # %bb.0: 122; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0 123; XOPAVX1-NEXT: retq 124; 125; XOPAVX2-LABEL: var_shift_v4i32: 126; XOPAVX2: # %bb.0: 127; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 128; XOPAVX2-NEXT: retq 129; 130; AVX512-LABEL: var_shift_v4i32: 131; AVX512: # %bb.0: 132; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 133; AVX512-NEXT: retq 134; 135; AVX512VL-LABEL: var_shift_v4i32: 136; AVX512VL: # %bb.0: 137; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 138; AVX512VL-NEXT: retq 139; 140; X86-SSE-LABEL: var_shift_v4i32: 141; X86-SSE: # %bb.0: 142; 
X86-SSE-NEXT: pslld $23, %xmm1 143; X86-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 144; X86-SSE-NEXT: cvttps2dq %xmm1, %xmm1 145; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 146; X86-SSE-NEXT: pmuludq %xmm1, %xmm0 147; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 148; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 149; X86-SSE-NEXT: pmuludq %xmm2, %xmm1 150; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 151; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 152; X86-SSE-NEXT: retl 153 %shift = shl <4 x i32> %a, %b 154 ret <4 x i32> %shift 155} 156 157define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { 158; SSE2-LABEL: var_shift_v8i16: 159; SSE2: # %bb.0: 160; SSE2-NEXT: movdqa %xmm1, %xmm2 161; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7] 162; SSE2-NEXT: pslld $23, %xmm2 163; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216] 164; SSE2-NEXT: paddd %xmm3, %xmm2 165; SSE2-NEXT: cvttps2dq %xmm2, %xmm2 166; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7] 167; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7] 168; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 169; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 170; SSE2-NEXT: pslld $23, %xmm1 171; SSE2-NEXT: paddd %xmm3, %xmm1 172; SSE2-NEXT: cvttps2dq %xmm1, %xmm1 173; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] 174; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] 175; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 176; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 177; SSE2-NEXT: pmullw %xmm1, %xmm0 178; SSE2-NEXT: retq 179; 180; SSE41-LABEL: var_shift_v8i16: 181; SSE41: # %bb.0: 182; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 183; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 184; SSE41-NEXT: pslld $23, %xmm1 185; SSE41-NEXT: movdqa {{.*#+}} xmm3 = 
[1065353216,1065353216,1065353216,1065353216] 186; SSE41-NEXT: paddd %xmm3, %xmm1 187; SSE41-NEXT: cvttps2dq %xmm1, %xmm1 188; SSE41-NEXT: pslld $23, %xmm2 189; SSE41-NEXT: paddd %xmm3, %xmm2 190; SSE41-NEXT: cvttps2dq %xmm2, %xmm2 191; SSE41-NEXT: packusdw %xmm1, %xmm2 192; SSE41-NEXT: pmullw %xmm2, %xmm0 193; SSE41-NEXT: retq 194; 195; AVX1-LABEL: var_shift_v8i16: 196; AVX1: # %bb.0: 197; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4,4,5,5,6,6,7,7] 198; AVX1-NEXT: vpslld $23, %xmm2, %xmm2 199; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216] 200; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 201; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 202; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 203; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 204; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1 205; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 206; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 207; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 208; AVX1-NEXT: retq 209; 210; AVX2-LABEL: var_shift_v8i16: 211; AVX2: # %bb.0: 212; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 213; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 214; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 215; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u] 216; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 217; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 218; AVX2-NEXT: vzeroupper 219; AVX2-NEXT: retq 220; 221; XOP-LABEL: var_shift_v8i16: 222; XOP: # %bb.0: 223; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0 224; XOP-NEXT: retq 225; 226; AVX512DQ-LABEL: var_shift_v8i16: 227; AVX512DQ: # %bb.0: 228; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = 
xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 229; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 230; AVX512DQ-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 231; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 232; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 233; AVX512DQ-NEXT: vzeroupper 234; AVX512DQ-NEXT: retq 235; 236; AVX512BW-LABEL: var_shift_v8i16: 237; AVX512BW: # %bb.0: 238; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 239; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 240; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 241; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 242; AVX512BW-NEXT: vzeroupper 243; AVX512BW-NEXT: retq 244; 245; AVX512DQVL-LABEL: var_shift_v8i16: 246; AVX512DQVL: # %bb.0: 247; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 248; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 249; AVX512DQVL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 250; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0 251; AVX512DQVL-NEXT: vzeroupper 252; AVX512DQVL-NEXT: retq 253; 254; AVX512BWVL-LABEL: var_shift_v8i16: 255; AVX512BWVL: # %bb.0: 256; AVX512BWVL-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 257; AVX512BWVL-NEXT: retq 258; 259; X86-SSE-LABEL: var_shift_v8i16: 260; X86-SSE: # %bb.0: 261; X86-SSE-NEXT: movdqa %xmm1, %xmm2 262; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7] 263; X86-SSE-NEXT: pslld $23, %xmm2 264; X86-SSE-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216] 265; X86-SSE-NEXT: paddd %xmm3, %xmm2 266; X86-SSE-NEXT: cvttps2dq %xmm2, %xmm2 267; X86-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7] 268; X86-SSE-NEXT: pshufhw {{.*#+}} xmm2 = 
xmm2[0,1,2,3,4,6,6,7] 269; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 270; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 271; X86-SSE-NEXT: pslld $23, %xmm1 272; X86-SSE-NEXT: paddd %xmm3, %xmm1 273; X86-SSE-NEXT: cvttps2dq %xmm1, %xmm1 274; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] 275; X86-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] 276; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 277; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 278; X86-SSE-NEXT: pmullw %xmm1, %xmm0 279; X86-SSE-NEXT: retl 280 %shift = shl <8 x i16> %a, %b 281 ret <8 x i16> %shift 282} 283 284define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { 285; SSE2-LABEL: var_shift_v16i8: 286; SSE2: # %bb.0: 287; SSE2-NEXT: psllw $5, %xmm1 288; SSE2-NEXT: pxor %xmm2, %xmm2 289; SSE2-NEXT: pxor %xmm3, %xmm3 290; SSE2-NEXT: pcmpgtb %xmm1, %xmm3 291; SSE2-NEXT: movdqa %xmm3, %xmm4 292; SSE2-NEXT: pandn %xmm0, %xmm4 293; SSE2-NEXT: psllw $4, %xmm0 294; SSE2-NEXT: pand %xmm3, %xmm0 295; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 296; SSE2-NEXT: por %xmm4, %xmm0 297; SSE2-NEXT: paddb %xmm1, %xmm1 298; SSE2-NEXT: pxor %xmm3, %xmm3 299; SSE2-NEXT: pcmpgtb %xmm1, %xmm3 300; SSE2-NEXT: movdqa %xmm3, %xmm4 301; SSE2-NEXT: pandn %xmm0, %xmm4 302; SSE2-NEXT: psllw $2, %xmm0 303; SSE2-NEXT: pand %xmm3, %xmm0 304; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 305; SSE2-NEXT: por %xmm4, %xmm0 306; SSE2-NEXT: paddb %xmm1, %xmm1 307; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 308; SSE2-NEXT: movdqa %xmm2, %xmm1 309; SSE2-NEXT: pandn %xmm0, %xmm1 310; SSE2-NEXT: paddb %xmm0, %xmm0 311; SSE2-NEXT: pand %xmm2, %xmm0 312; SSE2-NEXT: por %xmm1, %xmm0 313; SSE2-NEXT: retq 314; 315; SSE41-LABEL: var_shift_v16i8: 316; SSE41: # %bb.0: 317; SSE41-NEXT: movdqa %xmm0, %xmm2 318; SSE41-NEXT: psllw $5, %xmm1 319; SSE41-NEXT: movdqa %xmm0, %xmm3 320; SSE41-NEXT: psllw $4, %xmm3 321; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 
322; SSE41-NEXT: movdqa %xmm1, %xmm0 323; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2 324; SSE41-NEXT: movdqa %xmm2, %xmm3 325; SSE41-NEXT: psllw $2, %xmm3 326; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 327; SSE41-NEXT: paddb %xmm1, %xmm1 328; SSE41-NEXT: movdqa %xmm1, %xmm0 329; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2 330; SSE41-NEXT: movdqa %xmm2, %xmm3 331; SSE41-NEXT: paddb %xmm2, %xmm3 332; SSE41-NEXT: paddb %xmm1, %xmm1 333; SSE41-NEXT: movdqa %xmm1, %xmm0 334; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2 335; SSE41-NEXT: movdqa %xmm2, %xmm0 336; SSE41-NEXT: retq 337; 338; AVX-LABEL: var_shift_v16i8: 339; AVX: # %bb.0: 340; AVX-NEXT: vpsllw $5, %xmm1, %xmm1 341; AVX-NEXT: vpsllw $4, %xmm0, %xmm2 342; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 343; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 344; AVX-NEXT: vpsllw $2, %xmm0, %xmm2 345; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 346; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 347; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 348; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2 349; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 350; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 351; AVX-NEXT: retq 352; 353; XOP-LABEL: var_shift_v16i8: 354; XOP: # %bb.0: 355; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0 356; XOP-NEXT: retq 357; 358; AVX512DQ-LABEL: var_shift_v16i8: 359; AVX512DQ: # %bb.0: 360; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero 361; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 362; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 363; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 364; AVX512DQ-NEXT: vzeroupper 365; AVX512DQ-NEXT: retq 366; 367; AVX512BW-LABEL: var_shift_v16i8: 368; AVX512BW: # %bb.0: 369; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 370; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 371; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 372; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 373; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 374; AVX512BW-NEXT: vzeroupper 375; AVX512BW-NEXT: retq 376; 377; AVX512DQVL-LABEL: var_shift_v16i8: 378; AVX512DQVL: # %bb.0: 379; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero 380; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 381; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 382; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 383; AVX512DQVL-NEXT: vzeroupper 384; AVX512DQVL-NEXT: retq 385; 386; AVX512BWVL-LABEL: var_shift_v16i8: 387; AVX512BWVL: # %bb.0: 388; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 389; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 390; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 391; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 392; AVX512BWVL-NEXT: vzeroupper 393; AVX512BWVL-NEXT: retq 394; 395; X86-SSE-LABEL: var_shift_v16i8: 396; X86-SSE: # %bb.0: 397; X86-SSE-NEXT: psllw $5, %xmm1 398; X86-SSE-NEXT: pxor %xmm2, %xmm2 399; X86-SSE-NEXT: pxor %xmm3, %xmm3 400; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3 401; X86-SSE-NEXT: movdqa %xmm3, %xmm4 402; X86-SSE-NEXT: pandn %xmm0, %xmm4 403; X86-SSE-NEXT: psllw $4, %xmm0 404; X86-SSE-NEXT: pand %xmm3, %xmm0 405; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 406; X86-SSE-NEXT: por %xmm4, %xmm0 407; X86-SSE-NEXT: paddb %xmm1, %xmm1 408; X86-SSE-NEXT: pxor %xmm3, %xmm3 409; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm3 410; X86-SSE-NEXT: movdqa %xmm3, %xmm4 411; X86-SSE-NEXT: pandn %xmm0, %xmm4 412; X86-SSE-NEXT: psllw $2, %xmm0 413; X86-SSE-NEXT: pand %xmm3, %xmm0 414; 
X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 415; X86-SSE-NEXT: por %xmm4, %xmm0 416; X86-SSE-NEXT: paddb %xmm1, %xmm1 417; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2 418; X86-SSE-NEXT: movdqa %xmm2, %xmm1 419; X86-SSE-NEXT: pandn %xmm0, %xmm1 420; X86-SSE-NEXT: paddb %xmm0, %xmm0 421; X86-SSE-NEXT: pand %xmm2, %xmm0 422; X86-SSE-NEXT: por %xmm1, %xmm0 423; X86-SSE-NEXT: retl 424 %shift = shl <16 x i8> %a, %b 425 ret <16 x i8> %shift 426} 427 428; 429; Uniform Variable Shifts 430; 431 432define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { 433; SSE-LABEL: splatvar_shift_v2i64: 434; SSE: # %bb.0: 435; SSE-NEXT: psllq %xmm1, %xmm0 436; SSE-NEXT: retq 437; 438; AVX-LABEL: splatvar_shift_v2i64: 439; AVX: # %bb.0: 440; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 441; AVX-NEXT: retq 442; 443; XOP-LABEL: splatvar_shift_v2i64: 444; XOP: # %bb.0: 445; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0 446; XOP-NEXT: retq 447; 448; AVX512-LABEL: splatvar_shift_v2i64: 449; AVX512: # %bb.0: 450; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0 451; AVX512-NEXT: retq 452; 453; AVX512VL-LABEL: splatvar_shift_v2i64: 454; AVX512VL: # %bb.0: 455; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 456; AVX512VL-NEXT: retq 457; 458; X86-SSE-LABEL: splatvar_shift_v2i64: 459; X86-SSE: # %bb.0: 460; X86-SSE-NEXT: psllq %xmm1, %xmm0 461; X86-SSE-NEXT: retl 462 %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer 463 %shift = shl <2 x i64> %a, %splat 464 ret <2 x i64> %shift 465} 466 467define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { 468; SSE2-LABEL: splatvar_shift_v4i32: 469; SSE2: # %bb.0: 470; SSE2-NEXT: xorps %xmm2, %xmm2 471; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] 472; SSE2-NEXT: pslld %xmm2, %xmm0 473; SSE2-NEXT: retq 474; 475; SSE41-LABEL: splatvar_shift_v4i32: 476; SSE41: # %bb.0: 477; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 478; SSE41-NEXT: pslld %xmm1, %xmm0 479; SSE41-NEXT: retq 480; 481; AVX-LABEL: 
splatvar_shift_v4i32: 482; AVX: # %bb.0: 483; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 484; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0 485; AVX-NEXT: retq 486; 487; XOP-LABEL: splatvar_shift_v4i32: 488; XOP: # %bb.0: 489; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 490; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0 491; XOP-NEXT: retq 492; 493; AVX512-LABEL: splatvar_shift_v4i32: 494; AVX512: # %bb.0: 495; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 496; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0 497; AVX512-NEXT: retq 498; 499; AVX512VL-LABEL: splatvar_shift_v4i32: 500; AVX512VL: # %bb.0: 501; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 502; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0 503; AVX512VL-NEXT: retq 504; 505; X86-SSE-LABEL: splatvar_shift_v4i32: 506; X86-SSE: # %bb.0: 507; X86-SSE-NEXT: xorps %xmm2, %xmm2 508; X86-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] 509; X86-SSE-NEXT: pslld %xmm2, %xmm0 510; X86-SSE-NEXT: retl 511 %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer 512 %shift = shl <4 x i32> %a, %splat 513 ret <4 x i32> %shift 514} 515 516define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { 517; SSE2-LABEL: splatvar_shift_v8i16: 518; SSE2: # %bb.0: 519; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 520; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 521; SSE2-NEXT: psllw %xmm1, %xmm0 522; SSE2-NEXT: retq 523; 524; SSE41-LABEL: splatvar_shift_v8i16: 525; SSE41: # %bb.0: 526; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 527; SSE41-NEXT: psllw %xmm1, %xmm0 528; SSE41-NEXT: retq 529; 530; AVX-LABEL: splatvar_shift_v8i16: 531; AVX: # %bb.0: 532; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 533; AVX-NEXT: vpsllw %xmm1, 
%xmm0, %xmm0 534; AVX-NEXT: retq 535; 536; XOP-LABEL: splatvar_shift_v8i16: 537; XOP: # %bb.0: 538; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 539; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0 540; XOP-NEXT: retq 541; 542; AVX512-LABEL: splatvar_shift_v8i16: 543; AVX512: # %bb.0: 544; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 545; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0 546; AVX512-NEXT: retq 547; 548; AVX512VL-LABEL: splatvar_shift_v8i16: 549; AVX512VL: # %bb.0: 550; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 551; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 552; AVX512VL-NEXT: retq 553; 554; X86-SSE-LABEL: splatvar_shift_v8i16: 555; X86-SSE: # %bb.0: 556; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 557; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 558; X86-SSE-NEXT: psllw %xmm1, %xmm0 559; X86-SSE-NEXT: retl 560 %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer 561 %shift = shl <8 x i16> %a, %splat 562 ret <8 x i16> %shift 563} 564 565define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { 566; SSE2-LABEL: splatvar_shift_v16i8: 567; SSE2: # %bb.0: 568; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0] 569; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 570; SSE2-NEXT: psllw %xmm1, %xmm0 571; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 572; SSE2-NEXT: psllw %xmm1, %xmm2 573; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 574; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7] 575; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] 576; SSE2-NEXT: pand %xmm1, %xmm0 577; SSE2-NEXT: retq 578; 579; 
SSE41-LABEL: splatvar_shift_v16i8: 580; SSE41: # %bb.0: 581; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 582; SSE41-NEXT: psllw %xmm1, %xmm0 583; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 584; SSE41-NEXT: psllw %xmm1, %xmm2 585; SSE41-NEXT: pxor %xmm1, %xmm1 586; SSE41-NEXT: pshufb %xmm1, %xmm2 587; SSE41-NEXT: pand %xmm2, %xmm0 588; SSE41-NEXT: retq 589; 590; AVX1-LABEL: splatvar_shift_v16i8: 591; AVX1: # %bb.0: 592; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 593; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 594; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 595; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm1 596; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 597; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 598; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 599; AVX1-NEXT: retq 600; 601; AVX2-LABEL: splatvar_shift_v16i8: 602; AVX2: # %bb.0: 603; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 604; AVX2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 605; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 606; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1 607; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 608; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 609; AVX2-NEXT: retq 610; 611; XOPAVX1-LABEL: splatvar_shift_v16i8: 612; XOPAVX1: # %bb.0: 613; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 614; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 615; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0 616; XOPAVX1-NEXT: retq 617; 618; XOPAVX2-LABEL: splatvar_shift_v16i8: 619; XOPAVX2: # %bb.0: 620; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 621; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0 622; XOPAVX2-NEXT: retq 623; 624; AVX512DQ-LABEL: splatvar_shift_v16i8: 625; AVX512DQ: # %bb.0: 626; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 627; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 628; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0 629; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 630; AVX512DQ-NEXT: vzeroupper 631; AVX512DQ-NEXT: retq 632; 633; AVX512BW-LABEL: splatvar_shift_v16i8: 634; AVX512BW: # %bb.0: 635; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 636; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 637; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 638; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 639; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 640; AVX512BW-NEXT: vzeroupper 641; AVX512BW-NEXT: retq 642; 643; AVX512DQVL-LABEL: splatvar_shift_v16i8: 644; AVX512DQVL: # %bb.0: 645; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 646; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 647; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0 648; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 649; AVX512DQVL-NEXT: vzeroupper 650; AVX512DQVL-NEXT: retq 651; 652; 
AVX512BWVL-LABEL: splatvar_shift_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_shift_v16i8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psllw %xmm1, %xmm0
; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
; X86-SSE-NEXT: psllw %xmm1, %xmm2
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X86-SSE-NEXT: pand %xmm1, %xmm0
; X86-SSE-NEXT: retl
  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i8> %a, %splat
  ret <16 x i8> %shift
}

;
; Constant Shifts
;

; NOTE(review): the CHECK lines in the functions below are autogenerated
; (see the update_llc_test_checks.py note at the top of the file). Do not
; hand-edit them; rerun the script after any codegen change.

; shl <2 x i64> by the non-uniform amounts <1, 7>. Targets without a per-lane
; variable 64-bit shift (pre-AVX2/XOP) shift each lane with an immediate psllq
; and recombine the halves (movsd on SSE2, pblendw on SSE4.1/AVX1); AVX2,
; XOP and AVX512 use a single variable shift with a constant-pool operand.
define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: constant_shift_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psllq $1, %xmm1
; SSE2-NEXT: psllq $7, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: constant_shift_v2i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psllq $7, %xmm1
; SSE41-NEXT: psllq $1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_shift_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllq $7, %xmm0, %xmm1
; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v2i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v2i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: constant_shift_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: constant_shift_v2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: constant_shift_v2i64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movdqa %xmm0, %xmm1
; X86-SSE-NEXT: psllq $1, %xmm1
; X86-SSE-NEXT: psllq $7, %xmm0
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT: retl
  %shift = shl <2 x i64> %a, <i64 1, i64 7>
  ret <2 x i64> %shift
}

; shl <4 x i32> by <4,5,6,7>. Without a variable dword shift this becomes a
; multiply by the power-of-two vector [16,32,64,128]: SSE2 must synthesize a
; 32-bit multiply from two pmuludq + shuffles, SSE4.1/AVX1 use pmulld, and
; AVX2/XOP/AVX512 use a true variable shift.
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: constant_shift_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [16,32,64,128]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: constant_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_shift_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: constant_shift_v4i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_shift_v4i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: constant_shift_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: constant_shift_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: constant_shift_v4i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,32,64,128]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: retl
  %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %shift
}

; shl <8 x i16> by <0..7>: lowered to a single pmullw by a constant-pool
; vector of powers of two on every target except AVX512BW, which has a
; native variable word shift (vpsllvw); without VL the xmm value is widened
; to zmm for the shift and truncated back (the "kill" comments).
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE-LABEL: constant_shift_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: constant_shift_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: constant_shift_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512DQ-LABEL: constant_shift_v8i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQVL-LABEL: constant_shift_v8i16:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: constant_shift_v8i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: constant_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: retl
  %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  ret <8 x i16> %shift
}

; shl <16 x i8> by a non-uniform constant. x86 has no byte shift, so SSE/AVX
; widen the bytes to words (unpack/pmovzx), pmullw by powers of two, mask to
; 255 and packuswb back; AVX512 widens further (bytes->dwords for DQ,
; bytes->words for BW) and uses a variable shift plus truncate. XOP has a
; native per-byte shift (vpshlb).
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: constant_shift_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: constant_shift_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: packuswb %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_shift_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; XOP-LABEL: constant_shift_v16i8:
; XOP: # %bb.0:
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512DQ-LABEL: constant_shift_v16i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQVL-LABEL: constant_shift_v16i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: constant_shift_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: constant_shift_v16i8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movdqa %xmm0, %xmm1
; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; X86-SSE-NEXT: pand %xmm2, %xmm1
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: pand %xmm2, %xmm0
; X86-SSE-NEXT: packuswb %xmm1, %xmm0
; X86-SSE-NEXT: retl
  %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <16 x i8> %shift
}

;
; Uniform Constant Shifts
;

; shl <2 x i64> by a splatted constant 7: folds to one immediate psllq on
; every target.
define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: psllq $7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatconstant_shift_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vpsllq $7, %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: splatconstant_shift_v2i64:
; XOP: # %bb.0:
; XOP-NEXT: vpsllq $7, %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllq $7, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_shift_v2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq $7, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v2i64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: psllq $7, %xmm0
; X86-SSE-NEXT: retl
  %shift = shl <2 x i64> %a, <i64 7, i64 7>
  ret <2 x i64> %shift
}

; shl <4 x i32> by a splatted constant 5: one immediate pslld everywhere.
define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pslld $5, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatconstant_shift_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpslld $5, %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: splatconstant_shift_v4i32:
; XOP: # %bb.0:
; XOP-NEXT: vpslld $5, %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpslld $5, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_shift_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $5, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v4i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pslld $5, %xmm0
; X86-SSE-NEXT: retl
  %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %shift
}

; shl <8 x i16> by a splatted constant 3: one immediate psllw everywhere.
define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: psllw $3, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatconstant_shift_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: splatconstant_shift_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vpsllw $3, %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_shift_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: psllw $3, %xmm0
; X86-SSE-NEXT: retl
  %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

; shl <16 x i8> by a splatted constant 3: no byte shift exists, so the bytes
; are shifted as words (psllw) and the bits carried in from the neighbouring
; byte are masked off with a constant-pool pand; XOP uses its native per-byte
; vpshlb instead.
define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: psllw $3, %xmm0
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatconstant_shift_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: splatconstant_shift_v16i8:
; XOP: # %bb.0:
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512-LABEL: splatconstant_shift_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_shift_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatconstant_shift_v16i8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: psllw $3, %xmm0
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: retl
  %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %shift
}