; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512

;
; 128-bit vectors
;

define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:         # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:         # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:         # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:         # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:         # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX1-NEXT:    # xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:         # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:         # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:         # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:         # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:         # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:         # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:         # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:         # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:         # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:         # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i64_64i8:
; AVX2:         # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i64_64i8:
; AVX512:         # %bb.0:
; AVX512-NEXT:    kmovq %rdi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = sext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}