1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefixes=AVX,AVX512VPOPCNTDQ 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VPOPCNTDQVL 10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=BITALG_NOVLX 11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=BITALG 12 13define <2 x i64> @testv2i64(<2 x i64> %in) nounwind { 14; SSE2-LABEL: testv2i64: 15; SSE2: # %bb.0: 16; SSE2-NEXT: movdqa %xmm0, %xmm1 17; SSE2-NEXT: psrlw $1, %xmm1 18; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 19; SSE2-NEXT: psubb %xmm1, %xmm0 20; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 21; SSE2-NEXT: movdqa %xmm0, %xmm2 22; SSE2-NEXT: pand %xmm1, %xmm2 23; SSE2-NEXT: psrlw $2, %xmm0 24; SSE2-NEXT: pand %xmm1, %xmm0 25; SSE2-NEXT: paddb %xmm2, %xmm0 26; SSE2-NEXT: movdqa %xmm0, %xmm1 27; SSE2-NEXT: psrlw $4, %xmm1 28; SSE2-NEXT: paddb %xmm0, %xmm1 29; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 30; SSE2-NEXT: pxor %xmm0, %xmm0 31; SSE2-NEXT: psadbw %xmm0, %xmm1 32; SSE2-NEXT: movdqa %xmm1, %xmm0 33; SSE2-NEXT: retq 34; 35; SSE3-LABEL: 
testv2i64: 36; SSE3: # %bb.0: 37; SSE3-NEXT: movdqa %xmm0, %xmm1 38; SSE3-NEXT: psrlw $1, %xmm1 39; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 40; SSE3-NEXT: psubb %xmm1, %xmm0 41; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 42; SSE3-NEXT: movdqa %xmm0, %xmm2 43; SSE3-NEXT: pand %xmm1, %xmm2 44; SSE3-NEXT: psrlw $2, %xmm0 45; SSE3-NEXT: pand %xmm1, %xmm0 46; SSE3-NEXT: paddb %xmm2, %xmm0 47; SSE3-NEXT: movdqa %xmm0, %xmm1 48; SSE3-NEXT: psrlw $4, %xmm1 49; SSE3-NEXT: paddb %xmm0, %xmm1 50; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 51; SSE3-NEXT: pxor %xmm0, %xmm0 52; SSE3-NEXT: psadbw %xmm0, %xmm1 53; SSE3-NEXT: movdqa %xmm1, %xmm0 54; SSE3-NEXT: retq 55; 56; SSSE3-LABEL: testv2i64: 57; SSSE3: # %bb.0: 58; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 59; SSSE3-NEXT: movdqa %xmm0, %xmm2 60; SSSE3-NEXT: pand %xmm1, %xmm2 61; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 62; SSSE3-NEXT: movdqa %xmm3, %xmm4 63; SSSE3-NEXT: pshufb %xmm2, %xmm4 64; SSSE3-NEXT: psrlw $4, %xmm0 65; SSSE3-NEXT: pand %xmm1, %xmm0 66; SSSE3-NEXT: pshufb %xmm0, %xmm3 67; SSSE3-NEXT: paddb %xmm4, %xmm3 68; SSSE3-NEXT: pxor %xmm0, %xmm0 69; SSSE3-NEXT: psadbw %xmm3, %xmm0 70; SSSE3-NEXT: retq 71; 72; SSE41-LABEL: testv2i64: 73; SSE41: # %bb.0: 74; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 75; SSE41-NEXT: movdqa %xmm0, %xmm2 76; SSE41-NEXT: pand %xmm1, %xmm2 77; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 78; SSE41-NEXT: movdqa %xmm3, %xmm4 79; SSE41-NEXT: pshufb %xmm2, %xmm4 80; SSE41-NEXT: psrlw $4, %xmm0 81; SSE41-NEXT: pand %xmm1, %xmm0 82; SSE41-NEXT: pshufb %xmm0, %xmm3 83; SSE41-NEXT: paddb %xmm4, %xmm3 84; SSE41-NEXT: pxor %xmm0, %xmm0 85; SSE41-NEXT: psadbw %xmm3, %xmm0 86; SSE41-NEXT: retq 87; 88; AVX1-LABEL: testv2i64: 89; AVX1: # %bb.0: 90; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 91; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 92; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 93; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 94; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 95; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 96; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 97; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 98; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 99; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 100; AVX1-NEXT: retq 101; 102; AVX2-LABEL: testv2i64: 103; AVX2: # %bb.0: 104; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 105; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 106; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 107; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 108; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 109; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 110; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 111; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 112; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 113; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 114; AVX2-NEXT: retq 115; 116; AVX512VPOPCNTDQ-LABEL: testv2i64: 117; AVX512VPOPCNTDQ: # %bb.0: 118; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 119; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 120; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 121; AVX512VPOPCNTDQ-NEXT: vzeroupper 122; AVX512VPOPCNTDQ-NEXT: retq 123; 124; AVX512VPOPCNTDQVL-LABEL: testv2i64: 125; AVX512VPOPCNTDQVL: # %bb.0: 126; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 127; AVX512VPOPCNTDQVL-NEXT: retq 128; 129; BITALG_NOVLX-LABEL: testv2i64: 130; BITALG_NOVLX: # %bb.0: 131; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 132; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 133; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 134; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 135; BITALG_NOVLX-NEXT: vzeroupper 136; BITALG_NOVLX-NEXT: retq 137; 138; BITALG-LABEL: testv2i64: 139; BITALG: # %bb.0: 140; BITALG-NEXT: vpopcntb %xmm0, %xmm0 
141; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 142; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 143; BITALG-NEXT: retq 144 %out = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %in) 145 ret <2 x i64> %out 146} 147 148define <4 x i32> @testv4i32(<4 x i32> %in) nounwind { 149; SSE2-LABEL: testv4i32: 150; SSE2: # %bb.0: 151; SSE2-NEXT: movdqa %xmm0, %xmm1 152; SSE2-NEXT: psrlw $1, %xmm1 153; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 154; SSE2-NEXT: psubb %xmm1, %xmm0 155; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 156; SSE2-NEXT: movdqa %xmm0, %xmm2 157; SSE2-NEXT: pand %xmm1, %xmm2 158; SSE2-NEXT: psrlw $2, %xmm0 159; SSE2-NEXT: pand %xmm1, %xmm0 160; SSE2-NEXT: paddb %xmm2, %xmm0 161; SSE2-NEXT: movdqa %xmm0, %xmm1 162; SSE2-NEXT: psrlw $4, %xmm1 163; SSE2-NEXT: paddb %xmm0, %xmm1 164; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 165; SSE2-NEXT: pxor %xmm0, %xmm0 166; SSE2-NEXT: movdqa %xmm1, %xmm2 167; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 168; SSE2-NEXT: psadbw %xmm0, %xmm2 169; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 170; SSE2-NEXT: psadbw %xmm0, %xmm1 171; SSE2-NEXT: packuswb %xmm2, %xmm1 172; SSE2-NEXT: movdqa %xmm1, %xmm0 173; SSE2-NEXT: retq 174; 175; SSE3-LABEL: testv4i32: 176; SSE3: # %bb.0: 177; SSE3-NEXT: movdqa %xmm0, %xmm1 178; SSE3-NEXT: psrlw $1, %xmm1 179; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 180; SSE3-NEXT: psubb %xmm1, %xmm0 181; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 182; SSE3-NEXT: movdqa %xmm0, %xmm2 183; SSE3-NEXT: pand %xmm1, %xmm2 184; SSE3-NEXT: psrlw $2, %xmm0 185; SSE3-NEXT: pand %xmm1, %xmm0 186; SSE3-NEXT: paddb %xmm2, %xmm0 187; SSE3-NEXT: movdqa %xmm0, %xmm1 188; SSE3-NEXT: psrlw $4, %xmm1 189; SSE3-NEXT: paddb %xmm0, %xmm1 190; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 191; SSE3-NEXT: pxor %xmm0, %xmm0 192; SSE3-NEXT: movdqa %xmm1, %xmm2 193; 
SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 194; SSE3-NEXT: psadbw %xmm0, %xmm2 195; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 196; SSE3-NEXT: psadbw %xmm0, %xmm1 197; SSE3-NEXT: packuswb %xmm2, %xmm1 198; SSE3-NEXT: movdqa %xmm1, %xmm0 199; SSE3-NEXT: retq 200; 201; SSSE3-LABEL: testv4i32: 202; SSSE3: # %bb.0: 203; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 204; SSSE3-NEXT: movdqa %xmm0, %xmm3 205; SSSE3-NEXT: pand %xmm2, %xmm3 206; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 207; SSSE3-NEXT: movdqa %xmm1, %xmm4 208; SSSE3-NEXT: pshufb %xmm3, %xmm4 209; SSSE3-NEXT: psrlw $4, %xmm0 210; SSSE3-NEXT: pand %xmm2, %xmm0 211; SSSE3-NEXT: pshufb %xmm0, %xmm1 212; SSSE3-NEXT: paddb %xmm4, %xmm1 213; SSSE3-NEXT: pxor %xmm0, %xmm0 214; SSSE3-NEXT: movdqa %xmm1, %xmm2 215; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] 216; SSSE3-NEXT: psadbw %xmm0, %xmm2 217; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 218; SSSE3-NEXT: psadbw %xmm0, %xmm1 219; SSSE3-NEXT: packuswb %xmm2, %xmm1 220; SSSE3-NEXT: movdqa %xmm1, %xmm0 221; SSSE3-NEXT: retq 222; 223; SSE41-LABEL: testv4i32: 224; SSE41: # %bb.0: 225; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 226; SSE41-NEXT: movdqa %xmm0, %xmm2 227; SSE41-NEXT: pand %xmm1, %xmm2 228; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 229; SSE41-NEXT: movdqa %xmm3, %xmm4 230; SSE41-NEXT: pshufb %xmm2, %xmm4 231; SSE41-NEXT: psrlw $4, %xmm0 232; SSE41-NEXT: pand %xmm1, %xmm0 233; SSE41-NEXT: pshufb %xmm0, %xmm3 234; SSE41-NEXT: paddb %xmm4, %xmm3 235; SSE41-NEXT: pxor %xmm1, %xmm1 236; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero 237; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] 238; SSE41-NEXT: psadbw %xmm1, %xmm3 239; SSE41-NEXT: psadbw %xmm1, %xmm0 240; SSE41-NEXT: packuswb 
%xmm3, %xmm0 241; SSE41-NEXT: retq 242; 243; AVX1-LABEL: testv4i32: 244; AVX1: # %bb.0: 245; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 246; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 247; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 248; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 249; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 250; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 251; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 252; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 253; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 254; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 255; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 256; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 257; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 258; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 259; AVX1-NEXT: retq 260; 261; AVX2-LABEL: testv4i32: 262; AVX2: # %bb.0: 263; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 264; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 265; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 266; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 267; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 268; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 269; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 270; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 271; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 272; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 273; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 274; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 275; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 276; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 277; AVX2-NEXT: retq 278; 279; AVX512VPOPCNTDQ-LABEL: testv4i32: 280; AVX512VPOPCNTDQ: # %bb.0: 281; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 282; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 283; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 284; AVX512VPOPCNTDQ-NEXT: vzeroupper 285; AVX512VPOPCNTDQ-NEXT: retq 286; 
287; AVX512VPOPCNTDQVL-LABEL: testv4i32: 288; AVX512VPOPCNTDQVL: # %bb.0: 289; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 290; AVX512VPOPCNTDQVL-NEXT: retq 291; 292; BITALG_NOVLX-LABEL: testv4i32: 293; BITALG_NOVLX: # %bb.0: 294; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 295; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 296; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 297; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 298; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 299; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 300; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 301; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 302; BITALG_NOVLX-NEXT: vzeroupper 303; BITALG_NOVLX-NEXT: retq 304; 305; BITALG-LABEL: testv4i32: 306; BITALG: # %bb.0: 307; BITALG-NEXT: vpopcntb %xmm0, %xmm0 308; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 309; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 310; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 311; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 312; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 313; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 314; BITALG-NEXT: retq 315 %out = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %in) 316 ret <4 x i32> %out 317} 318 319define <8 x i16> @testv8i16(<8 x i16> %in) nounwind { 320; SSE2-LABEL: testv8i16: 321; SSE2: # %bb.0: 322; SSE2-NEXT: movdqa %xmm0, %xmm1 323; SSE2-NEXT: psrlw $1, %xmm1 324; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 325; SSE2-NEXT: psubb %xmm1, %xmm0 326; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 327; SSE2-NEXT: movdqa %xmm0, %xmm2 328; SSE2-NEXT: pand %xmm1, %xmm2 329; SSE2-NEXT: psrlw $2, %xmm0 330; SSE2-NEXT: pand %xmm1, %xmm0 331; SSE2-NEXT: paddb %xmm2, %xmm0 332; SSE2-NEXT: movdqa %xmm0, %xmm1 333; SSE2-NEXT: psrlw $4, %xmm1 334; SSE2-NEXT: paddb %xmm0, %xmm1 335; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm1 336; SSE2-NEXT: movdqa %xmm1, %xmm0 337; SSE2-NEXT: psllw $8, %xmm0 338; SSE2-NEXT: paddb %xmm1, %xmm0 339; SSE2-NEXT: psrlw $8, %xmm0 340; SSE2-NEXT: retq 341; 342; SSE3-LABEL: testv8i16: 343; SSE3: # %bb.0: 344; SSE3-NEXT: movdqa %xmm0, %xmm1 345; SSE3-NEXT: psrlw $1, %xmm1 346; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 347; SSE3-NEXT: psubb %xmm1, %xmm0 348; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 349; SSE3-NEXT: movdqa %xmm0, %xmm2 350; SSE3-NEXT: pand %xmm1, %xmm2 351; SSE3-NEXT: psrlw $2, %xmm0 352; SSE3-NEXT: pand %xmm1, %xmm0 353; SSE3-NEXT: paddb %xmm2, %xmm0 354; SSE3-NEXT: movdqa %xmm0, %xmm1 355; SSE3-NEXT: psrlw $4, %xmm1 356; SSE3-NEXT: paddb %xmm0, %xmm1 357; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 358; SSE3-NEXT: movdqa %xmm1, %xmm0 359; SSE3-NEXT: psllw $8, %xmm0 360; SSE3-NEXT: paddb %xmm1, %xmm0 361; SSE3-NEXT: psrlw $8, %xmm0 362; SSE3-NEXT: retq 363; 364; SSSE3-LABEL: testv8i16: 365; SSSE3: # %bb.0: 366; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 367; SSSE3-NEXT: movdqa %xmm0, %xmm2 368; SSSE3-NEXT: pand %xmm1, %xmm2 369; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 370; SSSE3-NEXT: movdqa %xmm3, %xmm4 371; SSSE3-NEXT: pshufb %xmm2, %xmm4 372; SSSE3-NEXT: psrlw $4, %xmm0 373; SSSE3-NEXT: pand %xmm1, %xmm0 374; SSSE3-NEXT: pshufb %xmm0, %xmm3 375; SSSE3-NEXT: paddb %xmm4, %xmm3 376; SSSE3-NEXT: movdqa %xmm3, %xmm0 377; SSSE3-NEXT: psllw $8, %xmm0 378; SSSE3-NEXT: paddb %xmm3, %xmm0 379; SSSE3-NEXT: psrlw $8, %xmm0 380; SSSE3-NEXT: retq 381; 382; SSE41-LABEL: testv8i16: 383; SSE41: # %bb.0: 384; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 385; SSE41-NEXT: movdqa %xmm0, %xmm2 386; SSE41-NEXT: pand %xmm1, %xmm2 387; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 388; SSE41-NEXT: movdqa %xmm3, %xmm4 389; SSE41-NEXT: pshufb %xmm2, %xmm4 390; 
SSE41-NEXT: psrlw $4, %xmm0 391; SSE41-NEXT: pand %xmm1, %xmm0 392; SSE41-NEXT: pshufb %xmm0, %xmm3 393; SSE41-NEXT: paddb %xmm4, %xmm3 394; SSE41-NEXT: movdqa %xmm3, %xmm0 395; SSE41-NEXT: psllw $8, %xmm0 396; SSE41-NEXT: paddb %xmm3, %xmm0 397; SSE41-NEXT: psrlw $8, %xmm0 398; SSE41-NEXT: retq 399; 400; AVX1-LABEL: testv8i16: 401; AVX1: # %bb.0: 402; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 403; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 404; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 405; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 406; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 407; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 408; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 409; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 410; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 411; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 412; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 413; AVX1-NEXT: retq 414; 415; AVX2-LABEL: testv8i16: 416; AVX2: # %bb.0: 417; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 418; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 419; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 420; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 421; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 422; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 423; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 424; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 425; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 426; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 427; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 428; AVX2-NEXT: retq 429; 430; AVX512VPOPCNTDQ-LABEL: testv8i16: 431; AVX512VPOPCNTDQ: # %bb.0: 432; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 433; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 434; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 435; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 436; AVX512VPOPCNTDQ-NEXT: vzeroupper 437; AVX512VPOPCNTDQ-NEXT: retq 
438; 439; AVX512VPOPCNTDQVL-LABEL: testv8i16: 440; AVX512VPOPCNTDQVL: # %bb.0: 441; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 442; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 443; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 444; AVX512VPOPCNTDQVL-NEXT: vzeroupper 445; AVX512VPOPCNTDQVL-NEXT: retq 446; 447; BITALG_NOVLX-LABEL: testv8i16: 448; BITALG_NOVLX: # %bb.0: 449; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 450; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 451; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 452; BITALG_NOVLX-NEXT: vzeroupper 453; BITALG_NOVLX-NEXT: retq 454; 455; BITALG-LABEL: testv8i16: 456; BITALG: # %bb.0: 457; BITALG-NEXT: vpopcntw %xmm0, %xmm0 458; BITALG-NEXT: retq 459 %out = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %in) 460 ret <8 x i16> %out 461} 462 463define <16 x i8> @testv16i8(<16 x i8> %in) nounwind { 464; SSE2-LABEL: testv16i8: 465; SSE2: # %bb.0: 466; SSE2-NEXT: movdqa %xmm0, %xmm1 467; SSE2-NEXT: psrlw $1, %xmm1 468; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 469; SSE2-NEXT: psubb %xmm1, %xmm0 470; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 471; SSE2-NEXT: movdqa %xmm0, %xmm2 472; SSE2-NEXT: pand %xmm1, %xmm2 473; SSE2-NEXT: psrlw $2, %xmm0 474; SSE2-NEXT: pand %xmm1, %xmm0 475; SSE2-NEXT: paddb %xmm2, %xmm0 476; SSE2-NEXT: movdqa %xmm0, %xmm1 477; SSE2-NEXT: psrlw $4, %xmm1 478; SSE2-NEXT: paddb %xmm0, %xmm1 479; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 480; SSE2-NEXT: movdqa %xmm1, %xmm0 481; SSE2-NEXT: retq 482; 483; SSE3-LABEL: testv16i8: 484; SSE3: # %bb.0: 485; SSE3-NEXT: movdqa %xmm0, %xmm1 486; SSE3-NEXT: psrlw $1, %xmm1 487; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 488; SSE3-NEXT: psubb %xmm1, %xmm0 489; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] 490; 
SSE3-NEXT: movdqa %xmm0, %xmm2 491; SSE3-NEXT: pand %xmm1, %xmm2 492; SSE3-NEXT: psrlw $2, %xmm0 493; SSE3-NEXT: pand %xmm1, %xmm0 494; SSE3-NEXT: paddb %xmm2, %xmm0 495; SSE3-NEXT: movdqa %xmm0, %xmm1 496; SSE3-NEXT: psrlw $4, %xmm1 497; SSE3-NEXT: paddb %xmm0, %xmm1 498; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 499; SSE3-NEXT: movdqa %xmm1, %xmm0 500; SSE3-NEXT: retq 501; 502; SSSE3-LABEL: testv16i8: 503; SSSE3: # %bb.0: 504; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 505; SSSE3-NEXT: movdqa %xmm0, %xmm3 506; SSSE3-NEXT: pand %xmm2, %xmm3 507; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 508; SSSE3-NEXT: movdqa %xmm1, %xmm4 509; SSSE3-NEXT: pshufb %xmm3, %xmm4 510; SSSE3-NEXT: psrlw $4, %xmm0 511; SSSE3-NEXT: pand %xmm2, %xmm0 512; SSSE3-NEXT: pshufb %xmm0, %xmm1 513; SSSE3-NEXT: paddb %xmm4, %xmm1 514; SSSE3-NEXT: movdqa %xmm1, %xmm0 515; SSSE3-NEXT: retq 516; 517; SSE41-LABEL: testv16i8: 518; SSE41: # %bb.0: 519; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 520; SSE41-NEXT: movdqa %xmm0, %xmm3 521; SSE41-NEXT: pand %xmm2, %xmm3 522; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 523; SSE41-NEXT: movdqa %xmm1, %xmm4 524; SSE41-NEXT: pshufb %xmm3, %xmm4 525; SSE41-NEXT: psrlw $4, %xmm0 526; SSE41-NEXT: pand %xmm2, %xmm0 527; SSE41-NEXT: pshufb %xmm0, %xmm1 528; SSE41-NEXT: paddb %xmm4, %xmm1 529; SSE41-NEXT: movdqa %xmm1, %xmm0 530; SSE41-NEXT: retq 531; 532; AVX1-LABEL: testv16i8: 533; AVX1: # %bb.0: 534; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 535; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 536; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 537; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 538; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 539; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 540; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 541; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 542; 
AVX1-NEXT: retq 543; 544; AVX2-LABEL: testv16i8: 545; AVX2: # %bb.0: 546; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 547; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 548; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 549; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 550; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 551; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 552; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 553; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 554; AVX2-NEXT: retq 555; 556; AVX512VPOPCNTDQ-LABEL: testv16i8: 557; AVX512VPOPCNTDQ: # %bb.0: 558; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 559; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 560; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 561; AVX512VPOPCNTDQ-NEXT: vzeroupper 562; AVX512VPOPCNTDQ-NEXT: retq 563; 564; AVX512VPOPCNTDQVL-LABEL: testv16i8: 565; AVX512VPOPCNTDQVL: # %bb.0: 566; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 567; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 568; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 569; AVX512VPOPCNTDQVL-NEXT: vzeroupper 570; AVX512VPOPCNTDQVL-NEXT: retq 571; 572; BITALG_NOVLX-LABEL: testv16i8: 573; BITALG_NOVLX: # %bb.0: 574; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 575; 
BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 576; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 577; BITALG_NOVLX-NEXT: vzeroupper 578; BITALG_NOVLX-NEXT: retq 579; 580; BITALG-LABEL: testv16i8: 581; BITALG: # %bb.0: 582; BITALG-NEXT: vpopcntb %xmm0, %xmm0 583; BITALG-NEXT: retq 584 %out = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %in) 585 ret <16 x i8> %out 586} 587 588define <2 x i64> @foldv2i64() nounwind { 589; SSE-LABEL: foldv2i64: 590; SSE: # %bb.0: 591; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,64] 592; SSE-NEXT: retq 593; 594; AVX-LABEL: foldv2i64: 595; AVX: # %bb.0: 596; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,64] 597; AVX-NEXT: retq 598; 599; BITALG_NOVLX-LABEL: foldv2i64: 600; BITALG_NOVLX: # %bb.0: 601; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [1,64] 602; BITALG_NOVLX-NEXT: retq 603; 604; BITALG-LABEL: foldv2i64: 605; BITALG: # %bb.0: 606; BITALG-NEXT: vmovaps {{.*#+}} xmm0 = [1,64] 607; BITALG-NEXT: retq 608 %out = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> <i64 256, i64 -1>) 609 ret <2 x i64> %out 610} 611 612define <4 x i32> @foldv4i32() nounwind { 613; SSE-LABEL: foldv4i32: 614; SSE: # %bb.0: 615; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,32,0,8] 616; SSE-NEXT: retq 617; 618; AVX-LABEL: foldv4i32: 619; AVX: # %bb.0: 620; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,32,0,8] 621; AVX-NEXT: retq 622; 623; BITALG_NOVLX-LABEL: foldv4i32: 624; BITALG_NOVLX: # %bb.0: 625; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [1,32,0,8] 626; BITALG_NOVLX-NEXT: retq 627; 628; BITALG-LABEL: foldv4i32: 629; BITALG: # %bb.0: 630; BITALG-NEXT: vmovaps {{.*#+}} xmm0 = [1,32,0,8] 631; BITALG-NEXT: retq 632 %out = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>) 633 ret <4 x i32> %out 634} 635; foldv8i16: ctpop of an all-constant <8 x i16> is expected to fold to a single constant vector load plus retq; lane 4 is spelled i16 0 (formerly i16 -65536, which does not fit in i16 and only reached 0 by wrapping). 636define <8 x i16> @foldv8i16() nounwind { 637; SSE-LABEL: foldv8i16: 638; SSE: # %bb.0: 639; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3] 640; SSE-NEXT: retq 641; 642; AVX-LABEL: foldv8i16: 643; AVX: # %bb.0: 644; AVX-NEXT: vmovaps {{.*#+}} xmm0 = 
[1,16,0,8,0,3,2,3] 645; AVX-NEXT: retq 646; 647; BITALG_NOVLX-LABEL: foldv8i16: 648; BITALG_NOVLX: # %bb.0: 649; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3] 650; BITALG_NOVLX-NEXT: retq 651; 652; BITALG-LABEL: foldv8i16: 653; BITALG: # %bb.0: 654; BITALG-NEXT: vmovaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3] 655; BITALG-NEXT: retq 656 %out = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 0, i16 7, i16 24, i16 88>) 657 ret <8 x i16> %out 658} 659; foldv16i8: same constant-fold check for <16 x i8>; lanes 0 and 4 are spelled i8 0 (formerly i8 256 and i8 -65536, neither of which fits in i8; both only reached 0 by wrapping). 660define <16 x i8> @foldv16i8() nounwind { 661; SSE-LABEL: foldv16i8: 662; SSE: # %bb.0: 663; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1] 664; SSE-NEXT: retq 665; 666; AVX-LABEL: foldv16i8: 667; AVX: # %bb.0: 668; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1] 669; AVX-NEXT: retq 670; 671; BITALG_NOVLX-LABEL: foldv16i8: 672; BITALG_NOVLX: # %bb.0: 673; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1] 674; BITALG_NOVLX-NEXT: retq 675; 676; BITALG-LABEL: foldv16i8: 677; BITALG: # %bb.0: 678; BITALG-NEXT: vmovaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1] 679; BITALG-NEXT: retq 680 %out = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> <i8 0, i8 -1, i8 0, i8 255, i8 0, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>) 681 ret <16 x i8> %out 682} 683 684define <2 x i64> @eq_1_v2i64(<2 x i64> %0) { 685; SSE2-LABEL: eq_1_v2i64: 686; SSE2: # %bb.0: 687; SSE2-NEXT: pxor %xmm1, %xmm1 688; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 689; SSE2-NEXT: paddq %xmm0, %xmm2 690; SSE2-NEXT: pand %xmm0, %xmm2 691; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 692; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2] 693; SSE2-NEXT: pand %xmm3, %xmm0 694; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 695; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2] 696; SSE2-NEXT: pand %xmm2, %xmm1 697; SSE2-NEXT: pandn %xmm1, %xmm0 698; SSE2-NEXT: retq 699; 700; SSE3-LABEL: eq_1_v2i64: 701; SSE3: # %bb.0: 702; SSE3-NEXT: pxor %xmm1, %xmm1 703; 
SSE3-NEXT: pcmpeqd %xmm2, %xmm2 704; SSE3-NEXT: paddq %xmm0, %xmm2 705; SSE3-NEXT: pand %xmm0, %xmm2 706; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 707; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2] 708; SSE3-NEXT: pand %xmm3, %xmm0 709; SSE3-NEXT: pcmpeqd %xmm1, %xmm2 710; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2] 711; SSE3-NEXT: pand %xmm2, %xmm1 712; SSE3-NEXT: pandn %xmm1, %xmm0 713; SSE3-NEXT: retq 714; 715; SSSE3-LABEL: eq_1_v2i64: 716; SSSE3: # %bb.0: 717; SSSE3-NEXT: pxor %xmm1, %xmm1 718; SSSE3-NEXT: pcmpeqd %xmm2, %xmm2 719; SSSE3-NEXT: paddq %xmm0, %xmm2 720; SSSE3-NEXT: pand %xmm0, %xmm2 721; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 722; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2] 723; SSSE3-NEXT: pand %xmm3, %xmm0 724; SSSE3-NEXT: pcmpeqd %xmm1, %xmm2 725; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2] 726; SSSE3-NEXT: pand %xmm2, %xmm1 727; SSSE3-NEXT: pandn %xmm1, %xmm0 728; SSSE3-NEXT: retq 729; 730; SSE41-LABEL: eq_1_v2i64: 731; SSE41: # %bb.0: 732; SSE41-NEXT: pxor %xmm1, %xmm1 733; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 734; SSE41-NEXT: paddq %xmm0, %xmm2 735; SSE41-NEXT: pand %xmm0, %xmm2 736; SSE41-NEXT: pcmpeqq %xmm1, %xmm0 737; SSE41-NEXT: pcmpeqq %xmm1, %xmm2 738; SSE41-NEXT: pandn %xmm2, %xmm0 739; SSE41-NEXT: retq 740; 741; AVX1-LABEL: eq_1_v2i64: 742; AVX1: # %bb.0: 743; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 744; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2 745; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 746; AVX1-NEXT: vpaddq %xmm3, %xmm0, %xmm3 747; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 748; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 749; AVX1-NEXT: vpandn %xmm0, %xmm2, %xmm0 750; AVX1-NEXT: retq 751; 752; AVX2-LABEL: eq_1_v2i64: 753; AVX2: # %bb.0: 754; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 755; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2 756; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 757; AVX2-NEXT: vpaddq %xmm3, %xmm0, %xmm3 758; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0 759; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 760; AVX2-NEXT: vpandn %xmm0, %xmm2, %xmm0 761; 
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: eq_1_v2i64:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: eq_1_v2i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: eq_1_v2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; BITALG_NOVLX-NEXT: vpaddq %xmm3, %xmm0, %xmm3
; BITALG_NOVLX-NEXT: vpand %xmm3, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpandn %xmm0, %xmm2, %xmm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: eq_1_v2i64:
; BITALG: # %bb.0:
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2
; BITALG-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; BITALG-NEXT: vpaddq %xmm3, %xmm0, %xmm3
; BITALG-NEXT: vpand %xmm3, %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; BITALG-NEXT: vpandn %xmm0, %xmm2, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp eq <2 x i64> %2, <i64 1, i64 1>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}

; ctpop(x) != 1 on <2 x i64>, sign-extended to a per-lane all-ones / all-zeros mask.
; The expected code either uses vpopcntq and compares against a constant, or, where
; no vpopcntq appears, forms x & (x - 1) (add of all-ones, then and), compares that
; with zero, inverts it, and ors in an x == 0 test.
define <2 x i64> @ne_1_v2i64(<2 x i64> %0) {
; SSE2-LABEL: ne_1_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: paddq %xmm2, %xmm3
; SSE2-NEXT: pand %xmm0, %xmm3
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,0,3,2]
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: ne_1_v2i64:
; SSE3: # %bb.0:
; SSE3-NEXT: pxor %xmm1, %xmm1
; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE3-NEXT: movdqa %xmm0, %xmm3
; SSE3-NEXT: paddq %xmm2, %xmm3
; SSE3-NEXT: pand %xmm0, %xmm3
; SSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,0,3,2]
; SSE3-NEXT: pand %xmm4, %xmm0
; SSE3-NEXT: pcmpeqd %xmm1, %xmm3
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,0,3,2]
; SSE3-NEXT: pand %xmm3, %xmm1
; SSE3-NEXT: pxor %xmm2, %xmm1
; SSE3-NEXT: por %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: ne_1_v2i64:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: paddq %xmm2, %xmm3
; SSSE3-NEXT: pand %xmm0, %xmm3
; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,0,3,2]
; SSSE3-NEXT: pand %xmm4, %xmm0
; SSSE3-NEXT: pcmpeqd %xmm1, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,0,3,2]
; SSSE3-NEXT: pand %xmm3, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm1
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: ne_1_v2i64:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: paddq %xmm3, %xmm1
; SSE41-NEXT: pand %xmm0, %xmm1
; SSE41-NEXT: pcmpeqq %xmm2, %xmm0
; SSE41-NEXT: pcmpeqq %xmm2, %xmm1
; SSE41-NEXT: pxor %xmm3, %xmm1
; SSE41-NEXT: por %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: ne_1_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpaddq %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ne_1_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpaddq %xmm3, %xmm0, %xmm4
; AVX2-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: ne_1_v2i64:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: ne_1_v2i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: ne_1_v2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; BITALG_NOVLX-NEXT: vpaddq %xmm3, %xmm0, %xmm3
; BITALG_NOVLX-NEXT: vpand %xmm3, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpor %xmm0, %xmm2, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: ne_1_v2i64:
; BITALG: # %bb.0:
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2
; BITALG-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; BITALG-NEXT: vpaddq %xmm3, %xmm0, %xmm4
; BITALG-NEXT: vpand %xmm4, %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; BITALG-NEXT: vpternlogq $222, %xmm3, %xmm2, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  %3 = icmp ne <2 x i64> %2, <i64 1, i64 1>
  %4 = sext <2 x i1> %3 to <2 x i64>
  ret <2 x i64> %4
}

; ctpop(x) == 1 on <4 x i32>, sign-extended to a per-lane all-ones / all-zeros mask.
; The expected code either uses vpopcntd and compares against a broadcast of 1, or,
; where no vpopcntd appears, tests (x & (x - 1)) == 0 and masks out the x == 0 lanes
; with pandn / vpandn.
define <4 x i32> @eq_1_v4i32(<4 x i32> %0) {
; SSE-LABEL: eq_1_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: paddd %xmm0, %xmm2
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pcmpeqd %xmm1, %xmm0
; SSE-NEXT: pcmpeqd %xmm1, %xmm2
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: eq_1_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpandn %xmm0, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: eq_1_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpaddd %xmm3, %xmm0, %xmm3
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpandn %xmm0, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: eq_1_v4i32:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: eq_1_v4i32:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: eq_1_v4i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; BITALG_NOVLX-NEXT: vpaddd %xmm3, %xmm0, %xmm3
; BITALG_NOVLX-NEXT: vpand %xmm3, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpandn %xmm0, %xmm2, %xmm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: eq_1_v4i32:
; BITALG: # %bb.0:
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
; BITALG-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; BITALG-NEXT: vpaddd %xmm3, %xmm0, %xmm3
; BITALG-NEXT: vpand %xmm3, %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; BITALG-NEXT: vpandn %xmm0, %xmm2, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp eq <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}

; ctpop(x) != 1 on <4 x i32>, sign-extended to a per-lane all-ones / all-zeros mask.
; The expected code either uses vpopcntd, compares against a broadcast of 1 and
; inverts the result, or, where no vpopcntd appears, inverts (x & (x - 1)) == 0 and
; ors in an x == 0 test.
define <4 x i32> @ne_1_v4i32(<4 x i32> %0) {
; SSE-LABEL: ne_1_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpeqd %xmm3, %xmm3
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pcmpeqd %xmm2, %xmm0
; SSE-NEXT: pcmpeqd %xmm2, %xmm1
; SSE-NEXT: pxor %xmm3, %xmm1
; SSE-NEXT: por %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: ne_1_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ne_1_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpaddd %xmm3, %xmm0, %xmm4
; AVX2-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: ne_1_v4i32:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: ne_1_v4i32:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: ne_1_v4i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; BITALG_NOVLX-NEXT: vpaddd %xmm3, %xmm0, %xmm3
; BITALG_NOVLX-NEXT: vpand %xmm3, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpor %xmm0, %xmm2, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: ne_1_v4i32:
; BITALG: # %bb.0:
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
; BITALG-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; BITALG-NEXT: vpaddd %xmm3, %xmm0, %xmm4
; BITALG-NEXT: vpand %xmm4, %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; BITALG-NEXT: vpternlogd $222, %xmm3, %xmm2, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  %3 = icmp ne <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  %4 = sext <4 x i1> %3 to <4 x i32>
  ret <4 x i32> %4
}

; ctpop(x) == 1 on <8 x i16>, sign-extended to a per-lane all-ones / all-zeros mask.
; The expected code either uses vpopcntw and compares against a constant, or, where
; no vpopcntw appears, tests (x & (x - 1)) == 0 and masks out the x == 0 lanes with
; pandn / vpandn.
define <8 x i16> @eq_1_v8i16(<8 x i16> %0) {
; SSE-LABEL: eq_1_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: paddw %xmm0, %xmm2
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pcmpeqw %xmm1, %xmm0
; SSE-NEXT: pcmpeqw %xmm1, %xmm2
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: eq_1_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX-NEXT: vpaddw %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpandn %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
;
; BITALG_NOVLX-LABEL: eq_1_v8i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: eq_1_v8i16:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntw %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp eq <8 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}

; ctpop(x) != 1 on <8 x i16>, sign-extended to a per-lane all-ones / all-zeros mask.
; The expected code either uses vpopcntw, compares against a constant and inverts
; the result, or, where no vpopcntw appears, inverts (x & (x - 1)) == 0 and ors in
; an x == 0 test.
define <8 x i16> @ne_1_v8i16(<8 x i16> %0) {
; SSE-LABEL: ne_1_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpeqd %xmm3, %xmm3
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: paddw %xmm3, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pcmpeqw %xmm2, %xmm0
; SSE-NEXT: pcmpeqw %xmm2, %xmm1
; SSE-NEXT: pxor %xmm3, %xmm1
; SSE-NEXT: por %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: ne_1_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpaddw %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ne_1_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpaddw %xmm3, %xmm0, %xmm4
; AVX2-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: ne_1_v8i16:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512VPOPCNTDQ-NEXT: vpaddw %xmm3, %xmm0, %xmm3
; AVX512VPOPCNTDQ-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: ne_1_v8i16:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm3, %xmm0, %xmm4
; AVX512VPOPCNTDQVL-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpternlogq $222, %xmm3, %xmm2, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: ne_1_v8i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: ne_1_v8i16:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntw %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  %3 = icmp ne <8 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %4 = sext <8 x i1> %3 to <8 x i16>
  ret <8 x i16> %4
}

; ctpop(x) == 1 on <16 x i8>, sign-extended to a per-lane all-ones / all-zeros mask.
; The expected code either uses vpopcntb and compares against a constant, or, where
; no vpopcntb appears, tests (x & (x - 1)) == 0 and masks out the x == 0 lanes with
; pandn / vpandn.
define <16 x i8> @eq_1_v16i8(<16 x i8> %0) {
; SSE-LABEL: eq_1_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: paddb %xmm0, %xmm2
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pcmpeqb %xmm1, %xmm0
; SSE-NEXT: pcmpeqb %xmm1, %xmm2
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: eq_1_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpandn %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
;
; BITALG_NOVLX-LABEL: eq_1_v16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: eq_1_v16i8:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp eq <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}

; ctpop(x) != 1 on <16 x i8>, sign-extended to a per-lane all-ones / all-zeros mask.
; The expected code either uses vpopcntb, compares against a constant and inverts
; the result, or, where no vpopcntb appears, inverts (x & (x - 1)) == 0 and ors in
; an x == 0 test.
define <16 x i8> @ne_1_v16i8(<16 x i8> %0) {
; SSE-LABEL: ne_1_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpeqd %xmm3, %xmm3
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: paddb %xmm3, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pcmpeqb %xmm2, %xmm0
; SSE-NEXT: pcmpeqb %xmm2, %xmm1
; SSE-NEXT: pxor %xmm3, %xmm1
; SSE-NEXT: por %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: ne_1_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ne_1_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpaddb %xmm3, %xmm0, %xmm4
; AVX2-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: ne_1_v16i8:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512VPOPCNTDQ-NEXT: vpaddb %xmm3, %xmm0, %xmm3
; AVX512VPOPCNTDQ-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: ne_1_v16i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512VPOPCNTDQVL-NEXT: vpaddb %xmm3, %xmm0, %xmm4
; AVX512VPOPCNTDQVL-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpternlogq $222, %xmm3, %xmm2, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: ne_1_v16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: ne_1_v16i8:
; BITALG: # %bb.0:
; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; BITALG-NEXT: retq
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
  %3 = icmp ne <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %4 = sext <16 x i1> %3 to <16 x i8>
  ret <16 x i8> %4
}

; Declarations of the llvm.ctpop intrinsics called by the tests in this file.
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)