; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL

; Codegen checks for llvm.vector.reduce.or applied to <N x i1> vectors, run
; once per x86 feature level listed in the llc invocations above. The i1 input
; is produced either by a trunc (first section) or by an icmp (second section).
; The assertion lines are machine-generated; regenerate them with the script
; named on the first line instead of editing them by hand.

;
; Truncate
;

; Truncate <2 x i64> to <2 x i1>, then OR-reduce the two i1 lanes to one i1.
define i1 @trunc_v2i64_v2i1(<2 x i64>) {
; SSE-LABEL: trunc_v2i64_v2i1:
; SSE: # %bb.0:
; SSE-NEXT: psllq $63, %xmm0
; SSE-NEXT: movmskpd %xmm0, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX-LABEL: trunc_v2i64_v2i1:
; AVX: # %bb.0:
; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX-NEXT: vmovmskpd %xmm0, %eax
; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_v2i64_v2i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $3, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v2i64_v2i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb $3, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v2i64_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: retq
  %a = trunc <2 x i64> %0 to <2 x i1>
  %b = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
  ret i1 %b
}

; Truncate <4 x i32> to <4 x i1>, then OR-reduce the four i1 lanes to one i1.
define i1 @trunc_v4i32_v4i1(<4 x i32>) {
; SSE-LABEL: trunc_v4i32_v4i1:
; SSE: # %bb.0:
; SSE-NEXT: pslld $31, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX-LABEL: trunc_v4i32_v4i1:
; AVX: # %bb.0:
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
; AVX-NEXT: vmovmskps %xmm0, %eax
; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_v4i32_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $15, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v4i32_v4i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb $15, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v4i32_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: retq
  %a = trunc <4 x i32> %0 to <4 x i1>
  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
  ret i1 %b
}

; Truncate <8 x i8> to <8 x i1>, then OR-reduce the eight i1 lanes to one i1.
; NOTE(review) - the function name says v8i16 but the operand type is <8 x i8>;
; the generated assertions below match the <8 x i8> form. Confirm which was
; intended before changing either the name or the type.
define i1 @trunc_v8i16_v8i1(<8 x i8>) {
; SSE2-LABEL: trunc_v8i16_v8i1:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psllw $15, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_v8i16_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: psllw $15, %xmm0
; SSE41-NEXT: pmovmskb %xmm0, %eax
; SSE41-NEXT: testl $43690, %eax # imm = 0xAAAA
; SSE41-NEXT: setne %al
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc_v8i16_v8i1:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: testl $43690, %eax # imm = 0xAAAA
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_v8i16_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v8i16_v8i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v8i16_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX512VL-NEXT: vpmovb2m %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: retq
  %a = trunc <8 x i8> %0 to <8 x i1>
  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Truncate <16 x i8> to <16 x i1>, then OR-reduce the sixteen i1 lanes to one i1.
define i1 @trunc_v16i8_v16i1(<16 x i8>) {
; SSE-LABEL: trunc_v16i8_v16i1:
; SSE: # %bb.0:
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX-LABEL: trunc_v16i8_v16i1:
; AVX: # %bb.0:
; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; AVX512-LABEL: trunc_v16i8_v16i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX512-NEXT: vpmovmskb %xmm0, %eax
; AVX512-NEXT: testl %eax, %eax
; AVX512-NEXT: setne %al
; AVX512-NEXT: retq
  %a = trunc <16 x i8> %0 to <16 x i1>
  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Truncate <4 x i64> to <4 x i1>, then OR-reduce the four i1 lanes to one i1.
define i1 @trunc_v4i64_v4i1(<4 x i64>) {
; SSE-LABEL: trunc_v4i64_v4i1:
; SSE: # %bb.0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: pslld $31, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_v4i64_v4i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v4i64_v4i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
; AVX2-NEXT: vmovmskpd %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_v4i64_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllq $63, %ymm0, %ymm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $15, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v4i64_v4i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllq $63, %ymm0, %ymm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb $15, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v4i64_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = trunc <4 x i64> %0 to <4 x i1>
  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
  ret i1 %b
}

; Truncate <8 x i32> to <8 x i1>, then OR-reduce the eight i1 lanes to one i1.
define i1 @trunc_v8i32_v8i1(<8 x i32>) {
; SSE2-LABEL: trunc_v8i32_v8i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: psllw $15, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_v8i32_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: psllw $15, %xmm0
; SSE41-NEXT: pmovmskb %xmm0, %eax
; SSE41-NEXT: testl $43690, %eax # imm = 0xAAAA
; SSE41-NEXT: setne %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_v8i32_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v8i32_v8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_v8i32_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v8i32_v8i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v8i32_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = trunc <8 x i32> %0 to <8 x i1>
  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Truncate <16 x i16> to <16 x i1>, then OR-reduce the sixteen i1 lanes to one i1.
define i1 @trunc_v16i16_v16i1(<16 x i16>) {
; SSE-LABEL: trunc_v16i16_v16i1:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_v16i16_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v16i16_v16i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_v16i16_v16i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kortestw %k0, %k0
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v16i16_v16i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
; AVX512BW-NEXT: kortestw %k0, %k0
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v16i16_v16i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovw2m %ymm0, %k0
; AVX512VL-NEXT: kortestw %k0, %k0
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = trunc <16 x i16> %0 to <16 x i1>
  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Truncate <32 x i8> to <32 x i1>, then OR-reduce the thirty-two i1 lanes to one i1.
define i1 @trunc_v32i8_v32i1(<32 x i8>) {
; SSE-LABEL: trunc_v32i8_v32i1:
; SSE: # %bb.0:
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_v32i8_v32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v32i8_v32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_v32i8_v32i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $4, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $2, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $1, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v32i8_v32i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovmskb %ymm0, %eax
; AVX512BW-NEXT: testl %eax, %eax
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v32i8_v32i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovmskb %ymm0, %eax
; AVX512VL-NEXT: testl %eax, %eax
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = trunc <32 x i8> %0 to <32 x i1>
  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
  ret i1 %b
}

; Truncate <8 x i64> to <8 x i1>, then OR-reduce the eight i1 lanes to one i1.
define i1 @trunc_v8i64_v8i1(<8 x i64>) {
; SSE2-LABEL: trunc_v8i64_v8i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT: psllw $15, %xmm2
; SSE2-NEXT: pmovmskb %xmm2, %eax
; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_v8i64_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: psllw $15, %xmm0
; SSE41-NEXT: pmovmskb %xmm0, %eax
; SSE41-NEXT: testl $43690, %eax # imm = 0xAAAA
; SSE41-NEXT: setne %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_v8i64_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v8i64_v8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_v8i64_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v8i64_v8i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v8i64_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = trunc <8 x i64> %0 to <8 x i1>
  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Truncate <16 x i32> to <16 x i1>, then OR-reduce the sixteen i1 lanes to one i1.
define i1 @trunc_v16i32_v16i1(<16 x i32>) {
; SSE2-LABEL: trunc_v16i32_v16i1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: packuswb %xmm3, %xmm2
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: packuswb %xmm2, %xmm0
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testl %eax, %eax
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_v16i32_v16i1:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE41-NEXT: pand %xmm4, %xmm3
; SSE41-NEXT: pand %xmm4, %xmm2
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: pand %xmm4, %xmm1
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm2, %xmm0
; SSE41-NEXT: psllw $7, %xmm0
; SSE41-NEXT: pmovmskb %xmm0, %eax
; SSE41-NEXT: testl %eax, %eax
; SSE41-NEXT: setne %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_v16i32_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v16i32_v16i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_v16i32_v16i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = trunc <16 x i32> %0 to <16 x i1>
  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Truncate <32 x i16> to <32 x i1>, then OR-reduce the thirty-two i1 lanes to one i1.
define i1 @trunc_v32i16_v32i1(<32 x i16>) {
; SSE-LABEL: trunc_v32i16_v32i1:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_v32i16_v32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v32i16_v32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_v32i16_v32i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $4, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $2, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $1, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v32i16_v32i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $15, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
; AVX512BW-NEXT: kortestd %k0, %k0
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v32i16_v32i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $15, %zmm0, %zmm0
; AVX512VL-NEXT: vpmovw2m %zmm0, %k0
; AVX512VL-NEXT: kortestd %k0, %k0
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = trunc <32 x i16> %0 to <32 x i1>
  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
  ret i1 %b
}

; Truncate <64 x i8> to <64 x i1>, then OR-reduce the sixty-four i1 lanes to one i1.
define i1 @trunc_v64i8_v64i1(<64 x i8>) {
; SSE-LABEL: trunc_v64i8_v64i1:
; SSE: # %bb.0:
; SSE-NEXT: por %xmm3, %xmm1
; SSE-NEXT: por %xmm2, %xmm1
; SSE-NEXT: por %xmm0, %xmm1
; SSE-NEXT: psllw $7, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_v64i8_v64i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v64i8_v64i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_v64i8_v64i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
; AVX512F-NEXT: vpor %xmm2, %xmm3, %xmm2
; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $4, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $2, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $1, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v64i8_v64i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
; AVX512BW-NEXT: kortestq %k0, %k0
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v64i8_v64i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $7, %zmm0, %zmm0
; AVX512VL-NEXT: vpmovb2m %zmm0, %k0
; AVX512VL-NEXT: kortestq %k0, %k0
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = trunc <64 x i8> %0 to <64 x i1>
  %b = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %a)
  ret i1 %b
}

;
; Comparison
;

; Compare each <2 x i64> lane for equality with zero, then OR-reduce the
; resulting <2 x i1> to one i1.
define i1 @icmp_v2i64_v2i1(<2 x i64>) {
; SSE2-LABEL: icmp_v2i64_v2i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: testl %eax, %eax
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v2i64_v2i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
; SSE41-NEXT: movmskpd %xmm1, %eax
; SSE41-NEXT: testl %eax, %eax
; SSE41-NEXT: setne %al
; SSE41-NEXT: retq
;
; AVX-LABEL: icmp_v2i64_v2i1:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovmskpd %xmm0, %eax
; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; AVX512F-LABEL: icmp_v2i64_v2i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $3, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp_v2i64_v2i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb $3, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v2i64_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: retq
  %a = icmp eq <2 x i64> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
  ret i1 %b
}
; The functions below all have the same shape: compare every lane of the
; vector argument against zero with 'icmp eq', then fold the resulting i1
; mask with llvm.vector.reduce.or and return that single i1. The commented
; lines inside each function are the autogenerated expected-assembly
; assertions for the RUN configurations at the top of the file; they are
; reproduced unchanged here.

; <4 x i32> argument, <4 x i1> mask, reduced with the v4i1 intrinsic.
define i1 @icmp_v4i32_v4i1(<4 x i32>) {
; SSE-LABEL: icmp_v4i32_v4i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqd %xmm0, %xmm1
; SSE-NEXT: movmskps %xmm1, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX-LABEL: icmp_v4i32_v4i1:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovmskps %xmm0, %eax
; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; AVX512F-LABEL: icmp_v4i32_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $15, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp_v4i32_v4i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb $15, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v4i32_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: retq
  %a = icmp eq <4 x i32> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
  ret i1 %b
}

; <8 x i8> argument, <8 x i1> mask, reduced with the v8i1 intrinsic.
; NOTE(review): the function name says v8i16 but the operand type in both the
; signature and the icmp is <8 x i8>. The name is left as-is because the
; generated labels reference it; confirm whether <8 x i16> was intended.
define i1 @icmp_v8i16_v8i1(<8 x i8>) {
; SSE-LABEL: icmp_v8i16_v8i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
; SSE-NEXT: testb %al, %al
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX-LABEL: icmp_v8i16_v8i1:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: testb %al, %al
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; AVX512F-LABEL: icmp_v8i16_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp_v8i16_v8i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v8i16_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmb %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: retq
  %a = icmp eq <8 x i8> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
  ret i1 %b
}

; <16 x i8> argument, <16 x i1> mask, reduced with the v16i1 intrinsic.
define i1 @icmp_v16i8_v16i1(<16 x i8>) {
; SSE-LABEL: icmp_v16i8_v16i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX-LABEL: icmp_v16i8_v16i1:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; AVX512F-LABEL: icmp_v16i8_v16i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovmskb %xmm0, %eax
; AVX512F-NEXT: testl %eax, %eax
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp_v16i8_v16i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kortestw %k0, %k0
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v16i8_v16i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmb %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kortestw %k0, %k0
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: retq
  %a = icmp eq <16 x i8> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
  ret i1 %b
}

; <4 x i64> argument, <4 x i1> mask, reduced with the v4i1 intrinsic.
define i1 @icmp_v4i64_v4i1(<4 x i64>) {
; SSE2-LABEL: icmp_v4i64_v4i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: packssdw %xmm3, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: testl %eax, %eax
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v4i64_v4i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pcmpeqq %xmm2, %xmm1
; SSE41-NEXT: pcmpeqq %xmm2, %xmm0
; SSE41-NEXT: packssdw %xmm1, %xmm0
; SSE41-NEXT: movmskps %xmm0, %eax
; SSE41-NEXT: testl %eax, %eax
; SSE41-NEXT: setne %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp_v4i64_v4i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskpd %ymm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v4i64_v4i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovmskpd %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: icmp_v4i64_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $15, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp_v4i64_v4i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb $15, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v4i64_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = icmp eq <4 x i64> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
  ret i1 %b
}

; <8 x i32> argument, <8 x i1> mask, reduced with the v8i1 intrinsic.
define i1 @icmp_v8i32_v8i1(<8 x i32>) {
; SSE-LABEL: icmp_v8i32_v8i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpeqd %xmm2, %xmm1
; SSE-NEXT: pcmpeqd %xmm2, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX1-LABEL: icmp_v8i32_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v8i32_v8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: icmp_v8i32_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp_v8i32_v8i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v8i32_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = icmp eq <8 x i32> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
  ret i1 %b
}

; <16 x i16> argument, <16 x i1> mask, reduced with the v16i1 intrinsic.
define i1 @icmp_v16i16_v16i1(<16 x i16>) {
; SSE-LABEL: icmp_v16i16_v16i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpeqw %xmm2, %xmm1
; SSE-NEXT: pcmpeqw %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX1-LABEL: icmp_v16i16_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v16i16_v16i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: icmp_v16i16_v16i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kortestw %k0, %k0
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp_v16i16_v16i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kortestw %k0, %k0
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v16i16_v16i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmw %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kortestw %k0, %k0
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = icmp eq <16 x i16> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
  ret i1 %b
}

; <32 x i8> argument, <32 x i1> mask, reduced with the v32i1 intrinsic.
define i1 @icmp_v32i8_v32i1(<32 x i8>) {
; SSE-LABEL: icmp_v32i8_v32i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: pcmpeqb %xmm2, %xmm1
; SSE-NEXT: pcmpeqb %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX1-LABEL: icmp_v32i8_v32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v32i8_v32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: icmp_v32i8_v32i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $4, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $2, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $1, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp_v32i8_v32i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kortestd %k0, %k0
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v32i8_v32i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmb %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kortestd %k0, %k0
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = icmp eq <32 x i8> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
  ret i1 %b
}

; <8 x i64> argument, <8 x i1> mask, reduced with the v8i1 intrinsic.
define i1 @icmp_v8i64_v8i1(<8 x i64>) {
; SSE2-LABEL: icmp_v8i64_v8i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
; SSE2-NEXT: pand %xmm3, %xmm5
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: packssdw %xmm5, %xmm3
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: packssdw %xmm2, %xmm1
; SSE2-NEXT: packssdw %xmm3, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %eax
; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v8i64_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pcmpeqq %xmm4, %xmm3
; SSE41-NEXT: pcmpeqq %xmm4, %xmm2
; SSE41-NEXT: packssdw %xmm3, %xmm2
; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
; SSE41-NEXT: packssdw %xmm1, %xmm0
; SSE41-NEXT: packssdw %xmm2, %xmm0
; SSE41-NEXT: pmovmskb %xmm0, %eax
; SSE41-NEXT: testl $43690, %eax # imm = 0xAAAA
; SSE41-NEXT: setne %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp_v8i64_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v8i64_v8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: icmp_v8i64_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb %al, %al
; AVX512F-NEXT: setne %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp_v8i64_v8i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v8i64_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = icmp eq <8 x i64> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
  ret i1 %b
}

; <16 x i32> argument, <16 x i1> mask, reduced with the v16i1 intrinsic.
define i1 @icmp_v16i32_v16i1(<16 x i32>) {
; SSE-LABEL: icmp_v16i32_v16i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm4, %xmm4
; SSE-NEXT: pcmpeqd %xmm4, %xmm3
; SSE-NEXT: pcmpeqd %xmm4, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: pcmpeqd %xmm4, %xmm1
; SSE-NEXT: pcmpeqd %xmm4, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm2, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX1-LABEL: icmp_v16i32_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v16i32_v16i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp_v16i32_v16i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <16 x i32> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
  ret i1 %b
}

; <32 x i16> argument, <32 x i1> mask, reduced with the v32i1 intrinsic.
define i1 @icmp_v32i16_v32i1(<32 x i16>) {
; SSE-LABEL: icmp_v32i16_v32i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm4, %xmm4
; SSE-NEXT: pcmpeqw %xmm4, %xmm1
; SSE-NEXT: pcmpeqw %xmm4, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pcmpeqw %xmm4, %xmm3
; SSE-NEXT: pcmpeqw %xmm4, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: por %xmm0, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX1-LABEL: icmp_v32i16_v32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v32i16_v32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: icmp_v32i16_v32i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $4, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $2, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $1, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp_v32i16_v32i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kortestd %k0, %k0
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v32i16_v32i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmw %zmm0, %zmm0, %k0
; AVX512VL-NEXT: kortestd %k0, %k0
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = icmp eq <32 x i16> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
  ret i1 %b
}

; <64 x i8> argument, <64 x i1> mask, reduced with the v64i1 intrinsic.
define i1 @icmp_v64i8_v64i1(<64 x i8>) {
; SSE-LABEL: icmp_v64i8_v64i1:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm4, %xmm4
; SSE-NEXT: pcmpeqb %xmm4, %xmm2
; SSE-NEXT: pcmpeqb %xmm4, %xmm0
; SSE-NEXT: pcmpeqb %xmm4, %xmm3
; SSE-NEXT: pcmpeqb %xmm4, %xmm1
; SSE-NEXT: por %xmm3, %xmm1
; SSE-NEXT: por %xmm2, %xmm1
; SSE-NEXT: por %xmm0, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: setne %al
; SSE-NEXT: retq
;
; AVX1-LABEL: icmp_v64i8_v64i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v64i8_v64i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: icmp_v64i8_v64i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
; AVX512F-NEXT: vpor %xmm2, %xmm3, %xmm2
; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $4, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $2, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kshiftrw $1, %k0, %k1
; AVX512F-NEXT: korw %k1, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp_v64i8_v64i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kortestq %k0, %k0
; AVX512BW-NEXT: setne %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v64i8_v64i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmb %zmm0, %zmm0, %k0
; AVX512VL-NEXT: kortestq %k0, %k0
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = icmp eq <64 x i8> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %a)
  ret i1 %b
}

; Declarations of the llvm.vector.reduce.or intrinsics called above, one per
; i1 mask width (2, 4, 8, 16, 32 and 64 lanes).
declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1>)
declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1>)
declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>)
declare i1 @llvm.vector.reduce.or.v64i1(<64 x i1>)