1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE 3; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 4; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512 6 7define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) { 8; SSE-LABEL: test_v2f64_sext: 9; SSE: # %bb.0: 10; SSE-NEXT: cmpltpd %xmm0, %xmm1 11; SSE-NEXT: movmskpd %xmm1, %ecx 12; SSE-NEXT: xorl %eax, %eax 13; SSE-NEXT: cmpl $3, %ecx 14; SSE-NEXT: sete %al 15; SSE-NEXT: negq %rax 16; SSE-NEXT: retq 17; 18; AVX-LABEL: test_v2f64_sext: 19; AVX: # %bb.0: 20; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 21; AVX-NEXT: vmovmskpd %xmm0, %ecx 22; AVX-NEXT: xorl %eax, %eax 23; AVX-NEXT: cmpl $3, %ecx 24; AVX-NEXT: sete %al 25; AVX-NEXT: negq %rax 26; AVX-NEXT: retq 27; 28; AVX512-LABEL: test_v2f64_sext: 29; AVX512: # %bb.0: 30; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 31; AVX512-NEXT: vmovmskpd %xmm0, %ecx 32; AVX512-NEXT: xorl %eax, %eax 33; AVX512-NEXT: cmpl $3, %ecx 34; AVX512-NEXT: sete %al 35; AVX512-NEXT: negq %rax 36; AVX512-NEXT: retq 37 %c = fcmp ogt <2 x double> %a0, %a1 38 %s = sext <2 x i1> %c to <2 x i64> 39 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 40 %2 = and <2 x i64> %s, %1 41 %3 = extractelement <2 x i64> %2, i32 0 42 ret i64 %3 43} 44 45define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) { 46; SSE-LABEL: test_v4f64_sext: 47; SSE: # %bb.0: 48; SSE-NEXT: cmpltpd %xmm1, %xmm3 49; SSE-NEXT: cmpltpd %xmm0, %xmm2 50; SSE-NEXT: andpd %xmm3, %xmm2 51; SSE-NEXT: movmskpd %xmm2, %ecx 52; SSE-NEXT: xorl %eax, %eax 53; SSE-NEXT: cmpl $3, %ecx 54; SSE-NEXT: sete %al 55; SSE-NEXT: negq %rax 56; SSE-NEXT: retq 57; 58; AVX-LABEL: test_v4f64_sext: 59; AVX: # %bb.0: 60; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 61; AVX-NEXT: vmovmskpd %ymm0, %ecx 62; AVX-NEXT: xorl %eax, %eax 63; AVX-NEXT: cmpl $15, %ecx 64; AVX-NEXT: sete %al 65; AVX-NEXT: negq %rax 66; AVX-NEXT: vzeroupper 67; AVX-NEXT: retq 68; 69; AVX512-LABEL: test_v4f64_sext: 70; AVX512: # %bb.0: 71; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 72; AVX512-NEXT: vmovmskpd %ymm0, %ecx 73; AVX512-NEXT: xorl %eax, %eax 74; AVX512-NEXT: cmpl $15, %ecx 75; AVX512-NEXT: sete %al 76; AVX512-NEXT: negq %rax 77; AVX512-NEXT: vzeroupper 78; AVX512-NEXT: retq 79 %c = fcmp ogt <4 x double> %a0, %a1 80 %s = sext <4 x i1> %c to <4 x i64> 81 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 82 %2 = and <4 x i64> %s, %1 83 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 84 %4 = and <4 x i64> %2, %3 85 %5 = extractelement <4 x i64> %4, i64 0 86 ret i64 %5 87} 88 89define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) { 90; SSE-LABEL: test_v4f64_legal_sext: 91; SSE: # %bb.0: 92; SSE-NEXT: cmpltpd %xmm1, %xmm3 93; SSE-NEXT: cmpltpd %xmm0, %xmm2 94; SSE-NEXT: packssdw %xmm3, %xmm2 95; SSE-NEXT: movmskps %xmm2, %ecx 96; SSE-NEXT: xorl %eax, %eax 97; SSE-NEXT: cmpl $15, %ecx 98; SSE-NEXT: sete %al 99; SSE-NEXT: negq %rax 100; SSE-NEXT: retq 101; 102; AVX-LABEL: test_v4f64_legal_sext: 103; AVX: # %bb.0: 104; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 105; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 106; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 107; AVX-NEXT: vmovmskps %xmm0, %ecx 108; AVX-NEXT: xorl %eax, %eax 109; AVX-NEXT: cmpl $15, %ecx 110; AVX-NEXT: sete %al 111; AVX-NEXT: negq %rax 112; AVX-NEXT: vzeroupper 113; AVX-NEXT: retq 114; 115; AVX512-LABEL: test_v4f64_legal_sext: 116; AVX512: # %bb.0: 117; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1 118; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 119; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 120; AVX512-NEXT: vmovmskps %xmm0, %ecx 121; AVX512-NEXT: xorl %eax, %eax 122; AVX512-NEXT: cmpl $15, %ecx 123; AVX512-NEXT: sete %al 124; AVX512-NEXT: negq %rax 125; AVX512-NEXT: vzeroupper 126; AVX512-NEXT: retq 127 %c = fcmp ogt <4 x double> %a0, %a1 128 %s = sext <4 x i1> %c to <4 x i32> 129 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 130 %2 = and <4 x i32> %s, %1 131 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 132 %4 = and <4 x i32> %2, %3 133 %5 = extractelement <4 x i32> %4, i64 0 134 %6 = sext i32 %5 to i64 135 ret i64 %6 136} 137 138define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) { 139; SSE-LABEL: test_v4f32_sext: 140; SSE: # %bb.0: 141; SSE-NEXT: cmpltps %xmm0, %xmm1 142; SSE-NEXT: movmskps %xmm1, %ecx 143; SSE-NEXT: xorl %eax, %eax 144; SSE-NEXT: cmpl $15, %ecx 145; SSE-NEXT: sete %al 146; SSE-NEXT: negl %eax 147; SSE-NEXT: retq 148; 149; AVX-LABEL: test_v4f32_sext: 150; AVX: # %bb.0: 151; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 152; AVX-NEXT: vmovmskps %xmm0, %ecx 153; AVX-NEXT: xorl %eax, %eax 154; AVX-NEXT: cmpl $15, %ecx 155; AVX-NEXT: sete %al 156; AVX-NEXT: negl %eax 157; AVX-NEXT: retq 158; 159; AVX512-LABEL: test_v4f32_sext: 160; AVX512: # %bb.0: 161; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 162; AVX512-NEXT: vmovmskps %xmm0, %ecx 163; AVX512-NEXT: xorl %eax, %eax 164; AVX512-NEXT: cmpl $15, %ecx 165; AVX512-NEXT: sete %al 166; AVX512-NEXT: negl %eax 167; AVX512-NEXT: retq 168 %c = fcmp ogt <4 x float> %a0, %a1 169 %s = sext <4 x i1> %c to <4 x i32> 170 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 171 %2 = and <4 x i32> %s, %1 172 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 173 %4 = and <4 x i32> %2, %3 174 %5 = extractelement <4 x i32> %4, i32 0 175 ret i32 %5 176} 177 178define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) { 179; SSE-LABEL: test_v8f32_sext: 180; SSE: # %bb.0: 181; SSE-NEXT: cmpltps %xmm1, %xmm3 182; SSE-NEXT: cmpltps %xmm0, %xmm2 183; SSE-NEXT: andps %xmm3, %xmm2 184; SSE-NEXT: movmskps %xmm2, %ecx 185; SSE-NEXT: xorl %eax, %eax 186; SSE-NEXT: cmpl $15, %ecx 187; SSE-NEXT: sete %al 188; SSE-NEXT: negl %eax 189; SSE-NEXT: retq 190; 191; AVX-LABEL: test_v8f32_sext: 192; AVX: # %bb.0: 193; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 194; AVX-NEXT: vmovmskps %ymm0, %ecx 195; AVX-NEXT: xorl %eax, %eax 196; AVX-NEXT: cmpl $255, %ecx 197; AVX-NEXT: sete %al 198; AVX-NEXT: negl %eax 199; AVX-NEXT: vzeroupper 200; AVX-NEXT: retq 201; 202; AVX512-LABEL: test_v8f32_sext: 203; AVX512: # %bb.0: 204; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 205; AVX512-NEXT: vmovmskps %ymm0, %ecx 206; AVX512-NEXT: xorl %eax, %eax 207; AVX512-NEXT: cmpl $255, %ecx 208; AVX512-NEXT: sete %al 209; AVX512-NEXT: negl %eax 210; AVX512-NEXT: vzeroupper 211; AVX512-NEXT: retq 212 %c = fcmp ogt <8 x float> %a0, %a1 213 %s = sext <8 x i1> %c to <8 x i32> 214 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 215 %2 = and <8 x i32> %s, %1 216 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 217 %4 = and <8 x i32> %2, %3 218 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 219 %6 = and <8 x i32> %4, %5 220 %7 = extractelement <8 x i32> %6, i32 0 221 ret i32 %7 222} 223 224define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) { 225; SSE-LABEL: test_v8f32_legal_sext: 226; SSE: # %bb.0: 227; SSE-NEXT: cmpltps %xmm1, %xmm3 228; SSE-NEXT: cmpltps %xmm0, %xmm2 229; SSE-NEXT: packssdw %xmm3, %xmm2 230; SSE-NEXT: pmovmskb %xmm2, %ecx 231; SSE-NEXT: xorl %eax, %eax 232; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 233; SSE-NEXT: sete %al 234; SSE-NEXT: negl %eax 235; SSE-NEXT: retq 236; 237; AVX-LABEL: test_v8f32_legal_sext: 238; AVX: # %bb.0: 239; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 240; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 241; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 242; AVX-NEXT: vpmovmskb %xmm0, %ecx 243; AVX-NEXT: xorl %eax, %eax 244; AVX-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 245; AVX-NEXT: sete %al 246; AVX-NEXT: negl %eax 247; AVX-NEXT: vzeroupper 248; AVX-NEXT: retq 249; 250; AVX512-LABEL: test_v8f32_legal_sext: 251; AVX512: # %bb.0: 252; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0 253; AVX512-NEXT: vpmovm2w %k0, %xmm0 254; AVX512-NEXT: vpmovmskb %xmm0, %ecx 255; AVX512-NEXT: xorl %eax, %eax 256; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 257; AVX512-NEXT: sete %al 258; AVX512-NEXT: negl %eax 259; AVX512-NEXT: vzeroupper 260; AVX512-NEXT: retq 261 %c = fcmp ogt <8 x float> %a0, %a1 262 %s = sext <8 x i1> %c to <8 x i16> 263 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 264 %2 = and <8 x i16> %s, %1 265 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 266 %4 = and <8 x i16> %2, %3 267 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 268 %6 = and <8 x i16> %4, %5 269 %7 = extractelement <8 x i16> %6, i32 0 270 %8 = sext i16 %7 to i32 271 ret i32 %8 272} 273 274define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) { 275; SSE-LABEL: test_v2i64_sext: 276; SSE: # %bb.0: 277; SSE-NEXT: pcmpgtq %xmm1, %xmm0 278; SSE-NEXT: movmskpd %xmm0, %ecx 279; SSE-NEXT: xorl %eax, %eax 280; SSE-NEXT: cmpl $3, %ecx 281; SSE-NEXT: sete %al 282; SSE-NEXT: negq %rax 283; SSE-NEXT: retq 284; 285; AVX-LABEL: test_v2i64_sext: 286; AVX: # %bb.0: 287; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 288; AVX-NEXT: vmovmskpd %xmm0, %ecx 289; AVX-NEXT: xorl %eax, %eax 290; AVX-NEXT: cmpl $3, %ecx 291; AVX-NEXT: sete %al 292; AVX-NEXT: negq %rax 293; AVX-NEXT: retq 294; 295; AVX512-LABEL: test_v2i64_sext: 296; AVX512: # %bb.0: 297; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 298; AVX512-NEXT: vmovmskpd %xmm0, %ecx 299; AVX512-NEXT: xorl %eax, %eax 300; AVX512-NEXT: cmpl $3, %ecx 301; AVX512-NEXT: sete %al 302; AVX512-NEXT: negq %rax 303; AVX512-NEXT: retq 304 %c = icmp sgt <2 x i64> %a0, %a1 305 %s = sext <2 x i1> %c to <2 x i64> 306 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 307 %2 = and <2 x i64> %s, %1 308 %3 = extractelement <2 x i64> %2, i32 0 309 ret i64 %3 310} 311 312define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) { 313; SSE-LABEL: test_v4i64_sext: 314; SSE: # %bb.0: 315; SSE-NEXT: pcmpgtq %xmm3, %xmm1 316; SSE-NEXT: pcmpgtq %xmm2, %xmm0 317; SSE-NEXT: pand %xmm1, %xmm0 318; SSE-NEXT: movmskpd %xmm0, %ecx 319; SSE-NEXT: xorl %eax, %eax 320; SSE-NEXT: cmpl $3, %ecx 321; SSE-NEXT: sete %al 322; SSE-NEXT: negq %rax 323; SSE-NEXT: retq 324; 325; AVX1-LABEL: test_v4i64_sext: 326; AVX1: # %bb.0: 327; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 328; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 329; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 330; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 331; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 332; AVX1-NEXT: vmovmskpd %ymm0, %ecx 333; AVX1-NEXT: xorl %eax, %eax 334; AVX1-NEXT: cmpl $15, %ecx 335; AVX1-NEXT: sete %al 336; AVX1-NEXT: negq %rax 337; AVX1-NEXT: vzeroupper 338; AVX1-NEXT: retq 339; 340; AVX2-LABEL: test_v4i64_sext: 341; AVX2: # %bb.0: 342; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 343; AVX2-NEXT: vmovmskpd %ymm0, %ecx 344; AVX2-NEXT: xorl %eax, %eax 345; AVX2-NEXT: cmpl $15, %ecx 346; AVX2-NEXT: sete %al 347; AVX2-NEXT: negq %rax 348; AVX2-NEXT: vzeroupper 349; AVX2-NEXT: retq 350; 351; AVX512-LABEL: test_v4i64_sext: 352; AVX512: # %bb.0: 353; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 354; AVX512-NEXT: vmovmskpd %ymm0, %ecx 355; AVX512-NEXT: xorl %eax, %eax 356; AVX512-NEXT: cmpl $15, %ecx 357; AVX512-NEXT: sete %al 358; AVX512-NEXT: negq %rax 359; AVX512-NEXT: vzeroupper 360; AVX512-NEXT: retq 361 %c = icmp sgt <4 x i64> %a0, %a1 362 %s = sext <4 x i1> %c to <4 x i64> 363 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 364 %2 = and <4 x i64> %s, %1 365 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 366 %4 = and <4 x i64> %2, %3 367 %5 = extractelement <4 x i64> %4, i64 0 368 ret i64 %5 369} 370 371define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) { 372; SSE-LABEL: test_v4i64_legal_sext: 373; SSE: # %bb.0: 374; SSE-NEXT: pcmpgtq %xmm3, %xmm1 375; SSE-NEXT: pcmpgtq %xmm2, %xmm0 376; SSE-NEXT: packssdw %xmm1, %xmm0 377; SSE-NEXT: movmskps %xmm0, %ecx 378; SSE-NEXT: xorl %eax, %eax 379; SSE-NEXT: cmpl $15, %ecx 380; SSE-NEXT: sete %al 381; SSE-NEXT: negq %rax 382; SSE-NEXT: retq 383; 384; AVX1-LABEL: test_v4i64_legal_sext: 385; AVX1: # %bb.0: 386; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 387; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 388; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 389; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 390; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 391; AVX1-NEXT: vmovmskps %xmm0, %ecx 392; AVX1-NEXT: xorl %eax, %eax 393; AVX1-NEXT: cmpl $15, %ecx 394; AVX1-NEXT: sete %al 395; AVX1-NEXT: negq %rax 396; AVX1-NEXT: vzeroupper 397; AVX1-NEXT: retq 398; 399; AVX2-LABEL: test_v4i64_legal_sext: 400; AVX2: # %bb.0: 401; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 402; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 403; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 404; AVX2-NEXT: vmovmskps %xmm0, %ecx 405; AVX2-NEXT: xorl %eax, %eax 406; AVX2-NEXT: cmpl $15, %ecx 407; AVX2-NEXT: sete %al 408; AVX2-NEXT: negq %rax 409; AVX2-NEXT: vzeroupper 410; AVX2-NEXT: retq 411; 412; AVX512-LABEL: test_v4i64_legal_sext: 413; AVX512: # %bb.0: 414; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 415; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 416; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 417; AVX512-NEXT: vmovmskps %xmm0, %ecx 418; AVX512-NEXT: xorl %eax, %eax 419; AVX512-NEXT: cmpl $15, %ecx 420; AVX512-NEXT: sete %al 421; AVX512-NEXT: negq %rax 422; AVX512-NEXT: vzeroupper 423; AVX512-NEXT: retq 424 %c = icmp sgt <4 x i64> %a0, %a1 425 %s = sext <4 x i1> %c to <4 x i32> 426 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 427 %2 = and <4 x i32> %s, %1 428 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 429 %4 = and <4 x i32> %2, %3 430 %5 = extractelement <4 x i32> %4, i64 0 431 %6 = sext i32 %5 to i64 432 ret i64 %6 433} 434 435define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) { 436; SSE-LABEL: test_v4i32_sext: 437; SSE: # %bb.0: 438; SSE-NEXT: pcmpgtd %xmm1, %xmm0 439; SSE-NEXT: movmskps %xmm0, %ecx 440; SSE-NEXT: xorl %eax, %eax 441; SSE-NEXT: cmpl $15, %ecx 442; SSE-NEXT: sete %al 443; SSE-NEXT: negl %eax 444; SSE-NEXT: retq 445; 446; AVX-LABEL: test_v4i32_sext: 447; AVX: # %bb.0: 448; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 449; AVX-NEXT: vmovmskps %xmm0, %ecx 450; AVX-NEXT: xorl %eax, %eax 451; AVX-NEXT: cmpl $15, %ecx 452; AVX-NEXT: sete %al 453; AVX-NEXT: negl %eax 454; AVX-NEXT: retq 455; 456; AVX512-LABEL: test_v4i32_sext: 457; AVX512: # %bb.0: 458; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 459; AVX512-NEXT: vmovmskps %xmm0, %ecx 460; AVX512-NEXT: xorl %eax, %eax 461; AVX512-NEXT: cmpl $15, %ecx 462; AVX512-NEXT: sete %al 463; AVX512-NEXT: negl %eax 464; AVX512-NEXT: retq 465 %c = icmp sgt <4 x i32> %a0, %a1 466 %s = sext <4 x i1> %c to <4 x i32> 467 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 468 %2 = and <4 x i32> %s, %1 469 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 470 %4 = and <4 x i32> %2, %3 471 %5 = extractelement <4 x i32> %4, i32 0 472 ret i32 %5 473} 474 475define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) { 476; SSE-LABEL: test_v8i32_sext: 477; SSE: # %bb.0: 478; SSE-NEXT: pcmpgtd %xmm3, %xmm1 479; SSE-NEXT: pcmpgtd %xmm2, %xmm0 480; SSE-NEXT: pand %xmm1, %xmm0 481; SSE-NEXT: movmskps %xmm0, %ecx 482; SSE-NEXT: xorl %eax, %eax 483; SSE-NEXT: cmpl $15, %ecx 484; SSE-NEXT: sete %al 485; SSE-NEXT: negl %eax 486; SSE-NEXT: retq 487; 488; AVX1-LABEL: test_v8i32_sext: 489; AVX1: # %bb.0: 490; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 491; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 492; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 493; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 494; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 495; AVX1-NEXT: vmovmskps %ymm0, %ecx 496; AVX1-NEXT: xorl %eax, %eax 497; AVX1-NEXT: cmpl $255, %ecx 498; AVX1-NEXT: sete %al 499; AVX1-NEXT: negl %eax 500; AVX1-NEXT: vzeroupper 501; AVX1-NEXT: retq 502; 503; AVX2-LABEL: test_v8i32_sext: 504; AVX2: # %bb.0: 505; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 506; AVX2-NEXT: vmovmskps %ymm0, %ecx 507; AVX2-NEXT: xorl %eax, %eax 508; AVX2-NEXT: cmpl $255, %ecx 509; AVX2-NEXT: sete %al 510; AVX2-NEXT: negl %eax 511; AVX2-NEXT: vzeroupper 512; AVX2-NEXT: retq 513; 514; AVX512-LABEL: test_v8i32_sext: 515; AVX512: # %bb.0: 516; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 517; AVX512-NEXT: vmovmskps %ymm0, %ecx 518; AVX512-NEXT: xorl %eax, %eax 519; AVX512-NEXT: cmpl $255, %ecx 520; AVX512-NEXT: sete %al 521; AVX512-NEXT: negl %eax 522; AVX512-NEXT: vzeroupper 523; AVX512-NEXT: retq 524 %c = icmp sgt <8 x i32> %a0, %a1 525 %s = sext <8 x i1> %c to <8 x i32> 526 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 527 %2 = and <8 x i32> %s, %1 528 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 529 %4 = and <8 x i32> %2, %3 530 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 531 %6 = and <8 x i32> %4, %5 532 %7 = extractelement <8 x i32> %6, i32 0 533 ret i32 %7 534} 535 536define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) { 537; SSE-LABEL: test_v8i32_legal_sext: 538; SSE: # %bb.0: 539; SSE-NEXT: pcmpgtd %xmm3, %xmm1 540; SSE-NEXT: pcmpgtd %xmm2, %xmm0 541; SSE-NEXT: packssdw %xmm1, %xmm0 542; SSE-NEXT: pmovmskb %xmm0, %ecx 543; SSE-NEXT: xorl %eax, %eax 544; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 545; SSE-NEXT: sete %al 546; SSE-NEXT: negl %eax 547; SSE-NEXT: retq 548; 549; AVX1-LABEL: test_v8i32_legal_sext: 550; AVX1: # %bb.0: 551; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 552; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 553; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 554; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 555; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 556; AVX1-NEXT: vpmovmskb %xmm0, %ecx 557; AVX1-NEXT: xorl %eax, %eax 558; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 559; AVX1-NEXT: sete %al 560; AVX1-NEXT: negl %eax 561; AVX1-NEXT: vzeroupper 562; AVX1-NEXT: retq 563; 564; AVX2-LABEL: test_v8i32_legal_sext: 565; AVX2: # %bb.0: 566; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 567; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 568; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 569; AVX2-NEXT: vpmovmskb %xmm0, %ecx 570; AVX2-NEXT: xorl %eax, %eax 571; AVX2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 572; AVX2-NEXT: sete %al 573; AVX2-NEXT: negl %eax 574; AVX2-NEXT: vzeroupper 575; AVX2-NEXT: retq 576; 577; AVX512-LABEL: test_v8i32_legal_sext: 578; AVX512: # %bb.0: 579; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 580; AVX512-NEXT: vpmovm2w %k0, %xmm0 581; AVX512-NEXT: vpmovmskb %xmm0, %ecx 582; AVX512-NEXT: xorl %eax, %eax 583; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 584; AVX512-NEXT: sete %al 585; AVX512-NEXT: negl %eax 586; AVX512-NEXT: vzeroupper 587; AVX512-NEXT: retq 588 %c = icmp sgt <8 x i32> %a0, %a1 589 %s = sext <8 x i1> %c to <8 x i16> 590 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 591 %2 = and <8 x i16> %s, %1 592 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 593 %4 = and <8 x i16> %2, %3 594 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 595 %6 = and <8 x i16> %4, %5 596 %7 = extractelement <8 x i16> %6, i32 0 597 %8 = sext i16 %7 to i32 598 ret i32 %8 599} 600 601define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) { 602; SSE-LABEL: test_v8i16_sext: 603; SSE: # %bb.0: 604; SSE-NEXT: pcmpgtw %xmm1, %xmm0 605; SSE-NEXT: pmovmskb %xmm0, %ecx 606; SSE-NEXT: xorl %eax, %eax 607; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 608; SSE-NEXT: sete %al 609; SSE-NEXT: negl %eax 610; SSE-NEXT: # kill: def $ax killed $ax killed $eax 611; SSE-NEXT: retq 612; 613; AVX-LABEL: test_v8i16_sext: 614; AVX: # %bb.0: 615; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 616; AVX-NEXT: vpmovmskb %xmm0, %ecx 617; AVX-NEXT: xorl %eax, %eax 618; AVX-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 619; AVX-NEXT: sete %al 620; AVX-NEXT: negl %eax 621; AVX-NEXT: # kill: def $ax killed $ax killed $eax 622; AVX-NEXT: retq 623; 624; AVX512-LABEL: test_v8i16_sext: 625; AVX512: # %bb.0: 626; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 627; AVX512-NEXT: vpmovmskb %xmm0, %ecx 628; AVX512-NEXT: xorl %eax, %eax 629; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 630; AVX512-NEXT: sete %al 631; AVX512-NEXT: negl %eax 632; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 633; AVX512-NEXT: retq 634 %c = icmp sgt <8 x i16> %a0, %a1 635 %s = sext <8 x i1> %c to <8 x i16> 636 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 637 %2 = and <8 x i16> %s, %1 638 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 639 %4 = and <8 x i16> %2, %3 640 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 641 %6 = and <8 x i16> %4, %5 642 %7 = extractelement <8 x i16> %6, i32 0 643 ret i16 %7 644} 645 646define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) { 647; SSE-LABEL: test_v16i16_sext: 648; SSE: # %bb.0: 649; SSE-NEXT: pcmpgtw %xmm3, %xmm1 650; SSE-NEXT: pcmpgtw %xmm2, %xmm0 651; SSE-NEXT: pand %xmm1, %xmm0 652; SSE-NEXT: pmovmskb %xmm0, %ecx 653; SSE-NEXT: xorl %eax, %eax 654; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 655; SSE-NEXT: sete %al 656; SSE-NEXT: negl %eax 657; SSE-NEXT: # kill: def $ax killed $ax killed $eax 658; SSE-NEXT: retq 659; 660; AVX1-LABEL: test_v16i16_sext: 661; AVX1: # %bb.0: 662; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 663; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 664; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 665; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 666; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 667; AVX1-NEXT: vpmovmskb %xmm0, %ecx 668; AVX1-NEXT: xorl %eax, %eax 669; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 670; AVX1-NEXT: sete %al 671; AVX1-NEXT: negl %eax 672; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 673; AVX1-NEXT: vzeroupper 674; AVX1-NEXT: retq 675; 676; AVX2-LABEL: test_v16i16_sext: 677; AVX2: # %bb.0: 678; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 679; AVX2-NEXT: vpmovmskb %ymm0, %ecx 680; AVX2-NEXT: xorl %eax, %eax 681; AVX2-NEXT: cmpl $-1, %ecx 682; AVX2-NEXT: sete %al 683; AVX2-NEXT: negl %eax 684; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 685; AVX2-NEXT: vzeroupper 686; AVX2-NEXT: retq 687; 688; AVX512-LABEL: test_v16i16_sext: 689; AVX512: # %bb.0: 690; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 691; AVX512-NEXT: vpmovmskb %ymm0, %ecx 692; AVX512-NEXT: xorl %eax, %eax 693; AVX512-NEXT: cmpl $-1, %ecx 694; AVX512-NEXT: sete %al 695; AVX512-NEXT: negl %eax 696; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 697; AVX512-NEXT: vzeroupper 698; AVX512-NEXT: retq 699 %c = icmp sgt <16 x i16> %a0, %a1 700 %s = sext <16 x i1> %c to <16 x i16> 701 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 702 %2 = and <16 x i16> %s, %1 703 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 704 %4 = and <16 x i16> %2, %3 705 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 706 %6 = and <16 x i16> %4, %5 707 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 708 %8 = and <16 x i16> %6, %7 709 %9 = extractelement <16 x i16> %8, i32 0 710 ret i16 %9 711} 712 713define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) { 714; SSE-LABEL: test_v16i16_legal_sext: 715; SSE: # %bb.0: 716; SSE-NEXT: pcmpgtw %xmm3, %xmm1 717; SSE-NEXT: pcmpgtw %xmm2, %xmm0 718; SSE-NEXT: packsswb %xmm1, %xmm0 719; SSE-NEXT: pmovmskb %xmm0, %eax 720; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF 721; SSE-NEXT: sete %al 722; SSE-NEXT: negb %al 723; SSE-NEXT: movsbl %al, %eax 724; SSE-NEXT: # kill: def $ax killed $ax killed $eax 725; SSE-NEXT: retq 726; 727; AVX1-LABEL: test_v16i16_legal_sext: 728; AVX1: # %bb.0: 729; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 730; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 731; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 732; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 733; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 734; AVX1-NEXT: vpmovmskb %xmm0, %eax 735; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF 736; AVX1-NEXT: sete %al 737; AVX1-NEXT: negb %al 738; AVX1-NEXT: movsbl %al, %eax 739; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 740; AVX1-NEXT: vzeroupper 741; AVX1-NEXT: retq 742; 743; AVX2-LABEL: test_v16i16_legal_sext: 744; AVX2: # %bb.0: 745; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 746; AVX2-NEXT: vpmovmskb %ymm0, %eax 747; AVX2-NEXT: cmpl $-1, %eax 748; AVX2-NEXT: sete %al 749; AVX2-NEXT: negb %al 750; AVX2-NEXT: movsbl %al, %eax 751; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 752; AVX2-NEXT: vzeroupper 753; AVX2-NEXT: retq 754; 755; AVX512-LABEL: test_v16i16_legal_sext: 756; AVX512: # %bb.0: 757; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 758; AVX512-NEXT: vpmovm2b %k0, %xmm0 759; AVX512-NEXT: vpmovmskb %xmm0, %eax 760; AVX512-NEXT: cmpl $65535, %eax # imm = 0xFFFF 761; AVX512-NEXT: sete %al 762; AVX512-NEXT: negb %al 763; AVX512-NEXT: movsbl %al, %eax 764; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 765; AVX512-NEXT: vzeroupper 766; AVX512-NEXT: retq 767 %c = icmp sgt <16 x i16> %a0, %a1 768 %s = sext <16 x i1> %c to <16 x i8> 769 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 770 %2 = and <16 x i8> %s, %1 771 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 772 %4 = and <16 x i8> %2, %3 773 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 774 %6 = and <16 x i8> %4, %5 775 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 776 %8 = and <16 x i8> %6, %7 777 %9 = extractelement <16 x i8> %8, i32 0 778 %10 = sext i8 %9 to i16 779 ret i16 %10 780} 781 782define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) { 783; SSE-LABEL: test_v16i8_sext: 784; SSE: # %bb.0: 785; SSE-NEXT: pcmpgtb %xmm1, %xmm0 786; SSE-NEXT: pmovmskb %xmm0, %eax 787; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF 788; SSE-NEXT: sete %al 789; SSE-NEXT: negb %al 790; SSE-NEXT: retq 791; 792; AVX-LABEL: test_v16i8_sext: 793; AVX: # %bb.0: 794; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 795; AVX-NEXT: vpmovmskb %xmm0, %eax 796; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF 797; AVX-NEXT: sete %al 798; AVX-NEXT: negb %al 799; AVX-NEXT: retq 800; 801; AVX512-LABEL: test_v16i8_sext: 802; AVX512: # %bb.0: 803; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 804; AVX512-NEXT: vpmovmskb %xmm0, %eax 805; AVX512-NEXT: cmpl $65535, %eax # imm = 0xFFFF 806; AVX512-NEXT: sete %al 807; AVX512-NEXT: negb %al 808; AVX512-NEXT: retq 809 %c = icmp sgt <16 x i8> %a0, %a1 810 %s = sext <16 x i1> %c to <16 x i8> 811 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 812 %2 = and <16 x i8> %s, %1 813 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 814 %4 = and <16 x i8> %2, %3 815 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 816 %6 = and <16 x i8> %4, %5 817 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 818 %8 = and <16 x i8> %6, %7 819 %9 = extractelement <16 x i8> %8, i32 0 820 ret i8 %9 821} 822 823define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) { 824; SSE-LABEL: test_v32i8_sext: 825; SSE: # %bb.0: 826; SSE-NEXT: pcmpgtb %xmm3, %xmm1 827; SSE-NEXT: pcmpgtb %xmm2, %xmm0 828; SSE-NEXT: pand %xmm1, %xmm0 829; SSE-NEXT: pmovmskb %xmm0, %eax 830; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF 831; SSE-NEXT: sete %al 832; SSE-NEXT: negb %al 833; SSE-NEXT: retq 834; 835; AVX1-LABEL: test_v32i8_sext: 836; AVX1: # %bb.0: 837; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 838; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 839; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 840; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 841; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 842; AVX1-NEXT: vpmovmskb %xmm0, %eax 843; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF 844; AVX1-NEXT: sete %al 845; AVX1-NEXT: negb %al 846; AVX1-NEXT: vzeroupper 847; AVX1-NEXT: retq 848; 849; AVX2-LABEL: test_v32i8_sext: 850; AVX2: # %bb.0: 851; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 852; AVX2-NEXT: vpmovmskb %ymm0, %eax 853; AVX2-NEXT: cmpl $-1, %eax 854; AVX2-NEXT: sete %al 855; AVX2-NEXT: negb %al 856; AVX2-NEXT: vzeroupper 857; AVX2-NEXT: retq 858; 859; AVX512-LABEL: test_v32i8_sext: 860; AVX512: # %bb.0: 861; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 862; AVX512-NEXT: vpmovmskb %ymm0, %eax 863; AVX512-NEXT: cmpl $-1, %eax 864; AVX512-NEXT: sete %al 865; AVX512-NEXT: negb %al 866; AVX512-NEXT: vzeroupper 867; AVX512-NEXT: retq 868 %c = icmp sgt <32 x i8> %a0, %a1 869 %s = sext <32 x i1> %c to <32 x i8> 870 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 871 %2 = and <32 x i8> %s, %1 872 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 873 %4 = and <32 x i8> %2, %3 874 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 875 %6 = and <32 x i8> %4, %5 876 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 877 %8 = and <32 x i8> %6, %7 878 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 879 %10 = and <32 x i8> %8, %9 880 %11 = extractelement <32 x i8> %10, i32 0 881 ret i8 %11 882} 883 884define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) { 885; SSE-LABEL: bool_reduction_v2f64: 886; SSE: # %bb.0: 887; SSE-NEXT: cmpltpd %xmm0, %xmm1 888; SSE-NEXT: movmskpd %xmm1, %eax 889; SSE-NEXT: cmpb $3, %al 890; SSE-NEXT: sete %al 891; SSE-NEXT: retq 892; 893; AVX-LABEL: bool_reduction_v2f64: 894; AVX: # %bb.0: 895; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 896; AVX-NEXT: vmovmskpd %xmm0, %eax 897; AVX-NEXT: cmpb $3, %al 898; AVX-NEXT: sete %al 899; AVX-NEXT: retq 900; 901; AVX512-LABEL: bool_reduction_v2f64: 902; AVX512: # %bb.0: 903; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0 904; AVX512-NEXT: kmovd %k0, %eax 905; AVX512-NEXT: cmpb $3, %al 906; AVX512-NEXT: sete %al 907; AVX512-NEXT: retq 908 %a = fcmp ogt <2 x double> %x, %y 909 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef> 910 %c = and <2 x i1> %a, %b 911 %d = extractelement <2 x i1> %c, i32 0 912 ret i1 %d 913} 914 915define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) { 916; SSE-LABEL: bool_reduction_v4f32: 917; SSE: # %bb.0: 918; SSE-NEXT: cmpeqps %xmm1, %xmm0 919; SSE-NEXT: movmskps %xmm0, %eax 920; SSE-NEXT: cmpb $15, %al 921; SSE-NEXT: sete %al 922; SSE-NEXT: retq 923; 924; AVX-LABEL: bool_reduction_v4f32: 925; AVX: # %bb.0: 926; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 927; AVX-NEXT: vmovmskps %xmm0, %eax 928; AVX-NEXT: cmpb $15, %al 929; AVX-NEXT: sete %al 930; AVX-NEXT: retq 931; 932; AVX512-LABEL: bool_reduction_v4f32: 933; AVX512: # %bb.0: 934; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0 935; AVX512-NEXT: kmovd %k0, %eax 936; AVX512-NEXT: cmpb $15, %al 937; AVX512-NEXT: sete %al 938; AVX512-NEXT: retq 939 %a = fcmp oeq <4 x float> %x, %y 940 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 941 %b = and <4 x i1> %s1, %a 942 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 943 %c = and <4 x i1> %s2, %b 944 %d = extractelement <4 x i1> %c, i32 0 945 ret i1 %d 946} 947 948define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) { 949; SSE-LABEL: bool_reduction_v4f64: 950; SSE: # %bb.0: 951; SSE-NEXT: cmplepd %xmm1, %xmm3 952; SSE-NEXT: cmplepd %xmm0, %xmm2 953; SSE-NEXT: packssdw %xmm3, %xmm2 954; SSE-NEXT: movmskps %xmm2, %eax 955; SSE-NEXT: cmpb $15, %al 956; SSE-NEXT: sete %al 957; SSE-NEXT: retq 958; 959; AVX-LABEL: bool_reduction_v4f64: 960; AVX: # %bb.0: 961; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 962; AVX-NEXT: vmovmskpd %ymm0, %eax 963; AVX-NEXT: cmpb $15, %al 964; AVX-NEXT: sete %al 965; AVX-NEXT: vzeroupper 966; AVX-NEXT: retq 967; 968; AVX512-LABEL: bool_reduction_v4f64: 969; AVX512: # %bb.0: 970; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0 971; AVX512-NEXT: kmovd %k0, %eax 972; AVX512-NEXT: cmpb $15, %al 973; AVX512-NEXT: sete %al 974; AVX512-NEXT: vzeroupper 975; AVX512-NEXT: retq 976 %a = fcmp oge <4 x double> %x, %y 977 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 978 %b = and <4 x i1> %s1, %a 979 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 980 %c = and <4 x i1> %s2, %b 981 %d = extractelement <4 x i1> %c, i32 0 982 ret i1 %d 983} 984 985define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) { 986; SSE-LABEL: bool_reduction_v8f32: 987; SSE: # %bb.0: 988; SSE-NEXT: cmpneqps %xmm3, %xmm1 989; SSE-NEXT: cmpneqps %xmm2, %xmm0 990; SSE-NEXT: packssdw %xmm1, %xmm0 991; SSE-NEXT: packsswb %xmm0, %xmm0 992; SSE-NEXT: pmovmskb %xmm0, %eax 993; SSE-NEXT: cmpb $-1, %al 994; SSE-NEXT: sete %al 995; SSE-NEXT: retq 996; 997; AVX-LABEL: bool_reduction_v8f32: 998; AVX: # %bb.0: 999; AVX-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 1000; AVX-NEXT: vmovmskps %ymm0, %eax 1001; AVX-NEXT: cmpb $-1, %al 1002; AVX-NEXT: sete %al 1003; AVX-NEXT: vzeroupper 1004; AVX-NEXT: retq 1005; 1006; AVX512-LABEL: bool_reduction_v8f32: 1007; AVX512: # %bb.0: 1008; AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k0 1009; AVX512-NEXT: kmovd %k0, %eax 1010; AVX512-NEXT: cmpb $-1, %al 1011; AVX512-NEXT: sete %al 1012; AVX512-NEXT: vzeroupper 1013; AVX512-NEXT: retq 1014 %a = fcmp une <8 x float> %x, %y 1015 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1016 %b = and <8 x i1> %s1, %a 1017 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1018 %c = and <8 x i1> %s2, %b 1019 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1020 %d = and <8 x i1> %s3, %c 1021 %e = extractelement <8 x i1> %d, i32 0 1022 ret i1 %e 1023} 1024 1025define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { 1026; SSE-LABEL: bool_reduction_v2i64: 1027; SSE: # %bb.0: 1028; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 1029; SSE-NEXT: pxor %xmm2, %xmm1 1030; SSE-NEXT: pxor %xmm2, %xmm0 1031; SSE-NEXT: pcmpgtq %xmm1, %xmm0 1032; SSE-NEXT: movmskpd %xmm0, %eax 1033; SSE-NEXT: cmpb $3, %al 1034; SSE-NEXT: sete %al 1035; SSE-NEXT: retq 1036; 1037; AVX-LABEL: bool_reduction_v2i64: 1038; AVX: # %bb.0: 1039; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 1040; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 1041; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 1042; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 1043; AVX-NEXT: vmovmskpd %xmm0, %eax 1044; AVX-NEXT: cmpb $3, %al 1045; AVX-NEXT: sete %al 1046; AVX-NEXT: retq 1047; 1048; AVX512-LABEL: bool_reduction_v2i64: 1049; AVX512: # %bb.0: 1050; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0 1051; AVX512-NEXT: kmovd %k0, %eax 1052; AVX512-NEXT: cmpb $3, %al 1053; AVX512-NEXT: sete %al 1054; AVX512-NEXT: retq 1055 %a = icmp ugt <2 x i64> %x, %y 1056 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef> 1057 %c = and <2 x i1> %a, %b 1058 %d = extractelement <2 x i1> %c, i32 0 1059 ret i1 %d 1060} 1061 1062define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { 1063; SSE-LABEL: bool_reduction_v4i32: 1064; SSE: # %bb.0: 1065; SSE-NEXT: pcmpeqd %xmm1, %xmm0 1066; SSE-NEXT: movmskps %xmm0, %eax 1067; SSE-NEXT: xorl $15, %eax 1068; SSE-NEXT: cmpb $15, %al 1069; SSE-NEXT: sete %al 1070; SSE-NEXT: retq 1071; 1072; AVX-LABEL: bool_reduction_v4i32: 1073; AVX: # %bb.0: 1074; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1075; AVX-NEXT: vmovmskps %xmm0, %eax 1076; AVX-NEXT: xorl $15, %eax 1077; AVX-NEXT: cmpb $15, %al 1078; AVX-NEXT: sete %al 1079; AVX-NEXT: retq 1080; 1081; AVX512-LABEL: bool_reduction_v4i32: 1082; AVX512: # %bb.0: 1083; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 1084; AVX512-NEXT: kmovd %k0, %eax 1085; AVX512-NEXT: cmpb $15, %al 1086; AVX512-NEXT: sete %al 1087; AVX512-NEXT: retq 1088 %a = icmp ne <4 x i32> %x, %y 1089 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 1090 %b = and <4 x i1> %s1, %a 1091 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1092 %c = and <4 x i1> %s2, %b 1093 %d = extractelement <4 x i1> %c, i32 0 1094 ret i1 %d 1095} 1096 1097define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) { 1098; SSE-LABEL: bool_reduction_v8i16: 1099; SSE: # %bb.0: 1100; SSE-NEXT: pcmpgtw %xmm0, %xmm1 1101; SSE-NEXT: packsswb %xmm1, %xmm1 1102; SSE-NEXT: pmovmskb %xmm1, %eax 1103; SSE-NEXT: cmpb $-1, %al 1104; SSE-NEXT: sete %al 1105; SSE-NEXT: retq 1106; 1107; AVX-LABEL: bool_reduction_v8i16: 1108; AVX: # %bb.0: 1109; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1110; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 1111; AVX-NEXT: vpmovmskb %xmm0, %eax 1112; AVX-NEXT: cmpb $-1, %al 1113; AVX-NEXT: sete %al 1114; AVX-NEXT: retq 1115; 1116; AVX512-LABEL: bool_reduction_v8i16: 1117; AVX512: # %bb.0: 1118; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 1119; AVX512-NEXT: kmovd %k0, %eax 1120; AVX512-NEXT: cmpb $-1, %al 1121; AVX512-NEXT: sete %al 1122; AVX512-NEXT: retq 1123 %a = icmp slt <8 x i16> %x, %y 1124 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1125 %b = and <8 x i1> %s1, %a 1126 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1127 %c = and <8 x i1> %s2, %b 1128 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1129 %d = and <8 x i1> %s3, %c 1130 %e = extractelement <8 x i1> %d, i32 0 1131 ret i1 %e 1132} 1133 1134define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) { 1135; SSE-LABEL: bool_reduction_v16i8: 1136; SSE: # %bb.0: 1137; SSE-NEXT: pcmpgtb %xmm1, %xmm0 1138; SSE-NEXT: pmovmskb %xmm0, %eax 1139; SSE-NEXT: cmpw $-1, %ax 1140; SSE-NEXT: sete %al 1141; SSE-NEXT: retq 1142; 1143; AVX-LABEL: bool_reduction_v16i8: 1144; AVX: # %bb.0: 1145; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 1146; AVX-NEXT: vpmovmskb %xmm0, %eax 1147; AVX-NEXT: cmpw $-1, %ax 1148; AVX-NEXT: sete %al 1149; AVX-NEXT: retq 1150; 1151; AVX512-LABEL: bool_reduction_v16i8: 1152; AVX512: # %bb.0: 1153; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 1154; AVX512-NEXT: kortestw %k0, %k0 1155; AVX512-NEXT: setb %al 1156; AVX512-NEXT: retq 1157 %a = icmp sgt <16 x i8> %x, %y 1158 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1159 %b = and <16 x i1> %s1, %a 1160 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1161 %c = and <16 x i1> %s2, %b 1162 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1163 %d = and <16 x i1> %s3, %c 1164 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1165 %e = and <16 x i1> %s4, %d 1166 %f = extractelement <16 x i1> %e, i32 0 1167 ret i1 %f 1168} 1169 1170define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { 1171; SSE-LABEL: bool_reduction_v4i64: 1172; SSE: # %bb.0: 1173; SSE-NEXT: pcmpgtq %xmm1, %xmm3 1174; SSE-NEXT: pcmpgtq %xmm0, %xmm2 1175; SSE-NEXT: packssdw %xmm3, %xmm2 1176; SSE-NEXT: movmskps %xmm2, %eax 1177; SSE-NEXT: cmpb $15, %al 1178; SSE-NEXT: sete %al 1179; SSE-NEXT: retq 1180; 1181; AVX1-LABEL: bool_reduction_v4i64: 1182; AVX1: # %bb.0: 1183; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1184; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1185; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1186; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 1187; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1188; AVX1-NEXT: vmovmskpd %ymm0, %eax 1189; AVX1-NEXT: cmpb $15, %al 1190; AVX1-NEXT: sete %al 1191; AVX1-NEXT: vzeroupper 1192; AVX1-NEXT: retq 1193; 1194; AVX2-LABEL: bool_reduction_v4i64: 1195; AVX2: # %bb.0: 1196; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 1197; AVX2-NEXT: vmovmskpd %ymm0, %eax 1198; AVX2-NEXT: cmpb $15, %al 1199; AVX2-NEXT: sete %al 1200; AVX2-NEXT: vzeroupper 1201; AVX2-NEXT: retq 1202; 1203; AVX512-LABEL: bool_reduction_v4i64: 1204; AVX512: # %bb.0: 1205; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0 1206; AVX512-NEXT: kmovd %k0, %eax 1207; AVX512-NEXT: cmpb $15, %al 1208; AVX512-NEXT: sete %al 1209; AVX512-NEXT: vzeroupper 1210; AVX512-NEXT: retq 1211 %a = icmp slt <4 x i64> %x, %y 1212 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 1213 %b = and <4 x i1> %s1, %a 1214 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1215 %c = and <4 x i1> %s2, %b 1216 %d = extractelement <4 x i1> %c, i32 0 1217 ret i1 %d 1218} 1219 1220define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { 1221; SSE-LABEL: bool_reduction_v8i32: 1222; SSE: # %bb.0: 1223; SSE-NEXT: pminud %xmm1, %xmm3 1224; SSE-NEXT: pcmpeqd %xmm1, %xmm3 1225; SSE-NEXT: pminud %xmm0, %xmm2 1226; SSE-NEXT: pcmpeqd %xmm0, %xmm2 1227; SSE-NEXT: packssdw %xmm3, %xmm2 1228; SSE-NEXT: packsswb %xmm2, %xmm2 1229; SSE-NEXT: pmovmskb %xmm2, %eax 1230; SSE-NEXT: cmpb $-1, %al 1231; SSE-NEXT: sete %al 1232; SSE-NEXT: retq 1233; 1234; AVX1-LABEL: bool_reduction_v8i32: 1235; AVX1: # %bb.0: 1236; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1237; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1238; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 1239; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2 1240; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1 1241; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1242; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1243; AVX1-NEXT: vmovmskps %ymm0, %eax 1244; AVX1-NEXT: cmpb $-1, %al 1245; AVX1-NEXT: sete %al 1246; AVX1-NEXT: vzeroupper 1247; AVX1-NEXT: retq 1248; 1249; AVX2-LABEL: bool_reduction_v8i32: 1250; AVX2: # %bb.0: 1251; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1 1252; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 1253; AVX2-NEXT: vmovmskps %ymm0, %eax 1254; AVX2-NEXT: cmpb $-1, %al 1255; AVX2-NEXT: sete %al 1256; AVX2-NEXT: vzeroupper 1257; AVX2-NEXT: retq 1258; 1259; AVX512-LABEL: bool_reduction_v8i32: 1260; AVX512: # %bb.0: 1261; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k0 1262; AVX512-NEXT: kmovd %k0, %eax 1263; AVX512-NEXT: cmpb $-1, %al 1264; AVX512-NEXT: sete %al 1265; AVX512-NEXT: vzeroupper 1266; AVX512-NEXT: retq 1267 %a = icmp ule <8 x i32> %x, %y 1268 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1269 %b = and <8 x i1> %s1, %a 1270 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1271 %c = and <8 x i1> %s2, %b 1272 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1273 %d = and <8 x i1> %s3, %c 1274 %e = extractelement <8 x i1> %d, i32 0 1275 ret i1 %e 1276} 1277 1278define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) { 1279; SSE-LABEL: bool_reduction_v16i16: 1280; SSE: # %bb.0: 1281; SSE-NEXT: pcmpeqw %xmm3, %xmm1 1282; SSE-NEXT: pcmpeqw %xmm2, %xmm0 1283; SSE-NEXT: packsswb %xmm1, %xmm0 1284; SSE-NEXT: pmovmskb %xmm0, %eax 1285; SSE-NEXT: cmpw $-1, %ax 1286; SSE-NEXT: sete %al 1287; SSE-NEXT: retq 1288; 1289; AVX1-LABEL: bool_reduction_v16i16: 1290; AVX1: # %bb.0: 1291; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1292; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1293; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2 1294; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 1295; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 1296; AVX1-NEXT: vpmovmskb %xmm0, %eax 1297; AVX1-NEXT: cmpw $-1, %ax 1298; AVX1-NEXT: sete %al 1299; AVX1-NEXT: vzeroupper 1300; AVX1-NEXT: retq 1301; 1302; AVX2-LABEL: bool_reduction_v16i16: 1303; AVX2: # %bb.0: 1304; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 1305; AVX2-NEXT: vpmovmskb %ymm0, %eax 1306; AVX2-NEXT: cmpl $-1, %eax 1307; AVX2-NEXT: sete %al 1308; AVX2-NEXT: vzeroupper 1309; AVX2-NEXT: retq 1310; 1311; AVX512-LABEL: bool_reduction_v16i16: 1312; AVX512: # %bb.0: 1313; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 1314; AVX512-NEXT: kortestw %k0, %k0 1315; AVX512-NEXT: setb %al 1316; AVX512-NEXT: vzeroupper 1317; AVX512-NEXT: retq 1318 %a = icmp eq <16 x i16> %x, %y 1319 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1320 %b = and <16 x i1> %s1, %a 1321 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1322 %c = and <16 x i1> %s2, %b 1323 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1324 %d = and <16 x i1> %s3, %c 1325 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1326 %e = and <16 x i1> %s4, %d 1327 %f = extractelement <16 x i1> %e, i32 0 1328 ret i1 %f 1329} 1330 1331define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { 1332; SSE-LABEL: bool_reduction_v32i8: 1333; SSE: # %bb.0: 1334; SSE-NEXT: pcmpeqb %xmm3, %xmm1 1335; SSE-NEXT: pcmpeqb %xmm2, %xmm0 1336; SSE-NEXT: pand %xmm1, %xmm0 1337; SSE-NEXT: pmovmskb %xmm0, %eax 1338; SSE-NEXT: cmpw $-1, %ax 1339; SSE-NEXT: sete %al 1340; SSE-NEXT: retq 1341; 1342; AVX1-LABEL: bool_reduction_v32i8: 1343; AVX1: # %bb.0: 1344; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1345; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1346; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 1347; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1348; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 1349; AVX1-NEXT: vpmovmskb %xmm0, %eax 1350; AVX1-NEXT: cmpw $-1, %ax 1351; AVX1-NEXT: sete %al 1352; AVX1-NEXT: vzeroupper 1353; AVX1-NEXT: retq 1354; 1355; AVX2-LABEL: bool_reduction_v32i8: 1356; AVX2: # %bb.0: 1357; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 1358; AVX2-NEXT: vpmovmskb %ymm0, %eax 1359; AVX2-NEXT: cmpl $-1, %eax 1360; AVX2-NEXT: sete %al 1361; AVX2-NEXT: vzeroupper 1362; AVX2-NEXT: retq 1363; 1364; AVX512-LABEL: bool_reduction_v32i8: 1365; AVX512: # %bb.0: 1366; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 1367; AVX512-NEXT: kortestd %k0, %k0 1368; AVX512-NEXT: setb %al 1369; AVX512-NEXT: vzeroupper 1370; AVX512-NEXT: retq 1371 %a = icmp eq <32 x i8> %x, %y 1372 %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1373 %b = and <32 x i1> %s1, %a 1374 %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1375 %c = and <32 x i1> %s2, %b 1376 %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1377 %d = and <32 x i1> %s3, %c 1378 %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1379 %e = and <32 x i1> %s4, %d 1380 %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1381 %f = and <32 x i1> %s5, %e 1382 %g = extractelement <32 x i1> %f, i32 0 1383 ret i1 %g 1384} 1385