1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE 3; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 4; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512 6 7define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) { 8; SSE-LABEL: test_v2f64_sext: 9; SSE: # %bb.0: 10; SSE-NEXT: cmpltpd %xmm0, %xmm1 11; SSE-NEXT: movmskpd %xmm1, %eax 12; SSE-NEXT: negl %eax 13; SSE-NEXT: sbbq %rax, %rax 14; SSE-NEXT: retq 15; 16; AVX-LABEL: test_v2f64_sext: 17; AVX: # %bb.0: 18; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 19; AVX-NEXT: vmovmskpd %xmm0, %eax 20; AVX-NEXT: negl %eax 21; AVX-NEXT: sbbq %rax, %rax 22; AVX-NEXT: retq 23; 24; AVX512-LABEL: test_v2f64_sext: 25; AVX512: # %bb.0: 26; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 27; AVX512-NEXT: vmovmskpd %xmm0, %eax 28; AVX512-NEXT: negl %eax 29; AVX512-NEXT: sbbq %rax, %rax 30; AVX512-NEXT: retq 31 %c = fcmp ogt <2 x double> %a0, %a1 32 %s = sext <2 x i1> %c to <2 x i64> 33 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 34 %2 = or <2 x i64> %s, %1 35 %3 = extractelement <2 x i64> %2, i32 0 36 ret i64 %3 37} 38 39define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) { 40; SSE-LABEL: test_v4f64_sext: 41; SSE: # %bb.0: 42; SSE-NEXT: cmpltpd %xmm1, %xmm3 43; SSE-NEXT: cmpltpd %xmm0, %xmm2 44; SSE-NEXT: orpd %xmm3, %xmm2 45; SSE-NEXT: movmskpd %xmm2, %eax 46; SSE-NEXT: negl %eax 47; SSE-NEXT: sbbq %rax, %rax 48; SSE-NEXT: retq 49; 50; AVX-LABEL: test_v4f64_sext: 51; AVX: # %bb.0: 52; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 53; AVX-NEXT: vmovmskpd %ymm0, %eax 54; AVX-NEXT: negl %eax 55; AVX-NEXT: sbbq %rax, %rax 56; AVX-NEXT: vzeroupper 57; AVX-NEXT: retq 58; 59; AVX512-LABEL: test_v4f64_sext: 60; AVX512: # %bb.0: 61; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 62; AVX512-NEXT: vmovmskpd %ymm0, %eax 63; AVX512-NEXT: negl %eax 64; AVX512-NEXT: sbbq %rax, %rax 65; AVX512-NEXT: vzeroupper 66; AVX512-NEXT: retq 67 %c = fcmp ogt <4 x double> %a0, %a1 68 %s = sext <4 x i1> %c to <4 x i64> 69 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 70 %2 = or <4 x i64> %s, %1 71 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 72 %4 = or <4 x i64> %2, %3 73 %5 = extractelement <4 x i64> %4, i64 0 74 ret i64 %5 75} 76 77define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) { 78; SSE-LABEL: test_v4f64_legal_sext: 79; SSE: # %bb.0: 80; SSE-NEXT: cmpltpd %xmm1, %xmm3 81; SSE-NEXT: cmpltpd %xmm0, %xmm2 82; SSE-NEXT: packssdw %xmm3, %xmm2 83; SSE-NEXT: movmskps %xmm2, %eax 84; SSE-NEXT: negl %eax 85; SSE-NEXT: sbbq %rax, %rax 86; SSE-NEXT: retq 87; 88; AVX-LABEL: test_v4f64_legal_sext: 89; AVX: # %bb.0: 90; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 91; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 92; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 93; AVX-NEXT: vmovmskps %xmm0, %eax 94; AVX-NEXT: negl %eax 95; AVX-NEXT: sbbq %rax, %rax 96; AVX-NEXT: vzeroupper 97; AVX-NEXT: retq 98; 99; AVX512-LABEL: test_v4f64_legal_sext: 100; AVX512: # %bb.0: 101; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1 102; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 103; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 104; AVX512-NEXT: vmovmskps %xmm0, %eax 105; AVX512-NEXT: negl %eax 106; AVX512-NEXT: sbbq %rax, %rax 107; AVX512-NEXT: vzeroupper 108; AVX512-NEXT: retq 109 %c = fcmp ogt <4 x double> %a0, %a1 110 %s = sext <4 x i1> %c to <4 x i32> 111 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 112 %2 = or <4 x i32> %s, %1 113 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 114 %4 = or <4 x i32> %2, %3 115 %5 = extractelement <4 x i32> %4, i64 0 116 %6 = sext i32 %5 to i64 117 ret i64 %6 118} 119 120define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) { 121; SSE-LABEL: test_v4f32_sext: 122; SSE: # %bb.0: 123; SSE-NEXT: cmpltps %xmm0, %xmm1 124; SSE-NEXT: movmskps %xmm1, %eax 125; SSE-NEXT: negl %eax 126; SSE-NEXT: sbbl %eax, %eax 127; SSE-NEXT: retq 128; 129; AVX-LABEL: test_v4f32_sext: 130; AVX: # %bb.0: 131; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 132; AVX-NEXT: vmovmskps %xmm0, %eax 133; AVX-NEXT: negl %eax 134; AVX-NEXT: sbbl %eax, %eax 135; AVX-NEXT: retq 136; 137; AVX512-LABEL: test_v4f32_sext: 138; AVX512: # %bb.0: 139; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 140; AVX512-NEXT: vmovmskps %xmm0, %eax 141; AVX512-NEXT: negl %eax 142; AVX512-NEXT: sbbl %eax, %eax 143; AVX512-NEXT: retq 144 %c = fcmp ogt <4 x float> %a0, %a1 145 %s = sext <4 x i1> %c to <4 x i32> 146 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 147 %2 = or <4 x i32> %s, %1 148 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 149 %4 = or <4 x i32> %2, %3 150 %5 = extractelement <4 x i32> %4, i32 0 151 ret i32 %5 152} 153 154define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) { 155; SSE-LABEL: test_v8f32_sext: 156; SSE: # %bb.0: 157; SSE-NEXT: cmpltps %xmm1, %xmm3 158; SSE-NEXT: cmpltps %xmm0, %xmm2 159; SSE-NEXT: orps %xmm3, %xmm2 160; SSE-NEXT: movmskps %xmm2, %eax 161; SSE-NEXT: negl %eax 162; SSE-NEXT: sbbl %eax, %eax 163; SSE-NEXT: retq 164; 165; AVX-LABEL: test_v8f32_sext: 166; AVX: # %bb.0: 167; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 168; AVX-NEXT: vmovmskps %ymm0, %eax 169; AVX-NEXT: negl %eax 170; AVX-NEXT: sbbl %eax, %eax 171; AVX-NEXT: vzeroupper 172; AVX-NEXT: retq 173; 174; AVX512-LABEL: test_v8f32_sext: 175; AVX512: # %bb.0: 176; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 177; AVX512-NEXT: vmovmskps %ymm0, %eax 178; AVX512-NEXT: negl %eax 179; AVX512-NEXT: sbbl %eax, %eax 180; AVX512-NEXT: vzeroupper 181; AVX512-NEXT: retq 182 %c = fcmp ogt <8 x float> %a0, %a1 183 %s = sext <8 x i1> %c to <8 x i32> 184 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 185 %2 = or <8 x i32> %s, %1 186 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 187 %4 = or <8 x i32> %2, %3 188 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 189 %6 = or <8 x i32> %4, %5 190 %7 = extractelement <8 x i32> %6, i32 0 191 ret i32 %7 192} 193 194define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) { 195; SSE-LABEL: test_v8f32_legal_sext: 196; SSE: # %bb.0: 197; SSE-NEXT: cmpltps %xmm1, %xmm3 198; SSE-NEXT: cmpltps %xmm0, %xmm2 199; SSE-NEXT: packssdw %xmm3, %xmm2 200; SSE-NEXT: pmovmskb %xmm2, %eax 201; SSE-NEXT: negl %eax 202; SSE-NEXT: sbbl %eax, %eax 203; SSE-NEXT: retq 204; 205; AVX-LABEL: test_v8f32_legal_sext: 206; AVX: # %bb.0: 207; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 208; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 209; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 210; AVX-NEXT: vpmovmskb %xmm0, %eax 211; AVX-NEXT: negl %eax 212; AVX-NEXT: sbbl %eax, %eax 213; AVX-NEXT: vzeroupper 214; AVX-NEXT: retq 215; 216; AVX512-LABEL: test_v8f32_legal_sext: 217; AVX512: # %bb.0: 218; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0 219; AVX512-NEXT: vpmovm2w %k0, %xmm0 220; AVX512-NEXT: vpmovmskb %xmm0, %eax 221; AVX512-NEXT: negl %eax 222; AVX512-NEXT: sbbl %eax, %eax 223; AVX512-NEXT: vzeroupper 224; AVX512-NEXT: retq 225 %c = fcmp ogt <8 x float> %a0, %a1 226 %s = sext <8 x i1> %c to <8 x i16> 227 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 228 %2 = or <8 x i16> %s, %1 229 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 230 %4 = or <8 x i16> %2, %3 231 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 232 %6 = or <8 x i16> %4, %5 233 %7 = extractelement <8 x i16> %6, i32 0 234 %8 = sext i16 %7 to i32 235 ret i32 %8 236} 237 238define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) { 239; SSE-LABEL: test_v2i64_sext: 240; SSE: # %bb.0: 241; SSE-NEXT: pcmpgtq %xmm1, %xmm0 242; SSE-NEXT: movmskpd %xmm0, %eax 243; SSE-NEXT: negl %eax 244; SSE-NEXT: sbbq %rax, %rax 245; SSE-NEXT: retq 246; 247; AVX-LABEL: test_v2i64_sext: 248; AVX: # %bb.0: 249; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 250; AVX-NEXT: vmovmskpd %xmm0, %eax 251; AVX-NEXT: negl %eax 252; AVX-NEXT: sbbq %rax, %rax 253; AVX-NEXT: retq 254; 255; AVX512-LABEL: test_v2i64_sext: 256; AVX512: # %bb.0: 257; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 258; AVX512-NEXT: vmovmskpd %xmm0, %eax 259; AVX512-NEXT: negl %eax 260; AVX512-NEXT: sbbq %rax, %rax 261; AVX512-NEXT: retq 262 %c = icmp sgt <2 x i64> %a0, %a1 263 %s = sext <2 x i1> %c to <2 x i64> 264 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 265 %2 = or <2 x i64> %s, %1 266 %3 = extractelement <2 x i64> %2, i32 0 267 ret i64 %3 268} 269 270define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) { 271; SSE-LABEL: test_v4i64_sext: 272; SSE: # %bb.0: 273; SSE-NEXT: pcmpgtq %xmm3, %xmm1 274; SSE-NEXT: pcmpgtq %xmm2, %xmm0 275; SSE-NEXT: por %xmm1, %xmm0 276; SSE-NEXT: movmskpd %xmm0, %eax 277; SSE-NEXT: negl %eax 278; SSE-NEXT: sbbq %rax, %rax 279; SSE-NEXT: retq 280; 281; AVX1-LABEL: test_v4i64_sext: 282; AVX1: # %bb.0: 283; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 284; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 285; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 286; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 287; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 288; AVX1-NEXT: vmovmskpd %ymm0, %eax 289; AVX1-NEXT: negl %eax 290; AVX1-NEXT: sbbq %rax, %rax 291; AVX1-NEXT: vzeroupper 292; AVX1-NEXT: retq 293; 294; AVX2-LABEL: test_v4i64_sext: 295; AVX2: # %bb.0: 296; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 297; AVX2-NEXT: vmovmskpd %ymm0, %eax 298; AVX2-NEXT: negl %eax 299; AVX2-NEXT: sbbq %rax, %rax 300; AVX2-NEXT: vzeroupper 301; AVX2-NEXT: retq 302; 303; AVX512-LABEL: test_v4i64_sext: 304; AVX512: # %bb.0: 305; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 306; AVX512-NEXT: vmovmskpd %ymm0, %eax 307; AVX512-NEXT: negl %eax 308; AVX512-NEXT: sbbq %rax, %rax 309; AVX512-NEXT: vzeroupper 310; AVX512-NEXT: retq 311 %c = icmp sgt <4 x i64> %a0, %a1 312 %s = sext <4 x i1> %c to <4 x i64> 313 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 314 %2 = or <4 x i64> %s, %1 315 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 316 %4 = or <4 x i64> %2, %3 317 %5 = extractelement <4 x i64> %4, i64 0 318 ret i64 %5 319} 320 321define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) { 322; SSE-LABEL: test_v4i64_legal_sext: 323; SSE: # %bb.0: 324; SSE-NEXT: pcmpgtq %xmm3, %xmm1 325; SSE-NEXT: pcmpgtq %xmm2, %xmm0 326; SSE-NEXT: packssdw %xmm1, %xmm0 327; SSE-NEXT: movmskps %xmm0, %eax 328; SSE-NEXT: negl %eax 329; SSE-NEXT: sbbq %rax, %rax 330; SSE-NEXT: retq 331; 332; AVX1-LABEL: test_v4i64_legal_sext: 333; AVX1: # %bb.0: 334; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 335; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 336; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 337; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 338; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 339; AVX1-NEXT: vmovmskps %xmm0, %eax 340; AVX1-NEXT: negl %eax 341; AVX1-NEXT: sbbq %rax, %rax 342; AVX1-NEXT: vzeroupper 343; AVX1-NEXT: retq 344; 345; AVX2-LABEL: test_v4i64_legal_sext: 346; AVX2: # %bb.0: 347; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 348; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 349; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 350; AVX2-NEXT: vmovmskps %xmm0, %eax 351; AVX2-NEXT: negl %eax 352; AVX2-NEXT: sbbq %rax, %rax 353; AVX2-NEXT: vzeroupper 354; AVX2-NEXT: retq 355; 356; AVX512-LABEL: test_v4i64_legal_sext: 357; AVX512: # %bb.0: 358; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 359; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 360; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 361; AVX512-NEXT: vmovmskps %xmm0, %eax 362; AVX512-NEXT: negl %eax 363; AVX512-NEXT: sbbq %rax, %rax 364; AVX512-NEXT: vzeroupper 365; AVX512-NEXT: retq 366 %c = icmp sgt <4 x i64> %a0, %a1 367 %s = sext <4 x i1> %c to <4 x i32> 368 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 369 %2 = or <4 x i32> %s, %1 370 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 371 %4 = or <4 x i32> %2, %3 372 %5 = extractelement <4 x i32> %4, i64 0 373 %6 = sext i32 %5 to i64 374 ret i64 %6 375} 376 377define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) { 378; SSE-LABEL: test_v4i32_sext: 379; SSE: # %bb.0: 380; SSE-NEXT: pcmpgtd %xmm1, %xmm0 381; SSE-NEXT: movmskps %xmm0, %eax 382; SSE-NEXT: negl %eax 383; SSE-NEXT: sbbl %eax, %eax 384; SSE-NEXT: retq 385; 386; AVX-LABEL: test_v4i32_sext: 387; AVX: # %bb.0: 388; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 389; AVX-NEXT: vmovmskps %xmm0, %eax 390; AVX-NEXT: negl %eax 391; AVX-NEXT: sbbl %eax, %eax 392; AVX-NEXT: retq 393; 394; AVX512-LABEL: test_v4i32_sext: 395; AVX512: # %bb.0: 396; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 397; AVX512-NEXT: vmovmskps %xmm0, %eax 398; AVX512-NEXT: negl %eax 399; AVX512-NEXT: sbbl %eax, %eax 400; AVX512-NEXT: retq 401 %c = icmp sgt <4 x i32> %a0, %a1 402 %s = sext <4 x i1> %c to <4 x i32> 403 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 404 %2 = or <4 x i32> %s, %1 405 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 406 %4 = or <4 x i32> %2, %3 407 %5 = extractelement <4 x i32> %4, i32 0 408 ret i32 %5 409} 410 411define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) { 412; SSE-LABEL: test_v8i32_sext: 413; SSE: # %bb.0: 414; SSE-NEXT: pcmpgtd %xmm3, %xmm1 415; SSE-NEXT: pcmpgtd %xmm2, %xmm0 416; SSE-NEXT: por %xmm1, %xmm0 417; SSE-NEXT: movmskps %xmm0, %eax 418; SSE-NEXT: negl %eax 419; SSE-NEXT: sbbl %eax, %eax 420; SSE-NEXT: retq 421; 422; AVX1-LABEL: test_v8i32_sext: 423; AVX1: # %bb.0: 424; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 425; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 426; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 427; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 428; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 429; AVX1-NEXT: vmovmskps %ymm0, %eax 430; AVX1-NEXT: negl %eax 431; AVX1-NEXT: sbbl %eax, %eax 432; AVX1-NEXT: vzeroupper 433; AVX1-NEXT: retq 434; 435; AVX2-LABEL: test_v8i32_sext: 436; AVX2: # %bb.0: 437; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 438; AVX2-NEXT: vmovmskps %ymm0, %eax 439; AVX2-NEXT: negl %eax 440; AVX2-NEXT: sbbl %eax, %eax 441; AVX2-NEXT: vzeroupper 442; AVX2-NEXT: retq 443; 444; AVX512-LABEL: test_v8i32_sext: 445; AVX512: # %bb.0: 446; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 447; AVX512-NEXT: vmovmskps %ymm0, %eax 448; AVX512-NEXT: negl %eax 449; AVX512-NEXT: sbbl %eax, %eax 450; AVX512-NEXT: vzeroupper 451; AVX512-NEXT: retq 452 %c = icmp sgt <8 x i32> %a0, %a1 453 %s = sext <8 x i1> %c to <8 x i32> 454 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 455 %2 = or <8 x i32> %s, %1 456 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 457 %4 = or <8 x i32> %2, %3 458 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 459 %6 = or <8 x i32> %4, %5 460 %7 = extractelement <8 x i32> %6, i32 0 461 ret i32 %7 462} 463 464define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) { 465; SSE-LABEL: test_v8i32_legal_sext: 466; SSE: # %bb.0: 467; SSE-NEXT: pcmpgtd %xmm3, %xmm1 468; SSE-NEXT: pcmpgtd %xmm2, %xmm0 469; SSE-NEXT: packssdw %xmm1, %xmm0 470; SSE-NEXT: pmovmskb %xmm0, %eax 471; SSE-NEXT: negl %eax 472; SSE-NEXT: sbbl %eax, %eax 473; SSE-NEXT: retq 474; 475; AVX1-LABEL: test_v8i32_legal_sext: 476; AVX1: # %bb.0: 477; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 478; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 479; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 480; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 481; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 482; AVX1-NEXT: vpmovmskb %xmm0, %eax 483; AVX1-NEXT: negl %eax 484; AVX1-NEXT: sbbl %eax, %eax 485; AVX1-NEXT: vzeroupper 486; AVX1-NEXT: retq 487; 488; AVX2-LABEL: test_v8i32_legal_sext: 489; AVX2: # %bb.0: 490; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 491; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 492; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 493; AVX2-NEXT: vpmovmskb %xmm0, %eax 494; AVX2-NEXT: negl %eax 495; AVX2-NEXT: sbbl %eax, %eax 496; AVX2-NEXT: vzeroupper 497; AVX2-NEXT: retq 498; 499; AVX512-LABEL: test_v8i32_legal_sext: 500; AVX512: # %bb.0: 501; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 502; AVX512-NEXT: vpmovm2w %k0, %xmm0 503; AVX512-NEXT: vpmovmskb %xmm0, %eax 504; AVX512-NEXT: negl %eax 505; AVX512-NEXT: sbbl %eax, %eax 506; AVX512-NEXT: vzeroupper 507; AVX512-NEXT: retq 508 %c = icmp sgt <8 x i32> %a0, %a1 509 %s = sext <8 x i1> %c to <8 x i16> 510 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 511 %2 = or <8 x i16> %s, %1 512 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 513 %4 = or <8 x i16> %2, %3 514 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 515 %6 = or <8 x i16> %4, %5 516 %7 = extractelement <8 x i16> %6, i32 0 517 %8 = sext i16 %7 to i32 518 ret i32 %8 519} 520 521define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) { 522; SSE-LABEL: test_v8i16_sext: 523; SSE: # %bb.0: 524; SSE-NEXT: pcmpgtw %xmm1, %xmm0 525; SSE-NEXT: pmovmskb %xmm0, %eax 526; SSE-NEXT: negl %eax 527; SSE-NEXT: sbbl %eax, %eax 528; SSE-NEXT: # kill: def $ax killed $ax killed $eax 529; SSE-NEXT: retq 530; 531; AVX-LABEL: test_v8i16_sext: 532; AVX: # %bb.0: 533; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 534; AVX-NEXT: vpmovmskb %xmm0, %eax 535; AVX-NEXT: negl %eax 536; AVX-NEXT: sbbl %eax, %eax 537; AVX-NEXT: # kill: def $ax killed $ax killed $eax 538; AVX-NEXT: retq 539; 540; AVX512-LABEL: test_v8i16_sext: 541; AVX512: # %bb.0: 542; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 543; AVX512-NEXT: vpmovmskb %xmm0, %eax 544; AVX512-NEXT: negl %eax 545; AVX512-NEXT: sbbl %eax, %eax 546; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 547; AVX512-NEXT: retq 548 %c = icmp sgt <8 x i16> %a0, %a1 549 %s = sext <8 x i1> %c to <8 x i16> 550 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 551 %2 = or <8 x i16> %s, %1 552 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 553 %4 = or <8 x i16> %2, %3 554 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 555 %6 = or <8 x i16> %4, %5 556 %7 = extractelement <8 x i16> %6, i32 0 557 ret i16 %7 558} 559 560define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) { 561; SSE-LABEL: test_v16i16_sext: 562; SSE: # %bb.0: 563; SSE-NEXT: pcmpgtw %xmm3, %xmm1 564; SSE-NEXT: pcmpgtw %xmm2, %xmm0 565; SSE-NEXT: por %xmm1, %xmm0 566; SSE-NEXT: pmovmskb %xmm0, %eax 567; SSE-NEXT: negl %eax 568; SSE-NEXT: sbbl %eax, %eax 569; SSE-NEXT: # kill: def $ax killed $ax killed $eax 570; SSE-NEXT: retq 571; 572; AVX1-LABEL: test_v16i16_sext: 573; AVX1: # %bb.0: 574; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 575; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 576; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 577; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 578; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 579; AVX1-NEXT: vpmovmskb %xmm0, %eax 580; AVX1-NEXT: negl %eax 581; AVX1-NEXT: sbbl %eax, %eax 582; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 583; AVX1-NEXT: vzeroupper 584; AVX1-NEXT: retq 585; 586; AVX2-LABEL: test_v16i16_sext: 587; AVX2: # %bb.0: 588; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 589; AVX2-NEXT: vpmovmskb %ymm0, %eax 590; AVX2-NEXT: negl %eax 591; AVX2-NEXT: sbbl %eax, %eax 592; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 593; AVX2-NEXT: vzeroupper 594; AVX2-NEXT: retq 595; 596; AVX512-LABEL: test_v16i16_sext: 597; AVX512: # %bb.0: 598; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 599; AVX512-NEXT: vpmovmskb %ymm0, %eax 600; AVX512-NEXT: negl %eax 601; AVX512-NEXT: sbbl %eax, %eax 602; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 603; AVX512-NEXT: vzeroupper 604; AVX512-NEXT: retq 605 %c = icmp sgt <16 x i16> %a0, %a1 606 %s = sext <16 x i1> %c to <16 x i16> 607 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 608 %2 = or <16 x i16> %s, %1 609 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 610 %4 = or <16 x i16> %2, %3 611 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 612 %6 = or <16 x i16> %4, %5 613 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 614 %8 = or <16 x i16> %6, %7 615 %9 = extractelement <16 x i16> %8, i32 0 616 ret i16 %9 617} 618 619define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) { 620; SSE-LABEL: test_v16i16_legal_sext: 621; SSE: # %bb.0: 622; SSE-NEXT: pcmpgtw %xmm3, %xmm1 623; SSE-NEXT: pcmpgtw %xmm2, %xmm0 624; SSE-NEXT: packsswb %xmm1, %xmm0 625; SSE-NEXT: pmovmskb %xmm0, %eax 626; SSE-NEXT: negl %eax 627; SSE-NEXT: sbbl %eax, %eax 628; SSE-NEXT: # kill: def $ax killed $ax killed $eax 629; SSE-NEXT: retq 630; 631; AVX1-LABEL: test_v16i16_legal_sext: 632; AVX1: # %bb.0: 633; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 634; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 635; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 636; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 637; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 638; AVX1-NEXT: vpmovmskb %xmm0, %eax 639; AVX1-NEXT: negl %eax 640; AVX1-NEXT: sbbl %eax, %eax 641; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 642; AVX1-NEXT: vzeroupper 643; AVX1-NEXT: retq 644; 645; AVX2-LABEL: test_v16i16_legal_sext: 646; AVX2: # %bb.0: 647; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 648; AVX2-NEXT: vpmovmskb %ymm0, %eax 649; AVX2-NEXT: negl %eax 650; AVX2-NEXT: sbbl %eax, %eax 651; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 652; AVX2-NEXT: vzeroupper 653; AVX2-NEXT: retq 654; 655; AVX512-LABEL: test_v16i16_legal_sext: 656; AVX512: # %bb.0: 657; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 658; AVX512-NEXT: vpmovm2b %k0, %xmm0 659; AVX512-NEXT: vpmovmskb %xmm0, %eax 660; AVX512-NEXT: negl %eax 661; AVX512-NEXT: sbbl %eax, %eax 662; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 663; AVX512-NEXT: vzeroupper 664; AVX512-NEXT: retq 665 %c = icmp sgt <16 x i16> %a0, %a1 666 %s = sext <16 x i1> %c to <16 x i8> 667 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 668 %2 = or <16 x i8> %s, %1 669 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 670 %4 = or <16 x i8> %2, %3 671 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 672 %6 = or <16 x i8> %4, %5 673 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 674 %8 = or <16 x i8> %6, %7 675 %9 = extractelement <16 x i8> %8, i32 0 676 %10 = sext i8 %9 to i16 677 ret i16 %10 678} 679 680define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) { 681; SSE-LABEL: test_v16i8_sext: 682; SSE: # %bb.0: 683; SSE-NEXT: pcmpgtb %xmm1, %xmm0 684; SSE-NEXT: pmovmskb %xmm0, %eax 685; SSE-NEXT: negl %eax 686; SSE-NEXT: sbbl %eax, %eax 687; SSE-NEXT: # kill: def $al killed $al killed $eax 688; SSE-NEXT: retq 689; 690; AVX-LABEL: test_v16i8_sext: 691; AVX: # %bb.0: 692; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 693; AVX-NEXT: vpmovmskb %xmm0, %eax 694; AVX-NEXT: negl %eax 695; AVX-NEXT: sbbl %eax, %eax 696; AVX-NEXT: # kill: def $al killed $al killed $eax 697; AVX-NEXT: retq 698; 699; AVX512-LABEL: test_v16i8_sext: 700; AVX512: # %bb.0: 701; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 702; AVX512-NEXT: vpmovmskb %xmm0, %eax 703; AVX512-NEXT: negl %eax 704; AVX512-NEXT: sbbl %eax, %eax 705; AVX512-NEXT: # kill: def $al killed $al killed $eax 706; AVX512-NEXT: retq 707 %c = icmp sgt <16 x i8> %a0, %a1 708 %s = sext <16 x i1> %c to <16 x i8> 709 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 710 %2 = or <16 x i8> %s, %1 711 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 712 %4 = or <16 x i8> %2, %3 713 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 714 %6 = or <16 x i8> %4, %5 715 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 716 %8 = or <16 x i8> %6, %7 717 %9 = extractelement <16 x i8> %8, i32 0 718 ret i8 %9 719} 720 721define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) { 722; SSE-LABEL: test_v32i8_sext: 723; SSE: # %bb.0: 724; SSE-NEXT: pcmpgtb %xmm3, %xmm1 725; SSE-NEXT: pcmpgtb %xmm2, %xmm0 726; SSE-NEXT: por %xmm1, %xmm0 727; SSE-NEXT: pmovmskb %xmm0, %eax 728; SSE-NEXT: negl %eax 729; SSE-NEXT: sbbl %eax, %eax 730; SSE-NEXT: # kill: def $al killed $al killed $eax 731; SSE-NEXT: retq 732; 733; AVX1-LABEL: test_v32i8_sext: 734; AVX1: # %bb.0: 735; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 736; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 737; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 738; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 739; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 740; AVX1-NEXT: vpmovmskb %xmm0, %eax 741; AVX1-NEXT: negl %eax 742; AVX1-NEXT: sbbl %eax, %eax 743; AVX1-NEXT: # kill: def $al killed $al killed $eax 744; AVX1-NEXT: vzeroupper 745; AVX1-NEXT: retq 746; 747; AVX2-LABEL: test_v32i8_sext: 748; AVX2: # %bb.0: 749; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 750; AVX2-NEXT: vpmovmskb %ymm0, %eax 751; AVX2-NEXT: negl %eax 752; AVX2-NEXT: sbbl %eax, %eax 753; AVX2-NEXT: # kill: def $al killed $al killed $eax 754; AVX2-NEXT: vzeroupper 755; AVX2-NEXT: retq 756; 757; AVX512-LABEL: test_v32i8_sext: 758; AVX512: # %bb.0: 759; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 760; AVX512-NEXT: vpmovmskb %ymm0, %eax 761; AVX512-NEXT: negl %eax 762; AVX512-NEXT: sbbl %eax, %eax 763; AVX512-NEXT: # kill: def $al killed $al killed $eax 764; AVX512-NEXT: vzeroupper 765; AVX512-NEXT: retq 766 %c = icmp sgt <32 x i8> %a0, %a1 767 %s = sext <32 x i1> %c to <32 x i8> 768 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 769 %2 = or <32 x i8> %s, %1 770 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 771 %4 = or <32 x i8> %2, %3 772 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 773 %6 = or <32 x i8> %4, %5 774 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 775 %8 = or <32 x i8> %6, %7 776 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 777 %10 = or <32 x i8> %8, %9 778 %11 = extractelement <32 x i8> %10, i32 0 779 ret i8 %11 780} 781 782define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) { 783; SSE-LABEL: bool_reduction_v2f64: 784; SSE: # %bb.0: 785; SSE-NEXT: cmpltpd %xmm0, %xmm1 786; SSE-NEXT: movmskpd %xmm1, %eax 787; SSE-NEXT: testl %eax, %eax 788; SSE-NEXT: setne %al 789; SSE-NEXT: retq 790; 791; AVX-LABEL: bool_reduction_v2f64: 792; AVX: # %bb.0: 793; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 794; AVX-NEXT: vmovmskpd %xmm0, %eax 795; AVX-NEXT: testl %eax, %eax 796; AVX-NEXT: setne %al 797; AVX-NEXT: retq 798; 799; AVX512-LABEL: bool_reduction_v2f64: 800; AVX512: # %bb.0: 801; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0 802; AVX512-NEXT: kmovd %k0, %eax 803; AVX512-NEXT: testb %al, %al 804; AVX512-NEXT: setne %al 805; AVX512-NEXT: retq 806 %a = fcmp ogt <2 x double> %x, %y 807 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef> 808 %c = or <2 x i1> %a, %b 809 %d = extractelement <2 x i1> %c, i32 0 810 ret i1 %d 811} 812 813define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) { 814; SSE-LABEL: bool_reduction_v4f32: 815; SSE: # %bb.0: 816; SSE-NEXT: cmpeqps %xmm1, %xmm0 817; SSE-NEXT: movmskps %xmm0, %eax 818; SSE-NEXT: testl %eax, %eax 819; SSE-NEXT: setne %al 820; SSE-NEXT: retq 821; 822; AVX-LABEL: bool_reduction_v4f32: 823; AVX: # %bb.0: 824; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 825; AVX-NEXT: vmovmskps %xmm0, %eax 826; AVX-NEXT: testl %eax, %eax 827; AVX-NEXT: setne %al 828; AVX-NEXT: retq 829; 830; AVX512-LABEL: bool_reduction_v4f32: 831; AVX512: # %bb.0: 832; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0 833; AVX512-NEXT: kmovd %k0, %eax 834; AVX512-NEXT: testb %al, %al 835; AVX512-NEXT: setne %al 836; AVX512-NEXT: retq 837 %a = fcmp oeq <4 x float> %x, %y 838 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 839 %b = or <4 x i1> %s1, %a 840 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 841 %c = or <4 x i1> %s2, %b 842 %d = extractelement <4 x i1> %c, i32 0 843 ret i1 %d 844} 845 846define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) { 847; SSE-LABEL: bool_reduction_v4f64: 848; SSE: # %bb.0: 849; SSE-NEXT: cmplepd %xmm1, %xmm3 850; SSE-NEXT: cmplepd %xmm0, %xmm2 851; SSE-NEXT: packssdw %xmm3, %xmm2 852; SSE-NEXT: movmskps %xmm2, %eax 853; SSE-NEXT: testl %eax, %eax 854; SSE-NEXT: setne %al 855; SSE-NEXT: retq 856; 857; AVX-LABEL: bool_reduction_v4f64: 858; AVX: # %bb.0: 859; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 860; AVX-NEXT: vmovmskpd %ymm0, %eax 861; AVX-NEXT: testl %eax, %eax 862; AVX-NEXT: setne %al 863; AVX-NEXT: vzeroupper 864; AVX-NEXT: retq 865; 866; AVX512-LABEL: bool_reduction_v4f64: 867; AVX512: # %bb.0: 868; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0 869; AVX512-NEXT: kmovd %k0, %eax 870; AVX512-NEXT: testb %al, %al 871; AVX512-NEXT: setne %al 872; AVX512-NEXT: vzeroupper 873; AVX512-NEXT: retq 874 %a = fcmp oge <4 x double> %x, %y 875 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 876 %b = or <4 x i1> %s1, %a 877 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 878 %c = or <4 x i1> %s2, %b 879 %d = extractelement <4 x i1> %c, i32 0 880 ret i1 %d 881} 882 883define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) { 884; SSE-LABEL: bool_reduction_v8f32: 885; SSE: # %bb.0: 886; SSE-NEXT: cmpneqps %xmm3, %xmm1 887; SSE-NEXT: cmpneqps %xmm2, %xmm0 888; SSE-NEXT: packssdw %xmm1, %xmm0 889; SSE-NEXT: pmovmskb %xmm0, %eax 890; SSE-NEXT: testl %eax, %eax 891; SSE-NEXT: setne %al 892; SSE-NEXT: retq 893; 894; AVX-LABEL: bool_reduction_v8f32: 895; AVX: # %bb.0: 896; AVX-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 897; AVX-NEXT: vmovmskps %ymm0, %eax 898; AVX-NEXT: testl %eax, %eax 899; AVX-NEXT: setne %al 900; AVX-NEXT: vzeroupper 901; AVX-NEXT: retq 902; 903; AVX512-LABEL: bool_reduction_v8f32: 904; AVX512: # %bb.0: 905; AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k0 906; AVX512-NEXT: kmovd %k0, %eax 907; AVX512-NEXT: testb %al, %al 908; AVX512-NEXT: setne %al 909; AVX512-NEXT: vzeroupper 910; AVX512-NEXT: retq 911 %a = fcmp une <8 x float> %x, %y 912 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 913 %b = or <8 x i1> %s1, %a 914 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 915 %c = or <8 x i1> %s2, %b 916 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 917 %d = or <8 x i1> %s3, %c 918 %e = extractelement <8 x i1> %d, i32 0 919 ret i1 %e 920} 921 922define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { 923; SSE-LABEL: bool_reduction_v2i64: 924; SSE: # %bb.0: 925; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 926; SSE-NEXT: pxor %xmm2, %xmm1 927; SSE-NEXT: pxor %xmm2, %xmm0 928; SSE-NEXT: pcmpgtq %xmm1, %xmm0 929; SSE-NEXT: movmskpd %xmm0, %eax 930; SSE-NEXT: testl %eax, %eax 931; SSE-NEXT: setne %al 932; SSE-NEXT: retq 933; 934; AVX-LABEL: bool_reduction_v2i64: 935; AVX: # %bb.0: 936; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 937; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 938; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 939; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 940; AVX-NEXT: vmovmskpd %xmm0, %eax 941; AVX-NEXT: testl %eax, %eax 942; AVX-NEXT: setne %al 943; AVX-NEXT: retq 944; 945; AVX512-LABEL: bool_reduction_v2i64: 946; AVX512: # %bb.0: 947; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0 948; AVX512-NEXT: kmovd %k0, %eax 949; AVX512-NEXT: testb %al, %al 950; AVX512-NEXT: setne %al 951; AVX512-NEXT: retq 952 %a = icmp ugt <2 x i64> %x, %y 953 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef> 954 %c = or <2 x i1> %a, %b 955 %d = extractelement <2 x i1> %c, i32 0 956 ret i1 %d 957} 958 959define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { 960; SSE-LABEL: bool_reduction_v4i32: 961; SSE: # %bb.0: 962; SSE-NEXT: pcmpeqd %xmm1, %xmm0 963; SSE-NEXT: movmskps %xmm0, %eax 964; SSE-NEXT: cmpl $15, %eax 965; SSE-NEXT: setne %al 966; SSE-NEXT: retq 967; 968; AVX-LABEL: bool_reduction_v4i32: 969; AVX: # %bb.0: 970; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 971; AVX-NEXT: vmovmskps %xmm0, %eax 972; AVX-NEXT: cmpl $15, %eax 973; AVX-NEXT: setne %al 974; AVX-NEXT: retq 975; 976; AVX512-LABEL: bool_reduction_v4i32: 977; AVX512: # %bb.0: 978; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 979; AVX512-NEXT: kmovd %k0, %eax 980; AVX512-NEXT: testb %al, %al 981; AVX512-NEXT: setne %al 982; AVX512-NEXT: retq 983 %a = icmp ne <4 x i32> %x, %y 984 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 985 %b = or <4 x i1> %s1, %a 986 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 987 %c = or <4 x i1> %s2, %b 988 %d = extractelement <4 x i1> %c, i32 0 989 ret i1 %d 990} 991 992define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) { 993; SSE-LABEL: bool_reduction_v8i16: 994; SSE: # %bb.0: 995; SSE-NEXT: pcmpgtw %xmm0, %xmm1 996; SSE-NEXT: pmovmskb %xmm1, %eax 997; SSE-NEXT: testl %eax, %eax 998; SSE-NEXT: setne %al 999; SSE-NEXT: retq 1000; 1001; AVX-LABEL: bool_reduction_v8i16: 1002; AVX: # %bb.0: 1003; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1004; AVX-NEXT: vpmovmskb %xmm0, %eax 1005; AVX-NEXT: testl %eax, %eax 1006; AVX-NEXT: setne %al 1007; AVX-NEXT: retq 1008; 1009; AVX512-LABEL: bool_reduction_v8i16: 1010; AVX512: # %bb.0: 1011; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 1012; AVX512-NEXT: kmovd %k0, %eax 1013; AVX512-NEXT: testb %al, %al 1014; AVX512-NEXT: setne %al 1015; AVX512-NEXT: retq 1016 %a = icmp slt <8 x i16> %x, %y 1017 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1018 %b = or <8 x i1> %s1, %a 1019 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1020 %c = or <8 x i1> %s2, %b 1021 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1022 %d = or <8 x i1> %s3, %c 1023 %e = extractelement <8 x i1> %d, i32 0 1024 ret i1 %e 1025} 1026 1027define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) { 1028; SSE-LABEL: bool_reduction_v16i8: 1029; SSE: # %bb.0: 1030; SSE-NEXT: pcmpgtb %xmm1, %xmm0 1031; SSE-NEXT: pmovmskb %xmm0, %eax 1032; SSE-NEXT: testl %eax, %eax 1033; SSE-NEXT: setne %al 1034; SSE-NEXT: retq 1035; 1036; AVX-LABEL: bool_reduction_v16i8: 1037; AVX: # %bb.0: 1038; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 1039; AVX-NEXT: vpmovmskb %xmm0, %eax 1040; AVX-NEXT: testl %eax, %eax 1041; AVX-NEXT: setne %al 1042; AVX-NEXT: retq 1043; 1044; AVX512-LABEL: bool_reduction_v16i8: 1045; AVX512: # %bb.0: 1046; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 1047; AVX512-NEXT: kortestw %k0, %k0 1048; AVX512-NEXT: setne %al 1049; AVX512-NEXT: retq 1050 %a = icmp sgt <16 x i8> %x, %y 1051 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1052 %b = or <16 x i1> %s1, %a 1053 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1054 %c = or <16 x i1> %s2, %b 1055 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1056 %d = or <16 x i1> %s3, %c 1057 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1058 %e = or <16 x i1> %s4, %d 1059 %f = extractelement <16 x i1> %e, i32 0 1060 ret i1 %f 1061} 1062 1063define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { 1064; SSE-LABEL: bool_reduction_v4i64: 1065; SSE: # %bb.0: 1066; SSE-NEXT: pcmpgtq %xmm1, %xmm3 1067; SSE-NEXT: pcmpgtq %xmm0, %xmm2 1068; SSE-NEXT: packssdw %xmm3, %xmm2 1069; SSE-NEXT: movmskps %xmm2, %eax 1070; SSE-NEXT: testl %eax, %eax 1071; SSE-NEXT: setne %al 1072; SSE-NEXT: retq 1073; 1074; AVX1-LABEL: bool_reduction_v4i64: 1075; AVX1: # %bb.0: 1076; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1077; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1078; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1079; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 1080; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1081; AVX1-NEXT: vmovmskpd %ymm0, %eax 1082; AVX1-NEXT: testl %eax, %eax 1083; AVX1-NEXT: setne %al 1084; AVX1-NEXT: vzeroupper 1085; AVX1-NEXT: retq 1086; 1087; AVX2-LABEL: bool_reduction_v4i64: 1088; AVX2: # %bb.0: 1089; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 1090; AVX2-NEXT: vmovmskpd %ymm0, %eax 1091; AVX2-NEXT: testl %eax, %eax 1092; AVX2-NEXT: setne %al 1093; AVX2-NEXT: vzeroupper 1094; AVX2-NEXT: retq 1095; 1096; AVX512-LABEL: bool_reduction_v4i64: 1097; AVX512: # %bb.0: 1098; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0 1099; AVX512-NEXT: kmovd %k0, %eax 1100; AVX512-NEXT: testb %al, %al 1101; AVX512-NEXT: setne %al 1102; AVX512-NEXT: vzeroupper 1103; AVX512-NEXT: retq 1104 %a = icmp slt <4 x i64> %x, %y 1105 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 1106 %b = or <4 x i1> %s1, %a 1107 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1108 %c = or <4 x i1> %s2, %b 1109 %d = extractelement <4 x i1> %c, i32 0 1110 ret i1 %d 1111} 1112 1113define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { 1114; SSE-LABEL: bool_reduction_v8i32: 1115; SSE: # %bb.0: 1116; SSE-NEXT: pminud %xmm1, %xmm3 1117; SSE-NEXT: pcmpeqd %xmm1, %xmm3 1118; SSE-NEXT: pminud %xmm0, %xmm2 1119; SSE-NEXT: pcmpeqd %xmm0, %xmm2 1120; SSE-NEXT: packssdw %xmm3, %xmm2 1121; SSE-NEXT: pmovmskb %xmm2, %eax 1122; SSE-NEXT: testl %eax, %eax 1123; SSE-NEXT: setne %al 1124; SSE-NEXT: retq 1125; 1126; AVX1-LABEL: bool_reduction_v8i32: 1127; AVX1: # %bb.0: 1128; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1129; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1130; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 1131; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2 1132; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1 1133; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1134; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1135; AVX1-NEXT: vmovmskps %ymm0, %eax 1136; AVX1-NEXT: testl %eax, %eax 1137; AVX1-NEXT: setne %al 1138; AVX1-NEXT: vzeroupper 1139; AVX1-NEXT: retq 1140; 1141; AVX2-LABEL: bool_reduction_v8i32: 1142; AVX2: # %bb.0: 1143; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1 1144; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 1145; AVX2-NEXT: vmovmskps %ymm0, %eax 1146; AVX2-NEXT: testl %eax, %eax 1147; AVX2-NEXT: setne %al 1148; AVX2-NEXT: vzeroupper 1149; AVX2-NEXT: retq 1150; 1151; AVX512-LABEL: bool_reduction_v8i32: 1152; AVX512: # %bb.0: 1153; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k0 1154; AVX512-NEXT: kmovd %k0, %eax 1155; AVX512-NEXT: testb %al, %al 1156; AVX512-NEXT: setne %al 1157; AVX512-NEXT: vzeroupper 1158; AVX512-NEXT: retq 1159 %a = icmp ule <8 x i32> %x, %y 1160 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1161 %b = or <8 x i1> %s1, %a 1162 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1163 %c = or <8 x i1> %s2, %b 1164 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1165 %d = or <8 x i1> %s3, %c 1166 %e = extractelement <8 x i1> %d, i32 0 1167 ret i1 %e 1168} 1169 1170define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) { 1171; SSE-LABEL: bool_reduction_v16i16: 1172; SSE: # %bb.0: 1173; SSE-NEXT: pcmpeqw %xmm3, %xmm1 1174; SSE-NEXT: pcmpeqw %xmm2, %xmm0 1175; SSE-NEXT: packsswb %xmm1, %xmm0 1176; SSE-NEXT: pmovmskb %xmm0, %eax 1177; SSE-NEXT: testl %eax, %eax 1178; SSE-NEXT: setne %al 1179; SSE-NEXT: retq 1180; 1181; AVX1-LABEL: bool_reduction_v16i16: 1182; AVX1: # %bb.0: 1183; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1184; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1185; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2 1186; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 1187; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 1188; AVX1-NEXT: vpmovmskb %xmm0, %eax 1189; AVX1-NEXT: testl %eax, %eax 1190; AVX1-NEXT: setne %al 1191; AVX1-NEXT: vzeroupper 1192; AVX1-NEXT: retq 1193; 1194; AVX2-LABEL: bool_reduction_v16i16: 1195; AVX2: # %bb.0: 1196; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 1197; AVX2-NEXT: vpmovmskb %ymm0, %eax 1198; AVX2-NEXT: testl %eax, %eax 1199; AVX2-NEXT: setne %al 1200; AVX2-NEXT: vzeroupper 1201; AVX2-NEXT: retq 1202; 1203; AVX512-LABEL: bool_reduction_v16i16: 1204; AVX512: # %bb.0: 1205; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 1206; AVX512-NEXT: kortestw %k0, %k0 1207; AVX512-NEXT: setne %al 1208; AVX512-NEXT: vzeroupper 1209; AVX512-NEXT: retq 1210 %a = icmp eq <16 x i16> %x, %y 1211 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1212 %b = or <16 x i1> %s1, %a 1213 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1214 %c = or <16 x i1> %s2, %b 1215 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1216 %d = or <16 x i1> %s3, %c 1217 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1218 %e = or <16 x i1> %s4, %d 1219 %f = extractelement <16 x i1> %e, i32 0 1220 ret i1 %f 1221} 1222 1223define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { 1224; SSE-LABEL: bool_reduction_v32i8: 1225; SSE: # %bb.0: 1226; SSE-NEXT: pcmpeqb %xmm3, %xmm1 1227; SSE-NEXT: pcmpeqb %xmm2, %xmm0 1228; SSE-NEXT: por %xmm1, %xmm0 1229; SSE-NEXT: pmovmskb %xmm0, %eax 1230; SSE-NEXT: testl %eax, %eax 1231; SSE-NEXT: setne %al 1232; SSE-NEXT: retq 1233; 1234; AVX1-LABEL: bool_reduction_v32i8: 1235; AVX1: # %bb.0: 1236; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1237; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1238; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 1239; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1240; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1241; AVX1-NEXT: vpmovmskb %xmm0, %eax 1242; AVX1-NEXT: testl %eax, %eax 1243; AVX1-NEXT: setne %al 1244; AVX1-NEXT: vzeroupper 1245; AVX1-NEXT: retq 1246; 1247; AVX2-LABEL: bool_reduction_v32i8: 1248; AVX2: # %bb.0: 1249; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 1250; AVX2-NEXT: vpmovmskb %ymm0, %eax 1251; AVX2-NEXT: testl %eax, %eax 1252; AVX2-NEXT: setne %al 1253; AVX2-NEXT: vzeroupper 1254; AVX2-NEXT: retq 1255; 1256; AVX512-LABEL: bool_reduction_v32i8: 1257; AVX512: # %bb.0: 1258; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 1259; AVX512-NEXT: kortestd %k0, %k0 1260; AVX512-NEXT: setne %al 1261; AVX512-NEXT: vzeroupper 1262; AVX512-NEXT: retq 1263 %a = icmp eq <32 x i8> %x, %y 1264 %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1265 %b = or <32 x i1> %s1, %a 1266 %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1267 %c = or <32 x i1> %s2, %b 1268 %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1269 %d = or <32 x i1> %s3, %c 1270 %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1271 %e = or <32 x i1> %s4, %d 1272 %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1273 %f = or <32 x i1> %s5, %e 1274 %g = extractelement <32 x i1> %f, i32 0 1275 ret i1 %g 1276} 1277