; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)

; Use widest possible vector for movmsk comparisons (PR37087)

define i1 @movmskps_noneof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_noneof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_noneof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @movmskps_allof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_allof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    cmpl $3, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    cmpl $3, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}

define i1 @pmovmskb_noneof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    testl %eax, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    testl %eax, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @pmovmskb_allof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    cmpl $15, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    cmpl $3, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    cmpl $3, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}

define i1 @pmovmskb_noneof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_noneof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @pmovmskb_allof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_allof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}

; MOVMSK(ICMP_SGT(X,-1)) -> NOT(MOVMSK(X)))
define i1 @movmskps_allof_v4i32_positive(<4 x i32> %a0) {
; SSE-LABEL: movmskps_allof_v4i32_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    xorl $15, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_v4i32_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    xorl $15, %eax
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}

define i1 @pmovmskb_noneof_v16i8_positive(<16 x i8> %a0) {
; SSE-LABEL: pmovmskb_noneof_v16i8_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_v16i8_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = icmp eq i32 %3, 0
  ret i1 %4
}

; AND(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(AND(X,Y))
; XOR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(XOR(X,Y))
; OR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(OR(X,Y))
; if the elements are the same width.

define i32 @and_movmskpd_movmskpd(<2 x double> %a0, <2 x i64> %a1) {
; SSE-LABEL: and_movmskpd_movmskpd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm2, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    andpd %xmm1, %xmm2
; SSE-NEXT:    movmskpd %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: and_movmskpd_movmskpd:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <2 x double>
  %4 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %3)
  %5 = icmp sgt <2 x i64> zeroinitializer, %a1
  %6 = bitcast <2 x i1> %5 to i2
  %7 = zext i2 %6 to i32
  %8 = and i32 %4, %7
  ret i32 %8
}

define i32 @xor_movmskps_movmskps(<4 x float> %a0, <4 x i32> %a1) {
; SSE-LABEL: xor_movmskps_movmskps:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    xorps %xmm1, %xmm2
; SSE-NEXT:    movmskps %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_movmskps_movmskps:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqps %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> zeroinitializer, %a0
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = ashr <4 x i32> %a1, <i32 31, i32 31, i32 31, i32 31>
  %6 = bitcast <4 x i32> %5 to <4 x float>
  %7 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %6)
  %8 = xor i32 %4, %7
  ret i32 %8
}

define i32 @or_pmovmskb_pmovmskb(<16 x i8> %a0, <8 x i16> %a1) {
; SSE-LABEL: or_pmovmskb_pmovmskb:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqb %xmm0, %xmm2
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    pmovmskb %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: or_pmovmskb_pmovmskb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $15, %xmm1, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    retq
  %1 = icmp eq <16 x i8> zeroinitializer, %a0
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %5 = bitcast <8 x i16> %4 to <16 x i8>
  %6 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %5)
  %7 = or i32 %3, %6
  ret i32 %7
}