; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX2

define <2 x double> @signbits_sext_v2i64_sitofp_v2f64(i32 %a0, i32 %a1) nounwind {
; X86-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X86:       # %bb.0:
; X86-NEXT:    vcvtdq2pd {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i32 %a0 to i64
  %2 = sext i32 %a1 to i64
  %3 = insertelement <2 x i64> undef, i64 %1, i32 0
  %4 = insertelement <2 x i64> %3, i64 %2, i32 1
  %5 = sitofp <2 x i64> %4 to <2 x double>
  ret <2 x double> %5
}

define <4 x float> @signbits_sext_v4i64_sitofp_v4f32(i8 signext %a0, i16 signext %a1, i32 %a2, i32 %a3) nounwind {
; X86-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovd %ecx, %xmm0
; X86-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i8 %a0 to i64
  %2 = sext i16 %a1 to i64
  %3 = sext i32 %a2 to i64
  %4 = sext i32 %a3 to i64
  %5 = insertelement <4 x i64> undef, i64 %1, i32 0
  %6 = insertelement <4 x i64> %5, i64 %2, i32 1
  %7 = insertelement <4 x i64> %6, i64 %3, i32 2
  %8 = insertelement <4 x i64> %7, i64 %4, i32 3
  %9 = sitofp <4 x i64> %8 to <4 x float>
  ret <4 x float> %9
}

define <4 x double> @signbits_ashr_sitofp_0(<4 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_sitofp_0:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vpsrlq $36, %xmm1, %xmm2
; X86-NEXT:    vpsrlq $35, %xmm1, %xmm1
; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X86-NEXT:    vmovdqa {{.*#+}} xmm2 = [268435456,0,134217728,0]
; X86-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X86-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
; X86-NEXT:    vpsrlq $34, %xmm0, %xmm2
; X86-NEXT:    vpsrlq $33, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X86-NEXT:    vmovdqa {{.*#+}} xmm2 = [1073741824,0,536870912,0]
; X86-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X86-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sitofp_0:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpsrlq $36, %xmm1, %xmm2
; X64-AVX1-NEXT:    vpsrlq $35, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [268435456,134217728]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsrlq $34, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpsrlq $33, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1073741824,536870912]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sitofp_0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1073741824,536870912,268435456,134217728]
; X64-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a0, <i64 33, i64 34, i64 35, i64 36>
  %2 = sitofp <4 x i64> %1 to <4 x double>
  ret <4 x double> %2
}

; PR45794
define <4 x float> @signbits_ashr_sitofp_1(<4 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_sitofp_1:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vpsrad $16, %xmm1, %xmm1
; X86-NEXT:    vpsrad $16, %xmm0, %xmm0
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sitofp_1:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sitofp_1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrad $16, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a0, <i64 48, i64 48, i64 48, i64 48>
  %2 = sitofp <4 x i64> %1 to <4 x float>
  ret <4 x float> %2
}

define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_extract_sitofp_0:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_0:
; X64:       # %bb.0:
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 32>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_extract_sitofp_1:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_1:
; X64:       # %bb.0:
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 63>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
; X86-LABEL: signbits_ashr_shl_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpsrad $29, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    vpsllq $20, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_shl_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vpsllq $20, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shl <2 x i64> %1, <i64 20, i64 16>
  %3 = extractelement <2 x i64> %2, i32 0
  %4 = sitofp i64 %3 to float
  ret float %4
}

define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwind {
; X86-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    sarl $30, %ecx
; X86-NEXT:    shll $2, %eax
; X86-NEXT:    vmovd %eax, %xmm0
; X86-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; X86-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    sarq $30, %rdi
; X64-NEXT:    vmovq %rdi, %xmm0
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr i64 %a0, 30
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 %a1, i32 1
  %4 = ashr <2 x i64> %3, <i64 3, i64 3>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = sitofp i64 %5 to float
  ret float %6
}

define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1) nounwind {
; X86-LABEL: signbits_sext_shuffle_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_sext_shuffle_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_sext_shuffle_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX2-NEXT:    retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = shufflevector <4 x i64> %1, <4 x i64> %a1, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %3 = sitofp <4 x i64> %2 to <4 x double>
  ret <4 x double> %3
}

define <2 x double> @signbits_sext_shl_sitofp(<2 x i16> %a0) nounwind {
; X86-LABEL: signbits_sext_shl_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxwq %xmm0, %xmm0
; X86-NEXT:    vpsllq $5, %xmm0, %xmm1
; X86-NEXT:    vpsllq $11, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_sext_shl_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsllq $5, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpsllq $11, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_sext_shl_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovsxwq %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX2-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = sext <2 x i16> %a0 to <2 x i64>
  %2 = shl <2 x i64> %1, <i64 11, i64 5>
  %3 = sitofp <2 x i64> %2 to <2 x double>
  ret <2 x double> %3
}

define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4 x i64> %a1) nounwind {
; CHECK-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = ashr <2 x i64> %a0, <i64 16, i64 16>
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %3 = shufflevector <4 x i64> %a1, <4 x i64> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %4 = ashr <4 x i64> %3, <i64 16, i64 16, i64 16, i64 16>
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %6 = sitofp <2 x i64> %5 to <2 x double>
  ret <2 x double> %6
}

define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2 x i64> %a1, i32 %a2) nounwind {
; X86-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpsrad $29, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vmovd %edi, %xmm1
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = sext i32 %a2 to i64
  %3 = insertelement <2 x i64> %a1, i64 %2, i32 0
  %4 = shl <2 x i64> %3, <i64 20, i64 20>
  %5 = ashr <2 x i64> %4, <i64 20, i64 20>
  %6 = and <2 x i64> %1, %5
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X86-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %eax
; X86-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X86-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X86-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,0,8,0]
; X86-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X86-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X86-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X86-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X64-AVX2-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i32> %2 to <2 x i64>
  %4 = and <2 x i64> %1, %3
  %5 = or <2 x i64> %4, %3
  %6 = xor <2 x i64> %5, %1
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i32> %a3) nounwind {
; X86-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vpmovsxdq 8(%ebp), %xmm3
; X86-NEXT:    vpmovsxdq 16(%ebp), %xmm4
; X86-NEXT:    vpsrad $31, %xmm2, %xmm5
; X86-NEXT:    vpsrad $1, %xmm2, %xmm6
; X86-NEXT:    vpshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X86-NEXT:    vpblendw {{.*#+}} xmm5 = xmm6[0,1],xmm5[2,3],xmm6[4,5],xmm5[6,7]
; X86-NEXT:    vextractf128 $1, %ymm2, %xmm2
; X86-NEXT:    vpsrad $31, %xmm2, %xmm6
; X86-NEXT:    vpsrad $1, %xmm2, %xmm2
; X86-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3],xmm2[4,5],xmm6[6,7]
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm6
; X86-NEXT:    vblendvpd %xmm6, %xmm5, %xmm3, %xmm3
; X86-NEXT:    vextractf128 $1, %ymm1, %xmm1
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X86-NEXT:    vblendvpd %xmm0, %xmm2, %xmm4, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; X86-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $31, %xmm2, %xmm4
; X64-AVX1-NEXT:    vpsrad $1, %xmm2, %xmm5
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; X64-AVX1-NEXT:    vpsrad $31, %xmm2, %xmm5
; X64-AVX1-NEXT:    vpsrad $1, %xmm2, %xmm2
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; X64-AVX1-NEXT:    vpmovsxdq %xmm3, %xmm5
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; X64-AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
; X64-AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm6
; X64-AVX1-NEXT:    vblendvpd %xmm6, %xmm4, %xmm5, %xmm4
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
; X64-AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrad $31, %ymm2, %ymm4
; X64-AVX2-NEXT:    vpsrad $1, %ymm2, %ymm2
; X64-AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[1,1,3,3,5,5,7,7]
; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0],ymm4[1],ymm2[2],ymm4[3],ymm2[4],ymm4[5],ymm2[6],ymm4[7]
; X64-AVX2-NEXT:    vpmovsxdq %xmm3, %ymm3
; X64-AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X64-AVX2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i64> %a2, <i64 33, i64 63, i64 33, i64 63>
  %2 = sext <4 x i32> %a3 to <4 x i64>
  %3 = icmp eq <4 x i64> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = sitofp <4 x i64> %5 to <4 x float>
  ret <4 x float> %6
}

define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_smax:
; X86:       # %bb.0:
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
; X86-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_smax:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_smax:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_smin:
; X86:       # %bb.0:
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
; X86-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_smin:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_smin:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_umax:
; X86:       # %bb.0:
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
; X86-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_umax:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_umax:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: signbits_mask_ashr_umin:
; X86:       # %bb.0:
; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
; X86-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: signbits_mask_ashr_umin:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: signbits_mask_ashr_umin:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
  %3 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
  ret <4 x i32> %6
}
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define i32 @signbits_cmpss(float %0, float %1) {
; X86-LABEL: signbits_cmpss:
; X86:       # %bb.0:
; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpss:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovd %xmm0, %eax
; X64-NEXT:    retq
  %3 = fcmp oeq float %0, %1
  %4 = sext i1 %3 to i32
  ret i32 %4
}

define i32 @signbits_cmpss_int(<4 x float> %0, <4 x float> %1) {
; CHECK-LABEL: signbits_cmpss_int:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vextractps $0, %xmm0, %eax
; CHECK-NEXT:    ret{{[l|q]}}
  %3 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %0, <4 x float> %1, i8 0)
  %4 = bitcast <4 x float> %3 to <4 x i32>
  %5 = extractelement <4 x i32> %4, i32 0
  %6 = ashr i32 %5, 31
  ret i32 %6
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8 immarg)

define i64 @signbits_cmpsd(double %0, double %1) {
; X86-LABEL: signbits_cmpsd:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcmpeqsd {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    vmovd %xmm0, %eax
; X86-NEXT:    andl $1, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpsd:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    retq
  %3 = fcmp oeq double %0, %1
  %4 = sext i1 %3 to i64
  ret i64 %4
}

define i64 @signbits_cmpsd_int(<2 x double> %0, <2 x double> %1) {
; X86-LABEL: signbits_cmpsd_int:
; X86:       # %bb.0:
; X86-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X86-NEXT:    vextractps $1, %xmm0, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    retl
;
; X64-LABEL: signbits_cmpsd_int:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    retq
  %3 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %0, <2 x double> %1, i8 0)
  %4 = bitcast <2 x double> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = ashr i64 %5, 63
  ret i64 %6
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 immarg)

; Make sure we can preserve sign bit information into the second basic block
; so we can avoid having to shift bit 0 into bit 7 for each element due to
; v32i1->v32i8 promotion and the splitting of v32i8 into 2xv16i8. This requires
; ComputeNumSignBits handling for insert_subvector.
define void @cross_bb_signbits_insert_subvec(<32 x i8>* %ptr, <32 x i8> %x, <32 x i8> %z) {
; X86-LABEL: cross_bb_signbits_insert_subvec:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; X86-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; X86-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; X86-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X86-NEXT:    vmovaps %ymm0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-AVX1-LABEL: cross_bb_signbits_insert_subvec:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; X64-AVX1-NEXT:    vpcmpeqb %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X64-AVX1-NEXT:    vmovaps %ymm0, (%rdi)
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: cross_bb_signbits_insert_subvec:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X64-AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpblendvb %ymm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; X64-AVX2-NEXT:    vmovdqa %ymm0, (%rdi)
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %a = icmp eq <32 x i8> %x, zeroinitializer
  %b = icmp eq <32 x i8> %x, zeroinitializer
  %c = and <32 x i1> %a, %b
  br label %block

block:
  %d = select <32 x i1> %c, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> %z
  store <32 x i8> %d, <32 x i8>* %ptr, align 32
  br label %exit

exit:
  ret void
}