; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X86-SSE,SSE2,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X86-SSE,SSE41,X86-SSE41
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X86-AVX,X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,X86-AVX,X86-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,X64-SSE,SSE2,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X64-SSE,SSE41,X64-SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,X64-AVX,X64-AVX512

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.
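;
; Each test extracts lane 0 from both input vectors, performs the scalar fp
; operation, and inserts the result back into lane 0 of one of the inputs.
; A naive lowering would materialize that insert, e.g. (illustrative
; hypothetical sequence only, not checked by this file):
;   addss %xmm1, %xmm2
;   movss %xmm2, %xmm0
; whereas a single addss/subss/mulss/divss updating lane 0 of %xmm0 in
; place suffices.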

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sqrt_ss(<4 x float> %a) {
; SSE-LABEL: test_sqrt_ss:
; SSE: # %bb.0:
; SSE-NEXT: sqrtss %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_sqrt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = call float @llvm.sqrt.f32(float %1)
  %3 = insertelement <4 x float> %a, float %2, i32 0
  ret <4 x float> %3
}
declare float @llvm.sqrt.f32(float)

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sqrt_sd(<2 x double> %a) {
; SSE-LABEL: test_sqrt_sd:
; SSE: # %bb.0:
; SSE-NEXT: sqrtsd %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_sqrt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = call double @llvm.sqrt.f64(double %1)
  %3 = insertelement <2 x double> %a, double %2, i32 0
  ret <2 x double> %3
}
declare double @llvm.sqrt.f64(double)
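
; The test2_* variants commute the scalar operands and insert the result
; into %b rather than %a. The same single scalar instruction is expected;
; SSE needs an extra movaps/movapd to return the result in %xmm0, while
; the three-operand AVX forms avoid the copy.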

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test2_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}
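
; The test_multiple_* variants feed the result of the first scalar op back
; into a second one; both operations should stay scalar, with no vector
; inserts materialized in between.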

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: subss %xmm2, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: divss %xmm1, %xmm2
; SSE-NEXT: divss %xmm2, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_multiple_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; With SSE4.1 or greater, the shuffles in the following tests may
; be lowered to X86Blendi nodes.
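;
; In the blend_* tests the second operand arrives as a plain scalar
; argument, and the shuffle mask (<0,5,6,7> for floats, <0,3> for doubles)
; keeps lane 0 from the scalar result and the upper lanes from %a. That is
; exactly the movss/blendps merge pattern, so the sequence should still
; fold to one scalar instruction; on 32-bit targets %b is passed on the
; stack, hence the memory-operand forms below.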

define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_add_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_add_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_add_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addss %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_add_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fadd float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_sub_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_sub_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_sub_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: subss %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_sub_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fsub float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_mul_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: mulss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_mul_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_mul_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: mulss %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_mul_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fmul float %b, %ext
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
; X86-SSE-LABEL: blend_div_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: divss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_div_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vdivss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_div_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: divss %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_div_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <4 x float> %a, i32 0
  %op = fdiv float %ext, %b
  %ins = insertelement <4 x float> undef, float %op, i32 0
  %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %shuf
}

define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_add_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: addsd {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_add_sd:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vaddsd {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_add_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addsd %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_add_sd:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fadd double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_sub_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subsd {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_sub_sd:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vsubsd {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_sub_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: subsd %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_sub_sd:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fsub double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_mul_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: mulsd {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_mul_sd:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmulsd {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_mul_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: mulsd %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_mul_sd:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fmul double %b, %ext
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
; X86-SSE-LABEL: blend_div_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: divsd {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: blend_div_sd:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vdivsd {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: blend_div_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: divsd %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: blend_div_sd:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: retq

  %ext = extractelement <2 x double> %a, i32 0
  %op = fdiv double %ext, %b
  %ins = insertelement <2 x double> undef, double %op, i32 0
  %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuf
}

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.
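;
; Here the IR performs a full packed operation, but the shuffle keeps only
; lane 0 of it; the upper lanes come unchanged from the other input, so
; just one lane of the packed result is observable and the operation can
; be shrunk to its scalar form.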

define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}
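
; As with the test2_* cases, the insert_test2_* variants merge the packed
; result into %b, so the SSE lowerings need a register copy to return the
; value in %xmm0.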

define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test2_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}
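
; insert_test3_* and insert_test4_* express the lane-0 merge as a vector
; select with a constant <i1 false, i1 true, ...> mask instead of a
; shufflevector; the expected lowering is the same.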

define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test3_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE: # %bb.0:
; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test4_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}
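
; In the insert_test5_* variants the packed operation's operands are
; commuted relative to the merge destination. For commutative ops (fadd,
; fmul) the scalar fold still applies; for fsub and fdiv the lane-0 value
; is b[0]-a[0] (resp. b[0]/a[0]), which a single sub/div into %xmm0 cannot
; compute, so a packed op plus a movss/blendps-style merge is expected
; instead.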

define <4 x float> @insert_test5_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test5_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test5_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: insert_test5_sub_ss:
; SSE2: # %bb.0:
; SSE2-NEXT: subps %xmm0, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_sub_ss:
; SSE41: # %bb.0:
; SSE41-NEXT: subps %xmm0, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubps %xmm0, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test5_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test5_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test5_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: insert_test5_div_ss:
; SSE2: # %bb.0:
; SSE2-NEXT: divps %xmm0, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_div_ss:
; SSE41: # %bb.0:
; SSE41-NEXT: divps %xmm0, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test5_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test5_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_add_sd:
; AVX: # %bb.0:
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test5_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: insert_test5_sub_sd:
; SSE2: # %bb.0:
; SSE2-NEXT: subpd %xmm0, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_sub_sd:
; SSE41: # %bb.0:
; SSE41-NEXT: subpd %xmm0, %xmm1
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_sub_sd:
; AVX: # %bb.0:
; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}}
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test5_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test5_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_mul_sd:
; AVX: # %bb.0:
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test5_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: insert_test5_div_sd:
; SSE2: # %bb.0:
; SSE2-NEXT: divpd %xmm0, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: insert_test5_div_sd:
; SSE41: # %bb.0:
; SSE41-NEXT: divpd %xmm0, %xmm1
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: insert_test5_div_sd:
; AVX: # %bb.0:
; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}}
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}
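
; The masked tests below follow the AVX512 masked scalar-op pattern: bit 0
; of %mask selects between the scalar result and lane 0 of %c. AVX512
; lowers this to a single vaddss/vaddsd with a {%k1} write-mask; earlier
; subtargets fall back to a test-and-branch plus a movss/blendps merge.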

define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; X86-SSE2-LABEL: add_ss_mask:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: jne .LBB70_1
; X86-SSE2-NEXT: # %bb.2:
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X86-SSE2-NEXT: retl
; X86-SSE2-NEXT: .LBB70_1:
; X86-SSE2-NEXT: addss %xmm0, %xmm1
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-SSE2-NEXT: retl
;
; X86-SSE41-LABEL: add_ss_mask:
; X86-SSE41: # %bb.0:
; X86-SSE41-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-SSE41-NEXT: jne .LBB70_1
; X86-SSE41-NEXT: # %bb.2:
; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X86-SSE41-NEXT: retl
; X86-SSE41-NEXT: .LBB70_1:
; X86-SSE41-NEXT: addss %xmm0, %xmm1
; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-SSE41-NEXT: retl
;
; X86-AVX1-LABEL: add_ss_mask:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-AVX1-NEXT: je .LBB70_2
; X86-AVX1-NEXT: # %bb.1:
; X86-AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: .LBB70_2:
; X86-AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: add_ss_mask:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-AVX512-NEXT: kmovw %eax, %k1
; X86-AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
; X86-AVX512-NEXT: vmovaps %xmm2, %xmm0
; X86-AVX512-NEXT: retl
;
; X64-SSE2-LABEL: add_ss_mask:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: testb $1, %dil
; X64-SSE2-NEXT: jne .LBB70_1
; X64-SSE2-NEXT: # %bb.2:
; X64-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X64-SSE2-NEXT: retq
; X64-SSE2-NEXT: .LBB70_1:
; X64-SSE2-NEXT: addss %xmm0, %xmm1
; X64-SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-SSE2-NEXT: retq
;
; X64-SSE41-LABEL: add_ss_mask:
; X64-SSE41: # %bb.0:
; X64-SSE41-NEXT: testb $1, %dil
; X64-SSE41-NEXT: jne .LBB70_1
; X64-SSE41-NEXT: # %bb.2:
; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X64-SSE41-NEXT: retq
; X64-SSE41-NEXT: .LBB70_1:
; X64-SSE41-NEXT: addss %xmm0, %xmm1
; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-SSE41-NEXT: retq
;
; X64-AVX1-LABEL: add_ss_mask:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: testb $1, %dil
; X64-AVX1-NEXT: je .LBB70_2
; X64-AVX1-NEXT: # %bb.1:
; X64-AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: .LBB70_2:
; X64-AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: add_ss_mask:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: kmovw %edi, %k1
; X64-AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
; X64-AVX512-NEXT: vmovaps %xmm2, %xmm0
; X64-AVX512-NEXT: retq
  %1 = extractelement <4 x float> %a, i64 0
  %2 = extractelement <4 x float> %b, i64 0
  %3 = fadd float %1, %2
  %4 = extractelement <4 x float> %c, i32 0
  %5 = bitcast i8 %mask to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, float %3, float %4
  %8 = insertelement <4 x float> %a, float %7, i64 0
  ret <4 x float> %8
}

define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; X86-SSE2-LABEL: add_sd_mask:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: jne .LBB71_1
; X86-SSE2-NEXT: # %bb.2:
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X86-SSE2-NEXT: retl
; X86-SSE2-NEXT: .LBB71_1:
; X86-SSE2-NEXT: addsd %xmm0, %xmm1
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: retl
;
; X86-SSE41-LABEL: add_sd_mask:
; X86-SSE41: # %bb.0:
; X86-SSE41-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-SSE41-NEXT: jne .LBB71_1
; X86-SSE41-NEXT: # %bb.2:
; X86-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; X86-SSE41-NEXT: retl
; X86-SSE41-NEXT: .LBB71_1:
; X86-SSE41-NEXT: addsd %xmm0, %xmm1
; X86-SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE41-NEXT: retl
;
; X86-AVX1-LABEL: add_sd_mask:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: testb $1, {{[0-9]+}}(%esp)
; X86-AVX1-NEXT: je .LBB71_2
; X86-AVX1-NEXT: # %bb.1:
; X86-AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: .LBB71_2:
; X86-AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: add_sd_mask:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-AVX512-NEXT: kmovw %eax, %k1
; X86-AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; X86-AVX512-NEXT: vmovapd %xmm2, %xmm0
; X86-AVX512-NEXT: retl
;
; X64-SSE2-LABEL: add_sd_mask:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: testb $1, %dil
; X64-SSE2-NEXT: jne .LBB71_1
; X64-SSE2-NEXT: # %bb.2:
; X64-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X64-SSE2-NEXT: retq
; X64-SSE2-NEXT: .LBB71_1:
; X64-SSE2-NEXT: addsd %xmm0, %xmm1
; X64-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-SSE2-NEXT: retq
;
; X64-SSE41-LABEL: add_sd_mask:
; X64-SSE41: # %bb.0:
; X64-SSE41-NEXT: testb $1, %dil
; X64-SSE41-NEXT: jne .LBB71_1
; X64-SSE41-NEXT: # %bb.2:
; X64-SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; X64-SSE41-NEXT: retq
; X64-SSE41-NEXT: .LBB71_1:
; X64-SSE41-NEXT: addsd %xmm0, %xmm1
; X64-SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-SSE41-NEXT: retq
;
; X64-AVX1-LABEL: add_sd_mask:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: testb $1, %dil
; X64-AVX1-NEXT: je .LBB71_2
; X64-AVX1-NEXT: # %bb.1:
; X64-AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: .LBB71_2:
; X64-AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: add_sd_mask:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: kmovw %edi, %k1
; X64-AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; X64-AVX512-NEXT: vmovapd %xmm2, %xmm0
; X64-AVX512-NEXT: retq
  %1 = extractelement <2 x double> %a, i64 0
  %2 = extractelement <2 x double> %b, i64 0
  %3 = fadd double %1, %2
  %4 = extractelement <2 x double> %c, i32 0
  %5 = bitcast i8 %mask to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %3, double %4
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}