; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s

target triple = "x86_64-unknown-unknown"

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    subss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test_multiple_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    divss %xmm1, %xmm2
; SSE-NEXT:    divss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_multiple_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.

define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test2_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test2_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test3_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test3_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test3_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_add_ss:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_add_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_sub_ss:
; SSE:       # BB#0:
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_sub_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_mul_ss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_mul_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test4_div_ss:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_div_ss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_add_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_sub_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_mul_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_test4_div_sd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}