; NOTE(review): Autogenerated llc/FileCheck test — the "SSE:"/"AVX:" comment lines are
; the test contract matched against llc output, so IR and assertions are kept
; byte-identical; regenerate with utils/update_llc_test_checks.py, never by hand.
; Purpose (from the IR below): each function builds a vector FP binop whose only
; demanded lane is element 0 (insertelement into undef, or a splat of lane 0), and
; the checks verify x86 codegen uses a scalar instruction (addss/subsd/divss/...)
; plus, where the result is splatted, a single splat shuffle (shufps/unpcklpd/vmovddup).
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE 3; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=AVX 4 5define <4 x float> @fadd_op1_constant_v4f32(float %x) nounwind { 6; SSE-LABEL: fadd_op1_constant_v4f32: 7; SSE: # %bb.0: 8; SSE-NEXT: addss {{.*}}(%rip), %xmm0 9; SSE-NEXT: retq 10; 11; AVX-LABEL: fadd_op1_constant_v4f32: 12; AVX: # %bb.0: 13; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 14; AVX-NEXT: retq 15 %v = insertelement <4 x float> undef, float %x, i32 0 16 %b = fadd <4 x float> %v, <float 42.0, float undef, float undef, float undef> 17 ret <4 x float> %b 18} 19 20define <4 x float> @load_fadd_op1_constant_v4f32(float* %p) nounwind { 21; SSE-LABEL: load_fadd_op1_constant_v4f32: 22; SSE: # %bb.0: 23; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 24; SSE-NEXT: addss {{.*}}(%rip), %xmm0 25; SSE-NEXT: retq 26; 27; AVX-LABEL: load_fadd_op1_constant_v4f32: 28; AVX: # %bb.0: 29; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 30; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 31; AVX-NEXT: retq 32 %x = load float, float* %p 33 %v = insertelement <4 x float> undef, float %x, i32 0 34 %b = fadd <4 x float> %v, <float 42.0, float undef, float undef, float undef> 35 ret <4 x float> %b 36} 37 38define <4 x float> @fsub_op0_constant_v4f32(float %x) nounwind { 39; SSE-LABEL: fsub_op0_constant_v4f32: 40; SSE: # %bb.0: 41; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 42; SSE-NEXT: subss %xmm0, %xmm1 43; SSE-NEXT: movaps %xmm1, %xmm0 44; SSE-NEXT: retq 45; 46; AVX-LABEL: fsub_op0_constant_v4f32: 47; AVX: # %bb.0: 48; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 49; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 50; AVX-NEXT: retq 51 %v = insertelement <4 x float> undef, float %x, i32 0 52 %b = fsub <4 x float> <float 42.0, float undef, float undef, float undef>, %v 53 ret <4 x float> %b 54} 55 56define <4 x float> 
@load_fsub_op0_constant_v4f32(float* %p) nounwind { 57; SSE-LABEL: load_fsub_op0_constant_v4f32: 58; SSE: # %bb.0: 59; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 60; SSE-NEXT: subss (%rdi), %xmm0 61; SSE-NEXT: retq 62; 63; AVX-LABEL: load_fsub_op0_constant_v4f32: 64; AVX: # %bb.0: 65; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 66; AVX-NEXT: vsubss (%rdi), %xmm0, %xmm0 67; AVX-NEXT: retq 68 %x = load float, float* %p 69 %v = insertelement <4 x float> undef, float %x, i32 0 70 %b = fsub <4 x float> <float 42.0, float undef, float undef, float undef>, %v 71 ret <4 x float> %b 72} 73 74define <4 x float> @fmul_op1_constant_v4f32(float %x) nounwind { 75; SSE-LABEL: fmul_op1_constant_v4f32: 76; SSE: # %bb.0: 77; SSE-NEXT: mulss {{.*}}(%rip), %xmm0 78; SSE-NEXT: retq 79; 80; AVX-LABEL: fmul_op1_constant_v4f32: 81; AVX: # %bb.0: 82; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 83; AVX-NEXT: retq 84 %v = insertelement <4 x float> undef, float %x, i32 0 85 %b = fmul <4 x float> %v, <float 42.0, float undef, float undef, float undef> 86 ret <4 x float> %b 87} 88 89define <4 x float> @load_fmul_op1_constant_v4f32(float* %p) nounwind { 90; SSE-LABEL: load_fmul_op1_constant_v4f32: 91; SSE: # %bb.0: 92; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 93; SSE-NEXT: mulss {{.*}}(%rip), %xmm0 94; SSE-NEXT: retq 95; 96; AVX-LABEL: load_fmul_op1_constant_v4f32: 97; AVX: # %bb.0: 98; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 99; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 100; AVX-NEXT: retq 101 %x = load float, float* %p 102 %v = insertelement <4 x float> undef, float %x, i32 0 103 %b = fmul <4 x float> %v, <float 42.0, float undef, float undef, float undef> 104 ret <4 x float> %b 105} 106 107define <4 x float> @fdiv_op1_constant_v4f32(float %x) nounwind { 108; SSE-LABEL: fdiv_op1_constant_v4f32: 109; SSE: # %bb.0: 110; SSE-NEXT: divss {{.*}}(%rip), %xmm0 111; SSE-NEXT: retq 112; 113; AVX-LABEL: fdiv_op1_constant_v4f32: 114; AVX: # %bb.0: 115; 
AVX-NEXT: vdivss {{.*}}(%rip), %xmm0, %xmm0 116; AVX-NEXT: retq 117 %v = insertelement <4 x float> undef, float %x, i32 0 118 %b = fdiv <4 x float> %v, <float 42.0, float undef, float undef, float undef> 119 ret <4 x float> %b 120} 121 122define <4 x float> @load_fdiv_op1_constant_v4f32(float* %p) nounwind { 123; SSE-LABEL: load_fdiv_op1_constant_v4f32: 124; SSE: # %bb.0: 125; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 126; SSE-NEXT: divss {{.*}}(%rip), %xmm0 127; SSE-NEXT: retq 128; 129; AVX-LABEL: load_fdiv_op1_constant_v4f32: 130; AVX: # %bb.0: 131; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 132; AVX-NEXT: vdivss {{.*}}(%rip), %xmm0, %xmm0 133; AVX-NEXT: retq 134 %x = load float, float* %p 135 %v = insertelement <4 x float> undef, float %x, i32 0 136 %b = fdiv <4 x float> %v, <float 42.0, float undef, float undef, float undef> 137 ret <4 x float> %b 138} 139 140define <4 x float> @fdiv_op0_constant_v4f32(float %x) nounwind { 141; SSE-LABEL: fdiv_op0_constant_v4f32: 142; SSE: # %bb.0: 143; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 144; SSE-NEXT: divss %xmm0, %xmm1 145; SSE-NEXT: movaps %xmm1, %xmm0 146; SSE-NEXT: retq 147; 148; AVX-LABEL: fdiv_op0_constant_v4f32: 149; AVX: # %bb.0: 150; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 151; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 152; AVX-NEXT: retq 153 %v = insertelement <4 x float> undef, float %x, i32 0 154 %b = fdiv <4 x float> <float 42.0, float undef, float undef, float undef>, %v 155 ret <4 x float> %b 156} 157 158define <4 x float> @load_fdiv_op0_constant_v4f32(float* %p) nounwind { 159; SSE-LABEL: load_fdiv_op0_constant_v4f32: 160; SSE: # %bb.0: 161; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 162; SSE-NEXT: divss (%rdi), %xmm0 163; SSE-NEXT: retq 164; 165; AVX-LABEL: load_fdiv_op0_constant_v4f32: 166; AVX: # %bb.0: 167; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 168; AVX-NEXT: vdivss (%rdi), %xmm0, %xmm0 169; AVX-NEXT: retq 170 %x = load float, 
float* %p 171 %v = insertelement <4 x float> undef, float %x, i32 0 172 %b = fdiv <4 x float> <float 42.0, float undef, float undef, float undef>, %v 173 ret <4 x float> %b 174} 175 176define <4 x double> @fadd_op1_constant_v4f64(double %x) nounwind { 177; SSE-LABEL: fadd_op1_constant_v4f64: 178; SSE: # %bb.0: 179; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 180; SSE-NEXT: retq 181; 182; AVX-LABEL: fadd_op1_constant_v4f64: 183; AVX: # %bb.0: 184; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 185; AVX-NEXT: retq 186 %v = insertelement <4 x double> undef, double %x, i32 0 187 %b = fadd <4 x double> %v, <double 42.0, double undef, double undef, double undef> 188 ret <4 x double> %b 189} 190 191define <4 x double> @load_fadd_op1_constant_v4f64(double* %p) nounwind { 192; SSE-LABEL: load_fadd_op1_constant_v4f64: 193; SSE: # %bb.0: 194; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 195; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 196; SSE-NEXT: retq 197; 198; AVX-LABEL: load_fadd_op1_constant_v4f64: 199; AVX: # %bb.0: 200; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 201; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 202; AVX-NEXT: retq 203 %x = load double, double* %p 204 %v = insertelement <4 x double> undef, double %x, i32 0 205 %b = fadd <4 x double> %v, <double 42.0, double undef, double undef, double undef> 206 ret <4 x double> %b 207} 208 209define <4 x double> @fsub_op0_constant_v4f64(double %x) nounwind { 210; SSE-LABEL: fsub_op0_constant_v4f64: 211; SSE: # %bb.0: 212; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 213; SSE-NEXT: subsd %xmm0, %xmm1 214; SSE-NEXT: movapd %xmm1, %xmm0 215; SSE-NEXT: retq 216; 217; AVX-LABEL: fsub_op0_constant_v4f64: 218; AVX: # %bb.0: 219; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 220; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 221; AVX-NEXT: retq 222 %v = insertelement <4 x double> undef, double %x, i32 0 223 %b = fsub <4 x double> <double 42.0, double undef, double undef, double undef>, %v 224 ret <4 x double> %b 225} 226 227define <4 x double> 
@load_fsub_op0_constant_v4f64(double* %p) nounwind { 228; SSE-LABEL: load_fsub_op0_constant_v4f64: 229; SSE: # %bb.0: 230; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 231; SSE-NEXT: subsd (%rdi), %xmm0 232; SSE-NEXT: retq 233; 234; AVX-LABEL: load_fsub_op0_constant_v4f64: 235; AVX: # %bb.0: 236; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 237; AVX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 238; AVX-NEXT: retq 239 %x = load double, double* %p 240 %v = insertelement <4 x double> undef, double %x, i32 0 241 %b = fsub <4 x double> <double 42.0, double undef, double undef, double undef>, %v 242 ret <4 x double> %b 243} 244 245define <4 x double> @fmul_op1_constant_v4f64(double %x) nounwind { 246; SSE-LABEL: fmul_op1_constant_v4f64: 247; SSE: # %bb.0: 248; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0 249; SSE-NEXT: retq 250; 251; AVX-LABEL: fmul_op1_constant_v4f64: 252; AVX: # %bb.0: 253; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0 254; AVX-NEXT: retq 255 %v = insertelement <4 x double> undef, double %x, i32 0 256 %b = fmul <4 x double> %v, <double 42.0, double undef, double undef, double undef> 257 ret <4 x double> %b 258} 259 260define <4 x double> @load_fmul_op1_constant_v4f64(double* %p) nounwind { 261; SSE-LABEL: load_fmul_op1_constant_v4f64: 262; SSE: # %bb.0: 263; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 264; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0 265; SSE-NEXT: retq 266; 267; AVX-LABEL: load_fmul_op1_constant_v4f64: 268; AVX: # %bb.0: 269; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 270; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0 271; AVX-NEXT: retq 272 %x = load double, double* %p 273 %v = insertelement <4 x double> undef, double %x, i32 0 274 %b = fmul <4 x double> %v, <double 42.0, double undef, double undef, double undef> 275 ret <4 x double> %b 276} 277 278define <4 x double> @fdiv_op1_constant_v4f64(double %x) nounwind { 279; SSE-LABEL: fdiv_op1_constant_v4f64: 280; SSE: # %bb.0: 281; SSE-NEXT: divsd {{.*}}(%rip), %xmm0 282; SSE-NEXT: retq 283; 284; AVX-LABEL: 
fdiv_op1_constant_v4f64: 285; AVX: # %bb.0: 286; AVX-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0 287; AVX-NEXT: retq 288 %v = insertelement <4 x double> undef, double %x, i32 0 289 %b = fdiv <4 x double> %v, <double 42.0, double undef, double undef, double undef> 290 ret <4 x double> %b 291} 292 293define <4 x double> @load_fdiv_op1_constant_v4f64(double* %p) nounwind { 294; SSE-LABEL: load_fdiv_op1_constant_v4f64: 295; SSE: # %bb.0: 296; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 297; SSE-NEXT: divsd {{.*}}(%rip), %xmm0 298; SSE-NEXT: retq 299; 300; AVX-LABEL: load_fdiv_op1_constant_v4f64: 301; AVX: # %bb.0: 302; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 303; AVX-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0 304; AVX-NEXT: retq 305 %x = load double, double* %p 306 %v = insertelement <4 x double> undef, double %x, i32 0 307 %b = fdiv <4 x double> %v, <double 42.0, double undef, double undef, double undef> 308 ret <4 x double> %b 309} 310 311define <4 x double> @fdiv_op0_constant_v4f64(double %x) nounwind { 312; SSE-LABEL: fdiv_op0_constant_v4f64: 313; SSE: # %bb.0: 314; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 315; SSE-NEXT: divsd %xmm0, %xmm1 316; SSE-NEXT: movapd %xmm1, %xmm0 317; SSE-NEXT: retq 318; 319; AVX-LABEL: fdiv_op0_constant_v4f64: 320; AVX: # %bb.0: 321; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 322; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0 323; AVX-NEXT: retq 324 %v = insertelement <4 x double> undef, double %x, i32 0 325 %b = fdiv <4 x double> <double 42.0, double undef, double undef, double undef>, %v 326 ret <4 x double> %b 327} 328 329define <4 x double> @load_fdiv_op0_constant_v4f64(double* %p) nounwind { 330; SSE-LABEL: load_fdiv_op0_constant_v4f64: 331; SSE: # %bb.0: 332; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 333; SSE-NEXT: divsd (%rdi), %xmm0 334; SSE-NEXT: retq 335; 336; AVX-LABEL: load_fdiv_op0_constant_v4f64: 337; AVX: # %bb.0: 338; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 339; AVX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 340; AVX-NEXT: retq 
341 %x = load double, double* %p 342 %v = insertelement <4 x double> undef, double %x, i32 0 343 %b = fdiv <4 x double> <double 42.0, double undef, double undef, double undef>, %v 344 ret <4 x double> %b 345} 346 347define <2 x double> @fadd_splat_splat_v2f64(<2 x double> %vx, <2 x double> %vy) { 348; SSE-LABEL: fadd_splat_splat_v2f64: 349; SSE: # %bb.0: 350; SSE-NEXT: addsd %xmm1, %xmm0 351; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] 352; SSE-NEXT: retq 353; 354; AVX-LABEL: fadd_splat_splat_v2f64: 355; AVX: # %bb.0: 356; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 357; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 358; AVX-NEXT: retq 359 %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> zeroinitializer 360 %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> zeroinitializer 361 %r = fadd <2 x double> %splatx, %splaty 362 ret <2 x double> %r 363} 364 365define <4 x double> @fsub_splat_splat_v4f64(double %x, double %y) { 366; SSE-LABEL: fsub_splat_splat_v4f64: 367; SSE: # %bb.0: 368; SSE-NEXT: subsd %xmm1, %xmm0 369; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] 370; SSE-NEXT: movapd %xmm0, %xmm1 371; SSE-NEXT: retq 372; 373; AVX-LABEL: fsub_splat_splat_v4f64: 374; AVX: # %bb.0: 375; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 376; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 377; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 378; AVX-NEXT: retq 379 %vx = insertelement <4 x double> undef, double %x, i32 0 380 %vy = insertelement <4 x double> undef, double %y, i32 0 381 %splatx = shufflevector <4 x double> %vx, <4 x double> undef, <4 x i32> zeroinitializer 382 %splaty = shufflevector <4 x double> %vy, <4 x double> undef, <4 x i32> zeroinitializer 383 %r = fsub <4 x double> %splatx, %splaty 384 ret <4 x double> %r 385} 386 387define <4 x float> @fmul_splat_splat_v4f32(<4 x float> %vx, <4 x float> %vy) { 388; SSE-LABEL: fmul_splat_splat_v4f32: 389; SSE: # %bb.0: 390; SSE-NEXT: mulss %xmm1, %xmm0 391; SSE-NEXT: shufps {{.*#+}} xmm0 = 
xmm0[0,0,0,0] 392; SSE-NEXT: retq 393; 394; AVX-LABEL: fmul_splat_splat_v4f32: 395; AVX: # %bb.0: 396; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 397; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 398; AVX-NEXT: retq 399 %splatx = shufflevector <4 x float> %vx, <4 x float> undef, <4 x i32> zeroinitializer 400 %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer 401 %r = fmul fast <4 x float> %splatx, %splaty 402 ret <4 x float> %r 403} 404 405define <8 x float> @fdiv_splat_splat_v8f32(<8 x float> %vx, <8 x float> %vy) { 406; SSE-LABEL: fdiv_splat_splat_v8f32: 407; SSE: # %bb.0: 408; SSE-NEXT: divss %xmm2, %xmm0 409; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 410; SSE-NEXT: movaps %xmm0, %xmm1 411; SSE-NEXT: retq 412; 413; AVX-LABEL: fdiv_splat_splat_v8f32: 414; AVX: # %bb.0: 415; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0 416; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 417; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 418; AVX-NEXT: retq 419 %splatx = shufflevector <8 x float> %vx, <8 x float> undef, <8 x i32> zeroinitializer 420 %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer 421 %r = fdiv fast <8 x float> %splatx, %splaty 422 ret <8 x float> %r 423} 424 425; Negative test - splat of non-zero indexes (still sink the splat). 
; NOTE(review): the following checks show a vector op (addpd) rather than a scalar
; op, matching the "negative test" comments: these shapes are not scalarized,
; though the splat shuffle is still sunk below the binop where the checks say so.
426 427 define <2 x double> @fadd_splat_splat_nonzero_v2f64(<2 x double> %vx, <2 x double> %vy) { 428; SSE-LABEL: fadd_splat_splat_nonzero_v2f64: 429; SSE: # %bb.0: 430; SSE-NEXT: addpd %xmm1, %xmm0 431; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 432; SSE-NEXT: retq 433; 434; AVX-LABEL: fadd_splat_splat_nonzero_v2f64: 435; AVX: # %bb.0: 436; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 437; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1] 438; AVX-NEXT: retq 439 %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> <i32 1, i32 1> 440 %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> <i32 1, i32 1> 441 %r = fadd <2 x double> %splatx, %splaty 442 ret <2 x double> %r 443} 444 445; Negative test - splat of non-zero index and mismatched indexes. 446 447define <2 x double> @fadd_splat_splat_mismatch_v2f64(<2 x double> %vx, <2 x double> %vy) { 448; SSE-LABEL: fadd_splat_splat_mismatch_v2f64: 449; SSE: # %bb.0: 450; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] 451; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] 452; SSE-NEXT: addpd %xmm1, %xmm0 453; SSE-NEXT: retq 454; 455; AVX-LABEL: fadd_splat_splat_mismatch_v2f64: 456; AVX: # %bb.0: 457; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 458; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,1] 459; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 460; AVX-NEXT: retq 461 %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> <i32 0, i32 0> 462 %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> <i32 1, i32 1> 463 %r = fadd <2 x double> %splatx, %splaty 464 ret <2 x double> %r 465} 466 467; Negative test - non-splat. 
468 469 define <2 x double> @fadd_splat_nonsplat_v2f64(<2 x double> %vx, <2 x double> %vy) { 470; SSE-LABEL: fadd_splat_nonsplat_v2f64: 471; SSE: # %bb.0: 472; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] 473; SSE-NEXT: addpd %xmm1, %xmm0 474; SSE-NEXT: retq 475; 476; AVX-LABEL: fadd_splat_nonsplat_v2f64: 477; AVX: # %bb.0: 478; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 479; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 480; AVX-NEXT: retq 481 %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> <i32 0, i32 0> 482 %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> <i32 0, i32 1> 483 %r = fadd <2 x double> %splatx, %splaty 484 ret <2 x double> %r 485} 486 487; Negative test - non-FP. 488 489define <2 x i64> @add_splat_splat_v2i64(<2 x i64> %vx, <2 x i64> %vy) { 490; SSE-LABEL: add_splat_splat_v2i64: 491; SSE: # %bb.0: 492; SSE-NEXT: paddq %xmm1, %xmm0 493; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 494; SSE-NEXT: retq 495; 496; AVX-LABEL: add_splat_splat_v2i64: 497; AVX: # %bb.0: 498; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 499; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 500; AVX-NEXT: retq 501 %splatx = shufflevector <2 x i64> %vx, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 502 %splaty = shufflevector <2 x i64> %vy, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 503 %r = add <2 x i64> %splatx, %splaty 504 ret <2 x i64> %r 505} 506 507define <2 x double> @fadd_splat_const_op1_v2f64(<2 x double> %vx) { 508; SSE-LABEL: fadd_splat_const_op1_v2f64: 509; SSE: # %bb.0: 510; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 511; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] 512; SSE-NEXT: retq 513; 514; AVX-LABEL: fadd_splat_const_op1_v2f64: 515; AVX: # %bb.0: 516; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 517; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 518; AVX-NEXT: retq 519 %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> zeroinitializer 520 %r = fadd <2 x double> %splatx, <double 42.0, double 42.0> 521 ret <2 x double> %r 
522} 523 524define <4 x double> @fsub_const_op0_splat_v4f64(double %x) { 525; SSE-LABEL: fsub_const_op0_splat_v4f64: 526; SSE: # %bb.0: 527; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 528; SSE-NEXT: subsd %xmm0, %xmm1 529; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0] 530; SSE-NEXT: movapd %xmm1, %xmm0 531; SSE-NEXT: retq 532; 533; AVX-LABEL: fsub_const_op0_splat_v4f64: 534; AVX: # %bb.0: 535; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 536; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 537; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 538; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 539; AVX-NEXT: retq 540 %vx = insertelement <4 x double> undef, double 8.0, i32 0 541 %vy = insertelement <4 x double> undef, double %x, i32 0 542 %splatx = shufflevector <4 x double> %vx, <4 x double> undef, <4 x i32> zeroinitializer 543 %splaty = shufflevector <4 x double> %vy, <4 x double> undef, <4 x i32> zeroinitializer 544 %r = fsub <4 x double> %splatx, %splaty 545 ret <4 x double> %r 546} 547 548define <4 x float> @fmul_splat_const_op1_v4f32(<4 x float> %vx, <4 x float> %vy) { 549; SSE-LABEL: fmul_splat_const_op1_v4f32: 550; SSE: # %bb.0: 551; SSE-NEXT: mulss {{.*}}(%rip), %xmm0 552; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 553; SSE-NEXT: retq 554; 555; AVX-LABEL: fmul_splat_const_op1_v4f32: 556; AVX: # %bb.0: 557; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 558; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 559; AVX-NEXT: retq 560 %splatx = shufflevector <4 x float> %vx, <4 x float> undef, <4 x i32> zeroinitializer 561 %r = fmul fast <4 x float> %splatx, <float 17.0, float 17.0, float 17.0, float 17.0> 562 ret <4 x float> %r 563} 564 565define <8 x float> @fdiv_splat_const_op0_v8f32(<8 x float> %vy) { 566; SSE-LABEL: fdiv_splat_const_op0_v8f32: 567; SSE: # %bb.0: 568; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 569; SSE-NEXT: divss %xmm0, %xmm1 570; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0] 571; SSE-NEXT: movaps %xmm1, %xmm0 572; SSE-NEXT: retq 573; 574; 
AVX-LABEL: fdiv_splat_const_op0_v8f32: 575; AVX: # %bb.0: 576; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 577; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 578; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 579; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 580; AVX-NEXT: retq 581 %splatx = shufflevector <8 x float> <float 4.5, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, <8 x float> undef, <8 x i32> zeroinitializer 582 %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer 583 %r = fdiv fast <8 x float> %splatx, %splaty 584 ret <8 x float> %r 585} 586 587define <8 x float> @fdiv_const_op1_splat_v8f32(<8 x float> %vx) { 588; SSE-LABEL: fdiv_const_op1_splat_v8f32: 589; SSE: # %bb.0: 590; SSE-NEXT: xorps %xmm1, %xmm1 591; SSE-NEXT: divss %xmm1, %xmm0 592; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 593; SSE-NEXT: movaps %xmm0, %xmm1 594; SSE-NEXT: retq 595; 596; AVX-LABEL: fdiv_const_op1_splat_v8f32: 597; AVX: # %bb.0: 598; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 599; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0 600; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 601; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 602; AVX-NEXT: retq 603 %splatx = shufflevector <8 x float> %vx, <8 x float> undef, <8 x i32> zeroinitializer 604 %splaty = shufflevector <8 x float> <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, <8 x float> undef, <8 x i32> zeroinitializer 605 %r = fdiv fast <8 x float> %splatx, %splaty 606 ret <8 x float> %r 607} 608 609define <2 x double> @splat0_fadd_v2f64(<2 x double> %vx, <2 x double> %vy) { 610; SSE-LABEL: splat0_fadd_v2f64: 611; SSE: # %bb.0: 612; SSE-NEXT: addsd %xmm1, %xmm0 613; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] 614; SSE-NEXT: retq 615; 616; AVX-LABEL: splat0_fadd_v2f64: 617; AVX: # %bb.0: 618; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 619; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 620; AVX-NEXT: retq 621 %b = fadd <2 x double> 
%vx, %vy 622 %r = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer 623 ret <2 x double> %r 624} 625 626define <4 x double> @splat0_fsub_v4f64(double %x, double %y) { 627; SSE-LABEL: splat0_fsub_v4f64: 628; SSE: # %bb.0: 629; SSE-NEXT: subsd %xmm1, %xmm0 630; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] 631; SSE-NEXT: movapd %xmm0, %xmm1 632; SSE-NEXT: retq 633; 634; AVX-LABEL: splat0_fsub_v4f64: 635; AVX: # %bb.0: 636; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 637; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 638; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 639; AVX-NEXT: retq 640 %vx = insertelement <4 x double> undef, double %x, i32 0 641 %vy = insertelement <4 x double> undef, double %y, i32 0 642 %b = fsub <4 x double> %vx, %vy 643 %r = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer 644 ret <4 x double> %r 645} 646 647define <4 x float> @splat0_fmul_v4f32(<4 x float> %vx, <4 x float> %vy) { 648; SSE-LABEL: splat0_fmul_v4f32: 649; SSE: # %bb.0: 650; SSE-NEXT: mulss %xmm1, %xmm0 651; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 652; SSE-NEXT: retq 653; 654; AVX-LABEL: splat0_fmul_v4f32: 655; AVX: # %bb.0: 656; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 657; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 658; AVX-NEXT: retq 659 %b = fmul fast <4 x float> %vx, %vy 660 %r = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer 661 ret <4 x float> %r 662} 663 664define <8 x float> @splat0_fdiv_v8f32(<8 x float> %vx, <8 x float> %vy) { 665; SSE-LABEL: splat0_fdiv_v8f32: 666; SSE: # %bb.0: 667; SSE-NEXT: divss %xmm2, %xmm0 668; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 669; SSE-NEXT: movaps %xmm0, %xmm1 670; SSE-NEXT: retq 671; 672; AVX-LABEL: splat0_fdiv_v8f32: 673; AVX: # %bb.0: 674; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0 675; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 676; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 677; AVX-NEXT: retq 678 %b = fdiv fast <8 x float> %vx, %vy 679 
%r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer 680 ret <8 x float> %r 681} 682 683define <2 x double> @splat0_fadd_const_op1_v2f64(<2 x double> %vx) { 684; SSE-LABEL: splat0_fadd_const_op1_v2f64: 685; SSE: # %bb.0: 686; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 687; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0] 688; SSE-NEXT: retq 689; 690; AVX-LABEL: splat0_fadd_const_op1_v2f64: 691; AVX: # %bb.0: 692; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 693; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 694; AVX-NEXT: retq 695 %b = fadd <2 x double> %vx, <double 42.0, double 12.0> 696 %r = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer 697 ret <2 x double> %r 698} 699 700define <4 x double> @splat0_fsub_const_op0_v4f64(double %x) { 701; SSE-LABEL: splat0_fsub_const_op0_v4f64: 702; SSE: # %bb.0: 703; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 704; SSE-NEXT: subsd %xmm0, %xmm1 705; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0] 706; SSE-NEXT: movapd %xmm1, %xmm0 707; SSE-NEXT: retq 708; 709; AVX-LABEL: splat0_fsub_const_op0_v4f64: 710; AVX: # %bb.0: 711; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 712; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 713; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 714; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 715; AVX-NEXT: retq 716 %vx = insertelement <4 x double> undef, double %x, i32 0 717 %b = fsub <4 x double> <double -42.0, double 42.0, double 0.0, double 1.0>, %vx 718 %r = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer 719 ret <4 x double> %r 720} 721 722define <4 x float> @splat0_fmul_const_op1_v4f32(<4 x float> %vx) { 723; SSE-LABEL: splat0_fmul_const_op1_v4f32: 724; SSE: # %bb.0: 725; SSE-NEXT: mulss {{.*}}(%rip), %xmm0 726; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 727; SSE-NEXT: retq 728; 729; AVX-LABEL: splat0_fmul_const_op1_v4f32: 730; AVX: # %bb.0: 731; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 732; AVX-NEXT: vpermilps {{.*#+}} xmm0 = 
xmm0[0,0,0,0] 733; AVX-NEXT: retq 734 %b = fmul fast <4 x float> %vx, <float 6.0, float -1.0, float 1.0, float 7.0> 735 %r = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer 736 ret <4 x float> %r 737} 738 739define <8 x float> @splat0_fdiv_const_op1_v8f32(<8 x float> %vx) { 740; SSE-LABEL: splat0_fdiv_const_op1_v8f32: 741; SSE: # %bb.0: 742; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 743; SSE-NEXT: movaps %xmm0, %xmm1 744; SSE-NEXT: retq 745; 746; AVX-LABEL: splat0_fdiv_const_op1_v8f32: 747; AVX: # %bb.0: 748; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 749; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 750; AVX-NEXT: retq 751 %b = fdiv fast <8 x float> %vx, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0> 752 %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer 753 ret <8 x float> %r 754} 755 756define <8 x float> @splat0_fdiv_const_op0_v8f32(<8 x float> %vx) { 757; SSE-LABEL: splat0_fdiv_const_op0_v8f32: 758; SSE: # %bb.0: 759; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 760; SSE-NEXT: divss %xmm0, %xmm1 761; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0] 762; SSE-NEXT: movaps %xmm1, %xmm0 763; SSE-NEXT: retq 764; 765; AVX-LABEL: splat0_fdiv_const_op0_v8f32: 766; AVX: # %bb.0: 767; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 768; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 769; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 770; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 771; AVX-NEXT: retq 772 %b = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %vx 773 %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer 774 ret <8 x float> %r 775} 776 777define <4 x float> @multi_use_binop(<4 x float> %x, <4 x float> %y) { 778; SSE-LABEL: multi_use_binop: 779; SSE: # %bb.0: 780; SSE-NEXT: mulps %xmm1, %xmm0 781; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 782; 
SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 783; SSE-NEXT: addps %xmm1, %xmm0 784; SSE-NEXT: retq 785; 786; AVX-LABEL: multi_use_binop: 787; AVX: # %bb.0: 788; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0 789; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 790; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 791; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0 792; AVX-NEXT: retq 793 %mul = fmul <4 x float> %x, %y 794 %mul0 = shufflevector <4 x float> %mul, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 0> 795 %mul1 = shufflevector <4 x float> %mul, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 1> 796 %r = fadd <4 x float> %mul0, %mul1 797 ret <4 x float> %r 798} 799