; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fma | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma | FileCheck %s --check-prefix=X64

; Codegen test for the llvm.arithmetic.fence intrinsic on 32-bit and 64-bit
; x86 with FMA enabled. The functions come in pairs: the odd-numbered one
; (f1, f3, f5) uses only 'fast' floating-point operations, and the following
; even-numbered one (f2, f4, f6) is the same computation with one intermediate
; value routed through llvm.arithmetic.fence. The expected assembly for the
; fenced variants contains an #ARITH_FENCE marker and keeps the operations on
; either side of it separate.
;
; The expected-assembly comments inside each function are autogenerated; after
; changing any IR below, regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.

; f1: (b * a) + c on float, no fence.
; Both targets are expected to emit a single vfmadd213ss, i.e. the multiply
; and the add are fused. On the 32-bit target the result is additionally
; stored to the stack and reloaded with flds before returning.
define float @f1(float %a, float %b, float %c) {
; X86-LABEL: f1:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; X86-NEXT: vmovss %xmm1, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: f1:
; X64: # %bb.0:
; X64-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq
  %mul = fmul fast float %b, %a
  %add = fadd fast float %mul, %c
  ret float %add
}

; f2: same computation as f1, but the product passes through
; llvm.arithmetic.fence.f32 before it is added to c.
; Both targets are expected to emit a separate vmulss and vaddss with the
; #ARITH_FENCE marker between them, and no fused multiply-add.
define float @f2(float %a, float %b, float %c) {
; X86-LABEL: f2:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: #ARITH_FENCE
; X86-NEXT: vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: f2:
; X64: # %bb.0:
; X64-NEXT: vmulss %xmm0, %xmm1, %xmm0
; X64-NEXT: #ARITH_FENCE
; X64-NEXT: vaddss %xmm2, %xmm0, %xmm0
; X64-NEXT: retq
  %mul = fmul fast float %b, %a
  %tmp = call float @llvm.arithmetic.fence.f32(float %mul)
  %add = fadd fast float %tmp, %c
  ret float %add
}

; f3: (a + a) + (a + a) on double, no fence.
; Both targets are expected to emit a single vmulsd of the argument by a
; constant-pool operand. The constant's value is not checked here; presumably
; it is 4.0, since the expression equals 4 * a -- confirm against llc output.
define double @f3(double %a) {
; X86-LABEL: f3:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: vmovsd %xmm0, (%esp)
; X86-NEXT: fldl (%esp)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
;
; X64-LABEL: f3:
; X64: # %bb.0:
; X64-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: retq
  %1 = fadd fast double %a, %a
  %2 = fadd fast double %a, %a
  %3 = fadd fast double %1, %2
  ret double %3
}

; f4: same computation as f3, but the first (a + a) passes through
; llvm.arithmetic.fence.f64 before the final add.
; Both targets are expected to compute a + a once with vaddsd, copy it with
; vmovapd, emit the #ARITH_FENCE marker, and then add the two values with a
; second vaddsd -- not the single multiply expected for f3.
define double @f4(double %a) {
; X86-LABEL: f4:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; X86-NEXT: vmovapd %xmm0, %xmm1
; X86-NEXT: #ARITH_FENCE
; X86-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; X86-NEXT: vmovsd %xmm0, (%esp)
; X86-NEXT: fldl (%esp)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
;
; X64-LABEL: f4:
; X64: # %bb.0:
; X64-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovapd %xmm0, %xmm1
; X64-NEXT: #ARITH_FENCE
; X64-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; X64-NEXT: retq
  %1 = fadd fast double %a, %a
  %t = call double @llvm.arithmetic.fence.f64(double %1)
  %2 = fadd fast double %a, %a
  %3 = fadd fast double %t, %2
  ret double %3
}

; f5: vector form of f3 on <2 x float>, no fence.
; Both targets are expected to emit a single vmulps by a constant-pool
; operand.
define <2 x float> @f5(<2 x float> %a) {
; X86-LABEL: f5:
; X86: # %bb.0:
; X86-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: f5:
; X64: # %bb.0:
; X64-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: retq
  %1 = fadd fast <2 x float> %a, %a
  %2 = fadd fast <2 x float> %a, %a
  %3 = fadd fast <2 x float> %1, %2
  ret <2 x float> %3
}

; f6: vector form of f4 on <2 x float>; the first (a + a) passes through
; llvm.arithmetic.fence.v2f32 before the final add.
; Both targets are expected to emit vaddps, a vmovaps copy, the #ARITH_FENCE
; marker, and a second vaddps -- not the single multiply expected for f5.
define <2 x float> @f6(<2 x float> %a) {
; X86-LABEL: f6:
; X86: # %bb.0:
; X86-NEXT: vaddps %xmm0, %xmm0, %xmm0
; X86-NEXT: vmovaps %xmm0, %xmm1
; X86-NEXT: #ARITH_FENCE
; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: f6:
; X64: # %bb.0:
; X64-NEXT: vaddps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %xmm0, %xmm1
; X64-NEXT: #ARITH_FENCE
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT: retq
  %1 = fadd fast <2 x float> %a, %a
  %t = call <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float> %1)
  %2 = fadd fast <2 x float> %a, %a
  %3 = fadd fast <2 x float> %t, %2
  ret <2 x float> %3
}

; Declarations of the llvm.arithmetic.fence overloads called above (scalar
; float, scalar double, and <2 x float>).
declare float @llvm.arithmetic.fence.f32(float)
declare double @llvm.arithmetic.fence.f64(double)
declare <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float>)