1; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
2
3declare float @llvm.fma.f32(float, float, float)
4declare double @llvm.fma.f64(double, double, double)
5
; Scalar single-precision FMA whose second multiplicand is lane 3 of a
; <4 x float>: returns fma(%b, %v[3], %a). The expected output is the
; by-element form of fmla reading lane 3 straight from the vector register.
define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
  ; The trailing colon anchors the match on the function's label so that the
  ; longer "_swap" name cannot satisfy it.
  ; CHECK-LABEL: test_fmla_ss4S:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  %lane = extractelement <4 x float> %v, i32 3
  %res = call float @llvm.fma.f32(float %b, float %lane, float %a)
  ret float %res
}
13
; Same as the non-swapped single-precision test, but with the extracted lane
; passed as the FIRST multiplicand: returns fma(%v[3], %b, %a). The
; by-element form of fmla is still expected.
define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) {
  ; CHECK-LABEL: test_fmla_ss4S_swap:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  %lane = extractelement <4 x float> %v, i32 3
  ; %b is the scalar multiplicand and %a the addend, mirroring the operand
  ; layout used by test_fmla_dd2D_swap.
  %res = call float @llvm.fma.f32(float %lane, float %b, float %a)
  ret float %res
}
21
; Scalar single-precision FMA whose second multiplicand is lane 1 of a
; <2 x float>: returns fma(%b, %v[1], %a). The expected output is the
; by-element form of fmla indexing lane 1.
define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) {
  ; CHECK-LABEL: test_fmla_ss2S:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
  %lane = extractelement <2 x float> %v, i32 1
  %res = call float @llvm.fma.f32(float %b, float %lane, float %a)
  ret float %res
}
29
; Scalar double-precision FMA whose second multiplicand is the only lane of
; a <1 x double>: returns fma(%b, %v[0], %a). The expected output is the
; by-element form of fmla indexing lane 0.
define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) {
  ; CHECK-LABEL: test_fmla_ddD:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
  %lane = extractelement <1 x double> %v, i32 0
  %res = call double @llvm.fma.f64(double %b, double %lane, double %a)
  ret double %res
}
37
; Scalar double-precision FMA whose second multiplicand is lane 1 of a
; <2 x double>: returns fma(%b, %v[1], %a). The expected output is the
; by-element form of fmla indexing lane 1.
define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) {
  ; The trailing colon anchors the match on the function's label so that the
  ; longer "_swap" name cannot satisfy it.
  ; CHECK-LABEL: test_fmla_dd2D:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %lane = extractelement <2 x double> %v, i32 1
  %res = call double @llvm.fma.f64(double %b, double %lane, double %a)
  ret double %res
}
45
; Same as test_fmla_dd2D, but with the extracted lane passed as the FIRST
; multiplicand: returns fma(%v[1], %b, %a). The by-element form of fmla is
; still expected.
define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) {
  ; CHECK-LABEL: test_fmla_dd2D_swap:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %lane = extractelement <2 x double> %v, i32 1
  %res = call double @llvm.fma.f64(double %lane, double %b, double %a)
  ret double %res
}
53
; Scalar single-precision fused multiply-subtract by element. Lane 3 of %v
; is negated (written as -0.0 - x) and multiplied by the same lane:
; returns fma(-%v[3], %v[3], %a). The by-element form of fmls is expected.
; Note: %b is not referenced by this function.
define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) {
  ; The trailing colon anchors the match on the function's label so that the
  ; longer "_swap" name cannot satisfy it.
  ; CHECK-LABEL: test_fmls_ss4S:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  %lane = extractelement <4 x float> %v, i32 3
  %neg = fsub float -0.0, %lane
  %res = call float @llvm.fma.f32(float %neg, float %lane, float %a)
  ret float %res
}
62
; Same as test_fmls_ss4S, but with the negated value passed as the SECOND
; multiplicand: returns fma(%v[3], -%v[3], %a). The by-element form of fmls
; is still expected.
; Note: %b is not referenced by this function.
define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) {
  ; CHECK-LABEL: test_fmls_ss4S_swap:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  %lane = extractelement <4 x float> %v, i32 3
  %neg = fsub float -0.0, %lane
  %res = call float @llvm.fma.f32(float %lane, float %neg, float %a)
  ret float %res
}
71
72
; Scalar single-precision fused multiply-subtract by element. Lane 1 of a
; <2 x float> is negated (written as -0.0 - x) and multiplied by the same
; lane: returns fma(-%v[1], %v[1], %a). The by-element form of fmls is
; expected.
; Note: %b is not referenced by this function.
define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) {
  ; CHECK-LABEL: test_fmls_ss2S:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
  %lane = extractelement <2 x float> %v, i32 1
  %neg = fsub float -0.0, %lane
  %res = call float @llvm.fma.f32(float %neg, float %lane, float %a)
  ret float %res
}
81
; Scalar double-precision fused multiply-subtract by element. The only lane
; of a <1 x double> is negated (written as -0.0 - x) and multiplied by
; itself: returns fma(-%v[0], %v[0], %a). The by-element form of fmls is
; expected.
; Note: %b is not referenced by this function.
define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) {
  ; CHECK-LABEL: test_fmls_ddD:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
  %lane = extractelement <1 x double> %v, i32 0
  %neg = fsub double -0.0, %lane
  %res = call double @llvm.fma.f64(double %neg, double %lane, double %a)
  ret double %res
}
90
; Scalar double-precision fused multiply-subtract by element. Lane 1 of a
; <2 x double> is negated (written as -0.0 - x) and multiplied by the same
; lane: returns fma(-%v[1], %v[1], %a). The by-element form of fmls is
; expected.
; Note: %b is not referenced by this function.
define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) {
  ; The trailing colon anchors the match on the function's label so that the
  ; longer "_swap" name cannot satisfy it.
  ; CHECK-LABEL: test_fmls_dd2D:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %lane = extractelement <2 x double> %v, i32 1
  %neg = fsub double -0.0, %lane
  %res = call double @llvm.fma.f64(double %neg, double %lane, double %a)
  ret double %res
}
99
; Same as test_fmls_dd2D, but with the negated value passed as the SECOND
; multiplicand: returns fma(%v[1], -%v[1], %a). The by-element form of fmls
; is still expected.
; Note: %b is not referenced by this function.
define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) {
  ; CHECK-LABEL: test_fmls_dd2D_swap:
  ; Register numbers are matched with [0-9]+ so any allocation is accepted.
  ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %lane = extractelement <2 x double> %v, i32 1
  %neg = fsub double -0.0, %lane
  %res = call double @llvm.fma.f64(double %lane, double %neg, double %a)
  ret double %res
}
108
109