// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
// Test ARM64 SIMD fused multiply add intrinsics

#include <arm_neon.h>
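
// Unless a test says otherwise, each check below only pins down that the
// intrinsic lowers to a call of the matching llvm.fma.* intrinsic with the
// accumulator a1 passed as the last operand; the IR feeding the other
// operands is deliberately left unconstrained.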

float32x2_t test_vfma_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_f32
  return vfma_f32(a1, a2, a3);
  // CHECK: llvm.fma.v2f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmaq_f32
  return vfmaq_f32(a1, a2, a3);
  // CHECK: llvm.fma.v4f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmaq_f64
  return vfmaq_f64(a1, a2, a3);
  // CHECK: llvm.fma.v2f64({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}
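
// The lane variants multiply a2 by a single lane of a3, so the IR first
// broadcasts that lane (usually with a shufflevector) before calling
// llvm.fma; the checks only constrain the fma operand order.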
float32x2_t test_vfma_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_lane_f32
  return vfma_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v2f32(<2 x float> %a2, <2 x float> {{.*}}, <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmaq_lane_f32
  return vfmaq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v4f32(<4 x float> %a2, <4 x float> {{.*}}, <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmaq_lane_f64
  return vfmaq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: llvm.fma.v2f64(<2 x double> %a2, <2 x double> {{.*}}, <2 x double> %a1)
  // CHECK-NEXT: ret
}
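
// The _n variants splat the scalar a3 across every lane of a vector before
// the multiply; the checks below deliberately do not constrain how that
// splat is built.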
float32x2_t test_vfma_n_f32(float32x2_t a1, float32x2_t a2, float32_t a3) {
  // CHECK: test_vfma_n_f32
  return vfma_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to splat a3 (usually two insertelements)
  // CHECK: llvm.fma.v2f32
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_n_f32(float32x4_t a1, float32x4_t a2, float32_t a3) {
  // CHECK: test_vfmaq_n_f32
  return vfmaq_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to splat a3 (usually four insertelements)
  // CHECK: llvm.fma.v4f32
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_n_f64(float64x2_t a1, float64x2_t a2, float64_t a3) {
  // CHECK: test_vfmaq_n_f64
  return vfmaq_n_f64(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to splat a3 (usually two insertelements)
  // CHECK: llvm.fma.v2f64
  // CHECK-NEXT: ret
}
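
// vfms computes a1 - a2 * a3. The checks below expect Clang to negate one
// multiplicand with an fsub and then reuse the llvm.fma intrinsic, rather
// than emitting a separate fused multiply-subtract intrinsic.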
float32x2_t test_vfms_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_f32
  return vfms_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v2f32(<2 x float> %a3, <2 x float> [[NEG]], <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmsq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmsq_f32
  return vfmsq_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <4 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v4f32(<4 x float> %a3, <4 x float> [[NEG]], <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmsq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmsq_f64
  return vfmsq_f64(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x double> {{.*}}, %a2
  // CHECK: llvm.fma.v2f64(<2 x double> %a3, <2 x double> [[NEG]], <2 x double> %a1)
  // CHECK-NEXT: ret
}
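
// The vfms lane variants combine both patterns above: a3 is negated with an
// fsub, the requested lane of the negated vector is splatted (usually via a
// shufflevector), and the result feeds llvm.fma.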
float32x2_t test_vfms_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_lane_f32
  return vfms_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v2f32(<2 x float> {{.*}}, <2 x float> [[LANE]], <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmsq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmsq_lane_f32
  return vfmsq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v4f32(<4 x float> {{.*}}, <4 x float> [[LANE]], <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmsq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmsq_lane_f64
  return vfmsq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <1 x double> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[NEG]]
  // CHECK: llvm.fma.v2f64(<2 x double> {{.*}}, <2 x double> [[LANE]], <2 x double> %a1)
  // CHECK-NEXT: ret
}