// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
// Test ARM64 SIMD fused multiply-add and multiply-subtract intrinsics

#include <arm_neon.h>
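
// A note on operand order, summarizing the FileCheck expectations below: the
// NEON intrinsic vfma_f32(a1, a2, a3) computes a1 + a2 * a3, while the LLVM
// intrinsic llvm.fma(x, y, z) computes x * y + z, so the accumulator a1
// appears last in the generated call and the multiplicands a2/a3 come first.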

float32x2_t test_vfma_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_f32
  return vfma_f32(a1, a2, a3);
  // CHECK: llvm.fma.v2f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmaq_f32
  return vfmaq_f32(a1, a2, a3);
  // CHECK: llvm.fma.v4f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmaq_f64
  return vfmaq_f64(a1, a2, a3);
  // CHECK: llvm.fma.v2f64({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}
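
// Lane variants: vfma_lane_f32(a1, a2, a3, N) computes a1 + a2 * a3[N]. The
// selected lane is typically broadcast with a shufflevector before the
// llvm.fma call, so the checks below only pin down the non-lane operands.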

float32x2_t test_vfma_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_lane_f32
  return vfma_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v2f32(<2 x float> %a2, <2 x float> {{.*}}, <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmaq_lane_f32
  return vfmaq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v4f32(<4 x float> %a2, <4 x float> {{.*}}, <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmaq_lane_f64
  return vfmaq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: llvm.fma.v2f64(<2 x double> %a2, <2 x double> {{.*}}, <2 x double> %a1)
  // CHECK-NEXT: ret
}
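
// _n_ variants: vfma_n_f32(a1, a2, n) computes a1 + a2 * n, with the scalar n
// splatted across a vector (typically via insertelement) before the fma.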

float32x2_t test_vfma_n_f32(float32x2_t a1, float32x2_t a2, float32_t a3) {
  // CHECK: test_vfma_n_f32
  return vfma_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to splat the scalar (usually two insertelements)
  // CHECK: llvm.fma.v2f32
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_n_f32(float32x4_t a1, float32x4_t a2, float32_t a3) {
  // CHECK: test_vfmaq_n_f32
  return vfmaq_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to splat the scalar (usually four insertelements)
  // CHECK: llvm.fma.v4f32
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_n_f64(float64x2_t a1, float64x2_t a2, float64_t a3) {
  // CHECK: test_vfmaq_n_f64
  return vfmaq_n_f64(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to splat the scalar (usually two insertelements)
  // CHECK: llvm.fma.v2f64
  // CHECK-NEXT: ret
}
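
// vfms is fused multiply-subtract: vfms_f32(a1, a2, a3) computes a1 - a2 * a3.
// Clang lowers it by negating one multiplicand (the fsub matched as [[NEG]]
// below, typically a subtraction from -0.0) and reusing llvm.fma, since
// (-a2) * a3 + a1 == a1 - a2 * a3.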

float32x2_t test_vfms_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_f32
  return vfms_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v2f32(<2 x float> %a3, <2 x float> [[NEG]], <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmsq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmsq_f32
  return vfmsq_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <4 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v4f32(<4 x float> %a3, <4 x float> [[NEG]], <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmsq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmsq_f64
  return vfmsq_f64(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x double> {{.*}}, %a2
  // CHECK: llvm.fma.v2f64(<2 x double> %a3, <2 x double> [[NEG]], <2 x double> %a1)
  // CHECK-NEXT: ret
}
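
// Lane variants of vfms combine both lowerings: the lane vector a3 is negated
// first, then broadcast with a shufflevector, and the splat feeds llvm.fma.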

float32x2_t test_vfms_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_lane_f32
  return vfms_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v2f32(<2 x float> {{.*}}, <2 x float> [[LANE]], <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmsq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmsq_lane_f32
  return vfmsq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v4f32(<4 x float> {{.*}}, <4 x float> [[LANE]], <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmsq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmsq_lane_f64
  return vfmsq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <1 x double> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[NEG]]
  // CHECK: llvm.fma.v2f64(<2 x double> {{.*}}, <2 x double> [[LANE]], <2 x double> %a1)
  // CHECK-NEXT: ret
}