// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -ffp-exception-behavior=strict \
// RUN: -fexperimental-strict-floating-point \
// RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | llc -o=- - \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -ffp-exception-behavior=strict \
// RUN: -fexperimental-strict-floating-point \
// RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | llc -o=- - \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>
25
26 // COMMON-LABEL: test_vsqrt_f16
27 // UNCONSTRAINED: [[SQR:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a)
28 // CONSTRAINED: [[SQR:%.*]] = call <4 x half> @llvm.experimental.constrained.sqrt.v4f16(<4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
29 // CHECK-ASM: fsqrt v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
30 // COMMONIR: ret <4 x half> [[SQR]]
test_vsqrt_f16(float16x4_t a)31 float16x4_t test_vsqrt_f16(float16x4_t a) {
32 return vsqrt_f16(a);
33 }
34
35 // COMMON-LABEL: test_vsqrtq_f16
36 // UNCONSTRAINED: [[SQR:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
37 // CONSTRAINED: [[SQR:%.*]] = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
38 // CHECK-ASM: fsqrt v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
39 // COMMONIR: ret <8 x half> [[SQR]]
test_vsqrtq_f16(float16x8_t a)40 float16x8_t test_vsqrtq_f16(float16x8_t a) {
41 return vsqrtq_f16(a);
42 }
43
44 // COMMON-LABEL: test_vfma_f16
45 // UNCONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
46 // CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
47 // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
48 // COMMONIR: ret <4 x half> [[ADD]]
test_vfma_f16(float16x4_t a,float16x4_t b,float16x4_t c)49 float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
50 return vfma_f16(a, b, c);
51 }
52
53 // COMMON-LABEL: test_vfmaq_f16
54 // UNCONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
55 // CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
56 // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
57 // COMMONIR: ret <8 x half> [[ADD]]
test_vfmaq_f16(float16x8_t a,float16x8_t b,float16x8_t c)58 float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
59 return vfmaq_f16(a, b, c);
60 }
61
62 // COMMON-LABEL: test_vfms_f16
63 // COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
64 // CHECK-ASM: fneg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
65 // UNCONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a)
66 // CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
67 // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
68 // COMMONIR: ret <4 x half> [[ADD]]
test_vfms_f16(float16x4_t a,float16x4_t b,float16x4_t c)69 float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
70 return vfms_f16(a, b, c);
71 }
72
73 // COMMON-LABEL: test_vfmsq_f16
74 // COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
75 // CHECK-ASM: fneg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
76 // UNCONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a)
77 // CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
78 // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
79 // COMMONIR: ret <8 x half> [[ADD]]
test_vfmsq_f16(float16x8_t a,float16x8_t b,float16x8_t c)80 float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
81 return vfmsq_f16(a, b, c);
82 }
83
84 // COMMON-LABEL: test_vfma_lane_f16
85 // COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
86 // COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
87 // COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
88 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
89 // COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
90 // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
91 // COMMONIR: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
92 // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
93 // CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
94 // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
95 // COMMONIR: ret <4 x half> [[FMLA]]
test_vfma_lane_f16(float16x4_t a,float16x4_t b,float16x4_t c)96 float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
97 return vfma_lane_f16(a, b, c, 3);
98 }
99
100 // COMMON-LABEL: test_vfmaq_lane_f16
101 // COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
102 // COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
103 // COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
104 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
105 // COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
106 // COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
107 // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
108 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
109 // CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
110 // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
111 // COMMONIR: ret <8 x half> [[FMLA]]
test_vfmaq_lane_f16(float16x8_t a,float16x8_t b,float16x4_t c)112 float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
113 return vfmaq_lane_f16(a, b, c, 3);
114 }
115
116 // COMMON-LABEL: test_vfma_laneq_f16
117 // COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
118 // COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
119 // COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
120 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
121 // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
122 // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
123 // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
124 // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
125 // CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
126 // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
127 // COMMONIR: ret <4 x half> [[FMLA]]
test_vfma_laneq_f16(float16x4_t a,float16x4_t b,float16x8_t c)128 float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
129 return vfma_laneq_f16(a, b, c, 7);
130 }
131
132 // COMMON-LABEL: test_vfmaq_laneq_f16
133 // COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
134 // COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
135 // COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
136 // COMMONIR: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
137 // COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
138 // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
139 // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
140 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
141 // CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
142 // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
143 // COMMONIR: ret <8 x half> [[FMLA]]
test_vfmaq_laneq_f16(float16x8_t a,float16x8_t b,float16x8_t c)144 float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
145 return vfmaq_laneq_f16(a, b, c, 7);
146 }
147
148 // COMMON-LABEL: test_vfma_n_f16
149 // COMMONIR: [[TMP0:%.*]] = insertelement <4 x half> undef, half %c, i32 0
150 // COMMONIR: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
151 // COMMONIR: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
152 // COMMONIR: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
153 // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a)
154 // CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
155 // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
156 // COMMONIR: ret <4 x half> [[FMA]]
test_vfma_n_f16(float16x4_t a,float16x4_t b,float16_t c)157 float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
158 return vfma_n_f16(a, b, c);
159 }
160
161 // COMMON-LABEL: test_vfmaq_n_f16
162 // COMMONIR: [[TMP0:%.*]] = insertelement <8 x half> undef, half %c, i32 0
163 // COMMONIR: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
164 // COMMONIR: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
165 // COMMONIR: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
166 // COMMONIR: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
167 // COMMONIR: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
168 // COMMONIR: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
169 // COMMONIR: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
170 // UNCONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a)
171 // CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
172 // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
173 // COMMONIR: ret <8 x half> [[FMA]]
test_vfmaq_n_f16(float16x8_t a,float16x8_t b,float16_t c)174 float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
175 return vfmaq_n_f16(a, b, c);
176 }
177
178 // COMMON-LABEL: test_vfmah_lane_f16
179 // COMMONIR: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
180 // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
181 // CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
182 // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
183 // COMMONIR: ret half [[FMA]]
test_vfmah_lane_f16(float16_t a,float16_t b,float16x4_t c)184 float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) {
185 return vfmah_lane_f16(a, b, c, 3);
186 }
187
188 // COMMON-LABEL: test_vfmah_laneq_f16
189 // COMMONIR: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
190 // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
191 // CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
192 // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
193 // COMMONIR: ret half [[FMA]]
test_vfmah_laneq_f16(float16_t a,float16_t b,float16x8_t c)194 float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
195 return vfmah_laneq_f16(a, b, c, 7);
196 }
197
198 // COMMON-LABEL: test_vfms_lane_f16
199 // COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
200 // COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
201 // COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
202 // COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
203 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
204 // COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
205 // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
206 // COMMONIR: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
207 // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
208 // CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
209 // CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
210 // COMMONIR: ret <4 x half> [[FMA]]
test_vfms_lane_f16(float16x4_t a,float16x4_t b,float16x4_t c)211 float16x4_t test_vfms_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
212 return vfms_lane_f16(a, b, c, 3);
213 }
214
215 // COMMON-LABEL: test_vfmsq_lane_f16
216 // COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
217 // COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
218 // COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
219 // COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
220 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
221 // COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
222 // COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
223 // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
224 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
225 // CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
226 // CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
227 // COMMONIR: ret <8 x half> [[FMLA]]
test_vfmsq_lane_f16(float16x8_t a,float16x8_t b,float16x4_t c)228 float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
229 return vfmsq_lane_f16(a, b, c, 3);
230 }
231
232 // COMMON-LABEL: test_vfms_laneq_f16
233 // COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
234 // CHECK-ASM-NOT: fneg
235 // COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
236 // COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
237 // COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
238 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
239 // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
240 // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
241 // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
242 // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
243 // CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
244 // CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
245 // COMMONIR: ret <4 x half> [[FMLA]]
test_vfms_laneq_f16(float16x4_t a,float16x4_t b,float16x8_t c)246 float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
247 return vfms_laneq_f16(a, b, c, 7);
248 }
249
250 // COMMON-LABEL: test_vfmsq_laneq_f16
251 // COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
252 // CHECK-ASM-NOT: fneg
253 // COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
254 // COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
255 // COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
256 // COMMONIR: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
257 // COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
258 // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
259 // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
260 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
261 // CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
262 // CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
263 // COMMONIR: ret <8 x half> [[FMLA]]
test_vfmsq_laneq_f16(float16x8_t a,float16x8_t b,float16x8_t c)264 float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
265 return vfmsq_laneq_f16(a, b, c, 7);
266 }
267
268 // COMMON-LABEL: test_vfms_n_f16
269 // COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b
270 // COMMONIR: [[TMP0:%.*]] = insertelement <4 x half> undef, half %c, i32 0
271 // COMMONIR: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
272 // COMMONIR: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
273 // COMMONIR: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
274 // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a)
275 // CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
276 // CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
277 // COMMONIR: ret <4 x half> [[FMA]]
test_vfms_n_f16(float16x4_t a,float16x4_t b,float16_t c)278 float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
279 return vfms_n_f16(a, b, c);
280 }
281
282 // COMMON-LABEL: test_vfmsq_n_f16
283 // COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
284 // COMMONIR: [[TMP0:%.*]] = insertelement <8 x half> undef, half %c, i32 0
285 // COMMONIR: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
286 // COMMONIR: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
287 // COMMONIR: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
288 // COMMONIR: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
289 // COMMONIR: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
290 // COMMONIR: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
291 // COMMONIR: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
292 // UNCONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a)
293 // CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
294 // CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
295 // COMMONIR: ret <8 x half> [[FMA]]
test_vfmsq_n_f16(float16x8_t a,float16x8_t b,float16_t c)296 float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
297 return vfmsq_n_f16(a, b, c);
298 }
299
300 // COMMON-LABEL: test_vfmsh_lane_f16
301 // UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
302 // CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
303 // CHECK-ASM: fcvt s{{[0-9]+}}, h{{[0-9]+}}
304 // COMMONIR: [[TMP1:%.*]] = fneg float [[TMP0]]
305 // CHECK-ASM: fneg s{{[0-9]+}}, s{{[0-9]+}}
306 // UNCONSTRAINED: [[SUB:%.*]] = fptrunc float [[TMP1]] to half
307 // CONSTRAINED: [[SUB:%.*]] = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
308 // CHECK-ASM: fcvt h{{[0-9]+}}, s{{[0-9]+}}
309 // COMMONIR: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
310 // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
311 // CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
312 // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
313 // COMMONIR: ret half [[FMA]]
test_vfmsh_lane_f16(float16_t a,float16_t b,float16x4_t c)314 float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
315 return vfmsh_lane_f16(a, b, c, 3);
316 }
317
318 // COMMON-LABEL: test_vfmsh_laneq_f16
319 // UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
320 // CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
321 // CHECK-ASM: fcvt s{{[0-9]+}}, h{{[0-9]+}}
322 // COMMONIR: [[TMP1:%.*]] = fneg float [[TMP0]]
323 // CHECK-ASM: fneg s{{[0-9]+}}, s{{[0-9]+}}
324 // UNCONSTRAINED: [[SUB:%.*]] = fptrunc float [[TMP1]] to half
325 // CONSTRAINED: [[SUB:%.*]] = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
326 // CHECK-ASM: fcvt h{{[0-9]+}}, s{{[0-9]+}}
327 // COMMONIR: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
328 // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
329 // CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
330 // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
331 // COMMONIR: ret half [[FMA]]
test_vfmsh_laneq_f16(float16_t a,float16_t b,float16x8_t c)332 float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
333 return vfmsh_laneq_f16(a, b, c, 7);
334 }
335
336