1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
2 // RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
3 // RUN: | opt -S -mem2reg \
4 // RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
5 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
6 // RUN: -ffp-exception-behavior=maytrap -DEXCEPT=1 \
7 // RUN: -fexperimental-strict-floating-point \
8 // RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
9 // RUN: | opt -S -mem2reg \
10 // RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
11 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
12 // RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
13 // RUN: | opt -S -mem2reg | llc -o=- - \
14 // RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
15 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
16 // RUN: -ffp-exception-behavior=maytrap -DEXCEPT=1 \
17 // RUN: -fexperimental-strict-floating-point \
18 // RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
19 // RUN: | opt -S -mem2reg | llc -o=- - \
20 // RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
21 //
22 // REQUIRES: aarch64-registered-target
23 
24 // Test that the constrained intrinsics are picking up the exception
25 // metadata from the AST instead of the global default from the command line.
26 // FIXME: All cases of "fpexcept.maytrap" in this test are wrong.
27 
28 #if EXCEPT
29 #pragma float_control(except, on)
30 #endif
31 
32 #include <arm_neon.h>
33 
34 // COMMON-LABEL: test_vsqrt_f16
35 // UNCONSTRAINED:  [[SQR:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a)
36 // CONSTRAINED:    [[SQR:%.*]] = call <4 x half> @llvm.experimental.constrained.sqrt.v4f16(<4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
37 // CHECK-ASM:      fsqrt v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
38 // COMMONIR:       ret <4 x half> [[SQR]]
test_vsqrt_f16(float16x4_t a)39 float16x4_t test_vsqrt_f16(float16x4_t a) {
40   return vsqrt_f16(a);
41 }
42 
43 // COMMON-LABEL: test_vsqrtq_f16
44 // UNCONSTRAINED:  [[SQR:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
45 // CONSTRAINED:    [[SQR:%.*]] = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
46 // CHECK-ASM:      fsqrt v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
47 // COMMONIR:       ret <8 x half> [[SQR]]
test_vsqrtq_f16(float16x8_t a)48 float16x8_t test_vsqrtq_f16(float16x8_t a) {
49   return vsqrtq_f16(a);
50 }
51 
52 // COMMON-LABEL: test_vfma_f16
53 // UNCONSTRAINED:  [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
54 // CONSTRAINED:    [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
55 // CHECK-ASM:      fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
56 // COMMONIR:       ret <4 x half> [[ADD]]
test_vfma_f16(float16x4_t a,float16x4_t b,float16x4_t c)57 float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
58   return vfma_f16(a, b, c);
59 }
60 
61 // COMMON-LABEL: test_vfmaq_f16
62 // UNCONSTRAINED:  [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
63 // CONSTRAINED:    [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
64 // CHECK-ASM:      fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
65 // COMMONIR:       ret <8 x half> [[ADD]]
test_vfmaq_f16(float16x8_t a,float16x8_t b,float16x8_t c)66 float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
67   return vfmaq_f16(a, b, c);
68 }
69 
70 // COMMON-LABEL: test_vfms_f16
71 // COMMONIR:       [[SUB:%.*]] = fneg <4 x half> %b
72 // UNCONSTRAINED:  [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a)
73 // CONSTRAINED:    [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
74 // CHECK-ASM:      fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
75 // COMMONIR:       ret <4 x half> [[ADD]]
test_vfms_f16(float16x4_t a,float16x4_t b,float16x4_t c)76 float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
77   return vfms_f16(a, b, c);
78 }
79 
80 // COMMON-LABEL: test_vfmsq_f16
81 // COMMONIR:       [[SUB:%.*]] = fneg <8 x half> %b
82 // UNCONSTRAINED:  [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a)
83 // CONSTRAINED:    [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
84 // CHECK-ASM:      fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
85 // COMMONIR:       ret <8 x half> [[ADD]]
test_vfmsq_f16(float16x8_t a,float16x8_t b,float16x8_t c)86 float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
87   return vfmsq_f16(a, b, c);
88 }
89 
90 // COMMON-LABEL: test_vfma_lane_f16
91 // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
92 // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
93 // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
94 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
95 // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
96 // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
97 // COMMONIR:      [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
98 // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
99 // CONSTRAINED:   [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.maytrap")
100 // CHECK-ASM:     fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
101 // COMMONIR:      ret <4 x half> [[FMLA]]
test_vfma_lane_f16(float16x4_t a,float16x4_t b,float16x4_t c)102 float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
103   return vfma_lane_f16(a, b, c, 3);
104 }
105 
106 // COMMON-LABEL: test_vfmaq_lane_f16
107 // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
108 // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
109 // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
110 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
111 // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
112 // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
113 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
114 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
115 // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.maytrap")
116 // CHECK-ASM:     fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
117 // COMMONIR:      ret <8 x half> [[FMLA]]
test_vfmaq_lane_f16(float16x8_t a,float16x8_t b,float16x4_t c)118 float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
119   return vfmaq_lane_f16(a, b, c, 3);
120 }
121 
122 // COMMON-LABEL: test_vfma_laneq_f16
123 // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
124 // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
125 // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
126 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
127 // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
128 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
129 // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
130 // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
131 // CONSTRAINED:   [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.maytrap")
132 // CHECK-ASM:     fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
133 // COMMONIR:      ret <4 x half> [[FMLA]]
test_vfma_laneq_f16(float16x4_t a,float16x4_t b,float16x8_t c)134 float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
135   return vfma_laneq_f16(a, b, c, 7);
136 }
137 
138 // COMMON-LABEL: test_vfmaq_laneq_f16
139 // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
140 // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
141 // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
142 // COMMONIR:      [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
143 // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
144 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
145 // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
146 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
147 // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.maytrap")
148 // CHECK-ASM:     fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
149 // COMMONIR:      ret <8 x half> [[FMLA]]
test_vfmaq_laneq_f16(float16x8_t a,float16x8_t b,float16x8_t c)150 float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
151   return vfmaq_laneq_f16(a, b, c, 7);
152 }
153 
154 // COMMON-LABEL: test_vfma_n_f16
155 // COMMONIR:      [[TMP0:%.*]] = insertelement <4 x half> undef, half %c, i32 0
156 // COMMONIR:      [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
157 // COMMONIR:      [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
158 // COMMONIR:      [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
159 // UNCONSTRAINED: [[FMA:%.*]]  = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a)
160 // CONSTRAINED:   [[FMA:%.*]]  = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
161 // CHECK-ASM:     fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
162 // COMMONIR:      ret <4 x half> [[FMA]]
test_vfma_n_f16(float16x4_t a,float16x4_t b,float16_t c)163 float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
164   return vfma_n_f16(a, b, c);
165 }
166 
167 // COMMON-LABEL: test_vfmaq_n_f16
168 // COMMONIR:      [[TMP0:%.*]] = insertelement <8 x half> undef, half %c, i32 0
169 // COMMONIR:      [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
170 // COMMONIR:      [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
171 // COMMONIR:      [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
172 // COMMONIR:      [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
173 // COMMONIR:      [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
174 // COMMONIR:      [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
175 // COMMONIR:      [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
176 // UNCONSTRAINED: [[FMA:%.*]]  = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a)
177 // CONSTRAINED:   [[FMA:%.*]]  = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
178 // CHECK-ASM:     fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
179 // COMMONIR:      ret <8 x half> [[FMA]]
test_vfmaq_n_f16(float16x8_t a,float16x8_t b,float16_t c)180 float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
181   return vfmaq_n_f16(a, b, c);
182 }
183 
184 // COMMON-LABEL: test_vfmah_lane_f16
185 // COMMONIR:      [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
186 // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
187 // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
188 // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
189 // COMMONIR:      ret half [[FMA]]
test_vfmah_lane_f16(float16_t a,float16_t b,float16x4_t c)190 float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) {
191   return vfmah_lane_f16(a, b, c, 3);
192 }
193 
194 // COMMON-LABEL: test_vfmah_laneq_f16
195 // COMMONIR:      [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
196 // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
197 // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
198 // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
199 // COMMONIR:      ret half [[FMA]]
test_vfmah_laneq_f16(float16_t a,float16_t b,float16x8_t c)200 float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
201   return vfmah_laneq_f16(a, b, c, 7);
202 }
203 
204 // COMMON-LABEL: test_vfms_lane_f16
205 // COMMONIR:      [[SUB:%.*]]  = fneg <4 x half> %b
206 // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
207 // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
208 // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
209 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
210 // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
211 // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
212 // COMMONIR:      [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
213 // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
214 // CONSTRAINED:   [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.maytrap")
215 // CHECK-ASM:     fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
216 // COMMONIR:      ret <4 x half> [[FMA]]
test_vfms_lane_f16(float16x4_t a,float16x4_t b,float16x4_t c)217 float16x4_t test_vfms_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
218   return vfms_lane_f16(a, b, c, 3);
219 }
220 
221 // COMMON-LABEL: test_vfmsq_lane_f16
222 // COMMONIR:      [[SUB:%.*]]  = fneg <8 x half> %b
223 // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
224 // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
225 // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
226 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
227 // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
228 // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
229 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
230 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
231 // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.maytrap")
232 // CHECK-ASM:     fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
233 // COMMONIR:      ret <8 x half> [[FMLA]]
test_vfmsq_lane_f16(float16x8_t a,float16x8_t b,float16x4_t c)234 float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
235   return vfmsq_lane_f16(a, b, c, 3);
236 }
237 
238 // COMMON-LABEL: test_vfms_laneq_f16
239 // COMMONIR:      [[SUB:%.*]]  = fneg <4 x half> %b
240 // CHECK-ASM-NOT: fneg
241 // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
242 // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
243 // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
244 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
245 // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
246 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
247 // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
248 // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
249 // CONSTRAINED:   [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.maytrap")
250 // CHECK-ASM:     fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
251 // COMMONIR:      ret <4 x half> [[FMLA]]
test_vfms_laneq_f16(float16x4_t a,float16x4_t b,float16x8_t c)252 float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
253   return vfms_laneq_f16(a, b, c, 7);
254 }
255 
256 // COMMON-LABEL: test_vfmsq_laneq_f16
257 // COMMONIR:      [[SUB:%.*]]  = fneg <8 x half> %b
258 // CHECK-ASM-NOT: fneg
259 // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
260 // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
261 // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
262 // COMMONIR:      [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
263 // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
264 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
265 // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
266 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
267 // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.maytrap")
268 // CHECK-ASM:     fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
269 // COMMONIR:      ret <8 x half> [[FMLA]]
test_vfmsq_laneq_f16(float16x8_t a,float16x8_t b,float16x8_t c)270 float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
271   return vfmsq_laneq_f16(a, b, c, 7);
272 }
273 
274 // COMMON-LABEL: test_vfms_n_f16
275 // COMMONIR:      [[SUB:%.*]]  = fneg <4 x half> %b
276 // COMMONIR:      [[TMP0:%.*]] = insertelement <4 x half> undef, half %c, i32 0
277 // COMMONIR:      [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
278 // COMMONIR:      [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
279 // COMMONIR:      [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
280 // UNCONSTRAINED: [[FMA:%.*]]  = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a)
281 // CONSTRAINED:   [[FMA:%.*]]  = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
282 // CHECK-ASM:     fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
283 // COMMONIR:      ret <4 x half> [[FMA]]
test_vfms_n_f16(float16x4_t a,float16x4_t b,float16_t c)284 float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
285   return vfms_n_f16(a, b, c);
286 }
287 
288 // COMMON-LABEL: test_vfmsq_n_f16
289 // COMMONIR:      [[SUB:%.*]]  = fneg <8 x half> %b
290 // COMMONIR:      [[TMP0:%.*]] = insertelement <8 x half> undef, half %c, i32 0
291 // COMMONIR:      [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
292 // COMMONIR:      [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
293 // COMMONIR:      [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
294 // COMMONIR:      [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
295 // COMMONIR:      [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
296 // COMMONIR:      [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
297 // COMMONIR:      [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
298 // UNCONSTRAINED: [[FMA:%.*]]  = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a)
299 // CONSTRAINED:   [[FMA:%.*]]  = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
300 // CHECK-ASM:     fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
301 // COMMONIR:      ret <8 x half> [[FMA]]
test_vfmsq_n_f16(float16x8_t a,float16x8_t b,float16_t c)302 float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
303   return vfmsq_n_f16(a, b, c);
304 }
305 
306 // COMMON-LABEL: test_vfmsh_lane_f16
307 // UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
308 // CONSTRAINED:   [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
309 // CHECK-ASM:     fcvt s{{[0-9]+}}, h{{[0-9]+}}
310 // COMMONIR:      [[TMP1:%.*]] = fneg float [[TMP0]]
311 // CHECK-ASM:     fneg s{{[0-9]+}}, s{{[0-9]+}}
312 // UNCONSTRAINED: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
313 // CONSTRAINED:   [[SUB:%.*]]  = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
314 // CHECK-ASM:     fcvt h{{[0-9]+}}, s{{[0-9]+}}
315 // COMMONIR:      [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
316 // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
317 // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
318 // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
319 // COMMONIR:      ret half [[FMA]]
test_vfmsh_lane_f16(float16_t a,float16_t b,float16x4_t c)320 float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
321   return vfmsh_lane_f16(a, b, c, 3);
322 }
323 
324 // COMMON-LABEL: test_vfmsh_laneq_f16
325 // UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
326 // CONSTRAINED:   [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
327 // CHECK-ASM:     fcvt s{{[0-9]+}}, h{{[0-9]+}}
328 // COMMONIR:      [[TMP1:%.*]] = fneg float [[TMP0]]
329 // CHECK-ASM:     fneg s{{[0-9]+}}, s{{[0-9]+}}
330 // UNCONSTRAINED: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
331 // CONSTRAINED:   [[SUB:%.*]]  = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
332 // CHECK-ASM:     fcvt h{{[0-9]+}}, s{{[0-9]+}}
333 // COMMONIR:      [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
334 // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
335 // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
336 // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
337 // COMMONIR:      ret half [[FMA]]
test_vfmsh_laneq_f16(float16_t a,float16_t b,float16x8_t c)338 float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
339   return vfmsh_laneq_f16(a, b, c, 7);
340 }
341