// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -ffp-exception-behavior=strict \
// RUN: -fexperimental-strict-floating-point \
// RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | llc -o=- - \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
// RUN: -ffp-exception-behavior=strict \
// RUN: -fexperimental-strict-floating-point \
// RUN: -fallow-half-arguments-and-returns -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | llc -o=- - \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>
25 
26 // COMMON-LABEL: test_vsqrt_f16
27 // UNCONSTRAINED:  [[SQR:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a)
28 // CONSTRAINED:    [[SQR:%.*]] = call <4 x half> @llvm.experimental.constrained.sqrt.v4f16(<4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
29 // CHECK-ASM:      fsqrt v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
30 // COMMONIR:       ret <4 x half> [[SQR]]
test_vsqrt_f16(float16x4_t a)31 float16x4_t test_vsqrt_f16(float16x4_t a) {
32   return vsqrt_f16(a);
33 }
34 
35 // COMMON-LABEL: test_vsqrtq_f16
36 // UNCONSTRAINED:  [[SQR:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
37 // CONSTRAINED:    [[SQR:%.*]] = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
38 // CHECK-ASM:      fsqrt v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
39 // COMMONIR:       ret <8 x half> [[SQR]]
test_vsqrtq_f16(float16x8_t a)40 float16x8_t test_vsqrtq_f16(float16x8_t a) {
41   return vsqrtq_f16(a);
42 }
43 
44 // COMMON-LABEL: test_vfma_f16
45 // UNCONSTRAINED:  [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
46 // CONSTRAINED:    [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
47 // CHECK-ASM:      fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
48 // COMMONIR:       ret <4 x half> [[ADD]]
test_vfma_f16(float16x4_t a,float16x4_t b,float16x4_t c)49 float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
50   return vfma_f16(a, b, c);
51 }
52 
53 // COMMON-LABEL: test_vfmaq_f16
54 // UNCONSTRAINED:  [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
55 // CONSTRAINED:    [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
56 // CHECK-ASM:      fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
57 // COMMONIR:       ret <8 x half> [[ADD]]
test_vfmaq_f16(float16x8_t a,float16x8_t b,float16x8_t c)58 float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
59   return vfmaq_f16(a, b, c);
60 }
61 
62 // COMMON-LABEL: test_vfms_f16
63 // COMMONIR:       [[SUB:%.*]] = fneg <4 x half> %b
64 // CHECK-ASM:      fneg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
65 // UNCONSTRAINED:  [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a)
66 // CONSTRAINED:    [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
67 // CHECK-ASM:      fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
68 // COMMONIR:       ret <4 x half> [[ADD]]
test_vfms_f16(float16x4_t a,float16x4_t b,float16x4_t c)69 float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
70   return vfms_f16(a, b, c);
71 }
72 
73 // COMMON-LABEL: test_vfmsq_f16
74 // COMMONIR:       [[SUB:%.*]] = fneg <8 x half> %b
75 // CHECK-ASM:      fneg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
76 // UNCONSTRAINED:  [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a)
77 // CONSTRAINED:    [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
78 // CHECK-ASM:      fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
79 // COMMONIR:       ret <8 x half> [[ADD]]
test_vfmsq_f16(float16x8_t a,float16x8_t b,float16x8_t c)80 float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
81   return vfmsq_f16(a, b, c);
82 }
83 
84 // COMMON-LABEL: test_vfma_lane_f16
85 // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
86 // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
87 // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
88 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
89 // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
90 // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
91 // COMMONIR:      [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
92 // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
93 // CONSTRAINED:   [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
94 // CHECK-ASM:     fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
95 // COMMONIR:      ret <4 x half> [[FMLA]]
test_vfma_lane_f16(float16x4_t a,float16x4_t b,float16x4_t c)96 float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
97   return vfma_lane_f16(a, b, c, 3);
98 }
99 
100 // COMMON-LABEL: test_vfmaq_lane_f16
101 // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
102 // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
103 // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
104 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
105 // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
106 // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
107 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
108 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
109 // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
110 // CHECK-ASM:     fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
111 // COMMONIR:      ret <8 x half> [[FMLA]]
test_vfmaq_lane_f16(float16x8_t a,float16x8_t b,float16x4_t c)112 float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
113   return vfmaq_lane_f16(a, b, c, 3);
114 }
115 
116 // COMMON-LABEL: test_vfma_laneq_f16
117 // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
118 // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
119 // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
120 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
121 // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
122 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
123 // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
124 // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
125 // CONSTRAINED:   [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
126 // CHECK-ASM:     fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
127 // COMMONIR:      ret <4 x half> [[FMLA]]
test_vfma_laneq_f16(float16x4_t a,float16x4_t b,float16x8_t c)128 float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
129   return vfma_laneq_f16(a, b, c, 7);
130 }
131 
132 // COMMON-LABEL: test_vfmaq_laneq_f16
133 // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
134 // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
135 // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
136 // COMMONIR:      [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
137 // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
138 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
139 // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
140 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
141 // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
142 // CHECK-ASM:     fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
143 // COMMONIR:      ret <8 x half> [[FMLA]]
test_vfmaq_laneq_f16(float16x8_t a,float16x8_t b,float16x8_t c)144 float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
145   return vfmaq_laneq_f16(a, b, c, 7);
146 }
147 
148 // COMMON-LABEL: test_vfma_n_f16
149 // COMMONIR:      [[TMP0:%.*]] = insertelement <4 x half> undef, half %c, i32 0
150 // COMMONIR:      [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
151 // COMMONIR:      [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
152 // COMMONIR:      [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
153 // UNCONSTRAINED: [[FMA:%.*]]  = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a)
154 // CONSTRAINED:   [[FMA:%.*]]  = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
155 // CHECK-ASM:     fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
156 // COMMONIR:      ret <4 x half> [[FMA]]
test_vfma_n_f16(float16x4_t a,float16x4_t b,float16_t c)157 float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
158   return vfma_n_f16(a, b, c);
159 }
160 
161 // COMMON-LABEL: test_vfmaq_n_f16
162 // COMMONIR:      [[TMP0:%.*]] = insertelement <8 x half> undef, half %c, i32 0
163 // COMMONIR:      [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
164 // COMMONIR:      [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
165 // COMMONIR:      [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
166 // COMMONIR:      [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
167 // COMMONIR:      [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
168 // COMMONIR:      [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
169 // COMMONIR:      [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
170 // UNCONSTRAINED: [[FMA:%.*]]  = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a)
171 // CONSTRAINED:   [[FMA:%.*]]  = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
172 // CHECK-ASM:     fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
173 // COMMONIR:      ret <8 x half> [[FMA]]
test_vfmaq_n_f16(float16x8_t a,float16x8_t b,float16_t c)174 float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
175   return vfmaq_n_f16(a, b, c);
176 }
177 
178 // COMMON-LABEL: test_vfmah_lane_f16
179 // COMMONIR:      [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
180 // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
181 // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
182 // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
183 // COMMONIR:      ret half [[FMA]]
test_vfmah_lane_f16(float16_t a,float16_t b,float16x4_t c)184 float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) {
185   return vfmah_lane_f16(a, b, c, 3);
186 }
187 
188 // COMMON-LABEL: test_vfmah_laneq_f16
189 // COMMONIR:      [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
190 // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
191 // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
192 // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
193 // COMMONIR:      ret half [[FMA]]
test_vfmah_laneq_f16(float16_t a,float16_t b,float16x8_t c)194 float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
195   return vfmah_laneq_f16(a, b, c, 7);
196 }
197 
198 // COMMON-LABEL: test_vfms_lane_f16
199 // COMMONIR:      [[SUB:%.*]]  = fneg <4 x half> %b
200 // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
201 // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
202 // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
203 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
204 // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
205 // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
206 // COMMONIR:      [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
207 // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
208 // CONSTRAINED:   [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
209 // CHECK-ASM:     fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
210 // COMMONIR:      ret <4 x half> [[FMA]]
test_vfms_lane_f16(float16x4_t a,float16x4_t b,float16x4_t c)211 float16x4_t test_vfms_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
212   return vfms_lane_f16(a, b, c, 3);
213 }
214 
215 // COMMON-LABEL: test_vfmsq_lane_f16
216 // COMMONIR:      [[SUB:%.*]]  = fneg <8 x half> %b
217 // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
218 // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
219 // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
220 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
221 // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
222 // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
223 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
224 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
225 // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
226 // CHECK-ASM:     fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
227 // COMMONIR:      ret <8 x half> [[FMLA]]
test_vfmsq_lane_f16(float16x8_t a,float16x8_t b,float16x4_t c)228 float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
229   return vfmsq_lane_f16(a, b, c, 3);
230 }
231 
232 // COMMON-LABEL: test_vfms_laneq_f16
233 // COMMONIR:      [[SUB:%.*]]  = fneg <4 x half> %b
234 // CHECK-ASM-NOT: fneg
235 // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
236 // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
237 // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
238 // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
239 // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
240 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
241 // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
242 // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
243 // CONSTRAINED:   [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
244 // CHECK-ASM:     fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
245 // COMMONIR:      ret <4 x half> [[FMLA]]
test_vfms_laneq_f16(float16x4_t a,float16x4_t b,float16x8_t c)246 float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
247   return vfms_laneq_f16(a, b, c, 7);
248 }
249 
250 // COMMON-LABEL: test_vfmsq_laneq_f16
251 // COMMONIR:      [[SUB:%.*]]  = fneg <8 x half> %b
252 // CHECK-ASM-NOT: fneg
253 // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
254 // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
255 // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
256 // COMMONIR:      [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
257 // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
258 // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
259 // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
260 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
261 // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
262 // CHECK-ASM:     fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
263 // COMMONIR:      ret <8 x half> [[FMLA]]
test_vfmsq_laneq_f16(float16x8_t a,float16x8_t b,float16x8_t c)264 float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
265   return vfmsq_laneq_f16(a, b, c, 7);
266 }
267 
268 // COMMON-LABEL: test_vfms_n_f16
269 // COMMONIR:      [[SUB:%.*]]  = fneg <4 x half> %b
270 // COMMONIR:      [[TMP0:%.*]] = insertelement <4 x half> undef, half %c, i32 0
271 // COMMONIR:      [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
272 // COMMONIR:      [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
273 // COMMONIR:      [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
274 // UNCONSTRAINED: [[FMA:%.*]]  = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a)
275 // CONSTRAINED:   [[FMA:%.*]]  = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
276 // CHECK-ASM:     fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
277 // COMMONIR:      ret <4 x half> [[FMA]]
test_vfms_n_f16(float16x4_t a,float16x4_t b,float16_t c)278 float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
279   return vfms_n_f16(a, b, c);
280 }
281 
282 // COMMON-LABEL: test_vfmsq_n_f16
283 // COMMONIR:      [[SUB:%.*]]  = fneg <8 x half> %b
284 // COMMONIR:      [[TMP0:%.*]] = insertelement <8 x half> undef, half %c, i32 0
285 // COMMONIR:      [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
286 // COMMONIR:      [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
287 // COMMONIR:      [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
288 // COMMONIR:      [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
289 // COMMONIR:      [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
290 // COMMONIR:      [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
291 // COMMONIR:      [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
292 // UNCONSTRAINED: [[FMA:%.*]]  = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a)
293 // CONSTRAINED:   [[FMA:%.*]]  = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
294 // CHECK-ASM:     fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
295 // COMMONIR:      ret <8 x half> [[FMA]]
test_vfmsq_n_f16(float16x8_t a,float16x8_t b,float16_t c)296 float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
297   return vfmsq_n_f16(a, b, c);
298 }
299 
300 // COMMON-LABEL: test_vfmsh_lane_f16
301 // UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
302 // CONSTRAINED:   [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
303 // CHECK-ASM:     fcvt s{{[0-9]+}}, h{{[0-9]+}}
304 // COMMONIR:      [[TMP1:%.*]] = fneg float [[TMP0]]
305 // CHECK-ASM:     fneg s{{[0-9]+}}, s{{[0-9]+}}
306 // UNCONSTRAINED: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
307 // CONSTRAINED:   [[SUB:%.*]]  = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
308 // CHECK-ASM:     fcvt h{{[0-9]+}}, s{{[0-9]+}}
309 // COMMONIR:      [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
310 // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
311 // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
312 // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
313 // COMMONIR:      ret half [[FMA]]
test_vfmsh_lane_f16(float16_t a,float16_t b,float16x4_t c)314 float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
315   return vfmsh_lane_f16(a, b, c, 3);
316 }
317 
318 // COMMON-LABEL: test_vfmsh_laneq_f16
319 // UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
320 // CONSTRAINED:   [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
321 // CHECK-ASM:     fcvt s{{[0-9]+}}, h{{[0-9]+}}
322 // COMMONIR:      [[TMP1:%.*]] = fneg float [[TMP0]]
323 // CHECK-ASM:     fneg s{{[0-9]+}}, s{{[0-9]+}}
324 // UNCONSTRAINED: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
325 // CONSTRAINED:   [[SUB:%.*]]  = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
326 // CHECK-ASM:     fcvt h{{[0-9]+}}, s{{[0-9]+}}
327 // COMMONIR:      [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
328 // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
329 // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
330 // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
331 // COMMONIR:      ret half [[FMA]]
test_vfmsh_laneq_f16(float16_t a,float16_t b,float16x8_t c)332 float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
333   return vfmsh_laneq_f16(a, b, c, 7);
334 }