1; SOFT:
2; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft     | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
3; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft   | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
4; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
5; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -float-abi=soft -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
6
7; SOFTFP:
8; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
9; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
10; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16,+fp64    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
11
12; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
13; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
14; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16,+fp64    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
15
16; Test fast-isel
17; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16,+fp64 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
18; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16,+fp64 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
19
20; HARD:
21; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
22; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
23; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16,+fp64  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
24
25; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
26; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16,+fp64  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
28
29; FP-CONTRACT=FAST
30; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16,+fp64 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
31; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16,+fp64 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
32
33; TODO: we can't pass half-precision arguments as "half" types yet. We do
34; that for the time being by passing "float %f.coerce" and the necessary
35; bitconverts/truncates. But when we can pass half types, we do want to use
36; and test that here.
37
38define float @RetValBug(float %A.coerce) {
39entry:
40  ret float undef
41; Check thatLowerReturn can handle undef nodes (i.e. nodes which do not have
42; any operands) when FullFP16 is enabled.
43;
44; CHECK-LABEL:            RetValBug:
45; CHECK-HARDFP-FULLFP16:  {{.*}} lr
46}
47
48; 2. VADD
49define float @Add(float %a.coerce, float %b.coerce) {
50entry:
51  %0 = bitcast float %a.coerce to i32
52  %tmp.0.extract.trunc = trunc i32 %0 to i16
53  %1 = bitcast i16 %tmp.0.extract.trunc to half
54  %2 = bitcast float %b.coerce to i32
55  %tmp1.0.extract.trunc = trunc i32 %2 to i16
56  %3 = bitcast i16 %tmp1.0.extract.trunc to half
57  %add = fadd half %1, %3
58  %4 = bitcast half %add to i16
59  %tmp4.0.insert.ext = zext i16 %4 to i32
60  %5 = bitcast i32 %tmp4.0.insert.ext to float
61  ret float %5
62
63; CHECK-LABEL: Add:
64
65; CHECK-SOFT:  bl  __aeabi_h2f
66; CHECK-SOFT:  bl  __aeabi_h2f
67; CHECK-SOFT:  bl  __aeabi_fadd
68; CHECK-SOFT:  bl  __aeabi_f2h
69
70; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
71; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
72; CHECK-SOFTFP-VFP3:  vadd.f32
73; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h
74
75; CHECK-SOFTFP-FP16-DAG:  vmov          [[S0:s[0-9]]], r0
76; CHECK-SOFTFP-FP16-DAG:  vmov          [[S2:s[0-9]]], r1
77; CHECK-SOFTFP-FP16-DAG:  vcvtb.f32.f16 [[S0]], [[S0]]
78; CHECK-SOFTFP-FP16-DAG:  vcvtb.f32.f16 [[S2]], [[S2]]
79; CHECK-SOFTFP-FP16:  vadd.f32      [[S0]], [[S0]], [[S2]]
80; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
81; CHECK-SOFTFP-FP16:  vmov  r0, s0
82
83; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
84; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
85; CHECK-SOFTFP-FULLFP16:       vadd.f16  [[S0]], [[S2]], [[S0]]
86; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0
87
88; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
89; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
90; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
91; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
92; CHECK-HARDFP-VFP3:  vadd.f32
93; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
94; CHECK-HARDFP-VFP3:  vmov  s0, r0
95
96; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
97; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
98; CHECK-HARDFP-FP16:  vadd.f32  [[S0]], [[S0]], [[S2]]
99; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
100
101; CHECK-HARDFP-FULLFP16:       vadd.f16  s0, s0, s1
102}
103
104; 3. VCMP
105define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
106entry:
107  %0 = bitcast float %F.coerce to i32
108  %tmp.0.extract.trunc = trunc i32 %0 to i16
109  %1 = bitcast i16 %tmp.0.extract.trunc to half
110  %2 = bitcast float %G.coerce to i32
111  %tmp1.0.extract.trunc = trunc i32 %2 to i16
112  %3 = bitcast i16 %tmp1.0.extract.trunc to half
113  %cmp = fcmp une half %1, %3
114  ret i1 %cmp
115
116; CHECK-LABEL:            VCMP1:
117
118; CHECK-SOFT:             bl  __aeabi_fcmpeq
119
120; CHECK-SOFTFP-VFP3:      bl  __aeabi_h2f
121; CHECK-SOFTFP-VFP3:      bl  __aeabi_h2f
122; CHECK-SOFTFP-VFP3:      vcmp.f32 s{{.}}, s{{.}}
123
124; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 s{{.}}, s{{.}}
125; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 s{{.}}, s{{.}}
126; CHECK-SOFTFP-FP16:      vcmp.f32 s{{.}}, s{{.}}
127
128; CHECK-SOFTFP-FULLFP16:  vmov.f16  [[S2:s[0-9]]], r0
129; CHECK-SOFTFP-FULLFP16:  vmov.f16 [[S0:s[0-9]]], r1
130; CHECK-SOFTFP-FULLFP16:  vcmp.f16 [[S2]], [[S0]]
131
132; CHECK-HARDFP-FULLFP16-NOT:  vmov.f16  s{{.}}, r0
133; CHECK-HARDFP-FULLFP16-NOT:  vmov.f16  s{{.}}, r1
134; CHECK-HARDFP-FULLFP16:      vcmp.f16  s0, s1
135}
136
137; Check VCMPZH
138define zeroext i1 @VCMP2(float %F.coerce) {
139entry:
140  %0 = bitcast float %F.coerce to i32
141  %tmp.0.extract.trunc = trunc i32 %0 to i16
142  %1 = bitcast i16 %tmp.0.extract.trunc to half
143  %cmp = fcmp une half %1, 0.000000e+00
144  ret i1 %cmp
145
146; CHECK-LABEL:             VCMP2:
147
148; CHECK-SOFT:              bl __aeabi_fcmpeq
149; CHECK-SOFTFP-FP16:       vcmp.f32        s0, #0
150; CHECK-SOFTFP-FULLFP16:   vcmp.f16        s0, #0
151; CHECK-HARDFP-FULLFP16:   vcmp.f16        s0, #0
152}
153
154; 4. VCMPE
155define i32 @VCMPE1(float %F.coerce) {
156entry:
157  %0 = bitcast float %F.coerce to i32
158  %tmp.0.extract.trunc = trunc i32 %0 to i16
159  %1 = bitcast i16 %tmp.0.extract.trunc to half
160  %tmp = fcmp olt half %1, 0.000000e+00
161  %tmp1 = zext i1 %tmp to i32
162  ret i32 %tmp1
163
164; CHECK-LABEL:             VCMPE1:
165
166; CHECK-SOFT:              bl  __aeabi_fcmplt
167; CHECK-SOFTFP-FP16:       vcmp.f32 s0, #0
168; CHECK-SOFTFP-FULLFP16:   vcmp.f16 s0, #0
169; CHECK-HARDFP-FULLFP16:   vcmp.f16 s0, #0
170}
171
172define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
173entry:
174  %0 = bitcast float %F.coerce to i32
175  %tmp.0.extract.trunc = trunc i32 %0 to i16
176  %1 = bitcast i16 %tmp.0.extract.trunc to half
177  %2 = bitcast float %G.coerce to i32
178  %tmp.1.extract.trunc = trunc i32 %2 to i16
179  %3 = bitcast i16 %tmp.1.extract.trunc to half
180  %tmp = fcmp olt half %1, %3
181  %tmp1 = zext i1 %tmp to i32
182  ret i32 %tmp1
183
184; CHECK-LABEL:  VCMPE2:
185
186; CHECK-SOFT:              bl  __aeabi_fcmplt
187; CHECK-SOFTFP-FP16:       vcmp.f32 s{{.}}, s{{.}}
188; CHECK-SOFTFP-FULLFP16:   vcmp.f16 s{{.}}, s{{.}}
189; CHECK-HARDFP-FULLFP16:   vcmp.f16 s{{.}}, s{{.}}
190}
191
192; Test lowering of BR_CC
193define hidden i32 @VCMPBRCC() {
194entry:
195  %f = alloca half, align 2
196  br label %for.cond
197
198for.cond:
199  %0 = load half, half* %f, align 2
200  %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
201  br i1 %cmp, label %for.body, label %for.end
202
203for.body:
204  ret i32 1
205
206for.end:
207  ret i32 0
208
209; CHECK-LABEL:            VCMPBRCC:
210
211; CHECK-SOFT:             bl  __aeabi_fcmp{{gt|le}}
212; CHECK-SOFT:             cmp r0, #{{0|1}}
213
214; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
215; CHECK-SOFTFP-FP16:      vcmp.f32 [[S2]], s0
216; CHECK-SOFTFP-FP16:      vmrs  APSR_nzcv, fpscr
217
218; CHECK-SOFTFP-FULLFP16:  vcmp.f16 s{{.}}, s{{.}}
219; CHECK-SOFTFP-FULLFP16:  vmrs  APSR_nzcv, fpscr
220}
221
222; 5. VCVT (between floating-point and fixed-point)
223; Only assembly/disassembly support
224
225; 6. VCVT (between floating-point and integer, both directions)
226define i32 @fptosi(i32 %A.coerce) {
227entry:
228  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
229  %0 = bitcast i16 %tmp.0.extract.trunc to half
230  %conv = fptosi half %0 to i32
231  ret i32 %conv
232
233; CHECK-LABEL:                 fptosi:
234
235; CHECK-HARDFP-FULLFP16:       vmov.f16  s0, r0
236; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.s32.f16  s0, s0
237; CHECK-HARDFP-FULLFP16-NEXT:  vmov  r0, s0
238}
239
240define i32 @fptoui(i32 %A.coerce) {
241entry:
242  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
243  %0 = bitcast i16 %tmp.0.extract.trunc to half
244  %conv = fptoui half %0 to i32
245  ret i32 %conv
246
247; CHECK-HARDFP-FULLFP16:       vcvt.u32.f16  s0, s0
248; CHECK-HARDFP-FULLFP16-NEXT:  vmov  r0, s0
249}
250
251define float @UintToH(i32 %a, i32 %b) {
252entry:
253  %0 = uitofp i32 %a to half
254  %1 = bitcast half %0 to i16
255  %tmp0.insert.ext = zext i16 %1 to i32
256  %2 = bitcast i32 %tmp0.insert.ext to float
257  ret float %2
258
259; CHECK-LABEL:                 UintToH:
260
261; CHECK-HARDFP-FULLFP16:       vmov  s0, r0
262; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.f16.u32  s0, s0
263}
264
265define float @SintToH(i32 %a, i32 %b) {
266entry:
267  %0 = sitofp i32 %a to half
268  %1 = bitcast half %0 to i16
269  %tmp0.insert.ext = zext i16 %1 to i32
270  %2 = bitcast i32 %tmp0.insert.ext to float
271  ret float %2
272
273; CHECK-LABEL:                 SintToH:
274
275; CHECK-HARDFP-FULLFP16:       vmov  s0, r0
276; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.f16.s32  s0, s0
277}
278
279define i32 @f2h(float %f) {
280entry:
281  %conv = fptrunc float %f to half
282  %0 = bitcast half %conv to i16
283  %tmp.0.insert.ext = zext i16 %0 to i32
284  ret i32 %tmp.0.insert.ext
285
286; CHECK-LABEL:            f2h:
287; CHECK-HARDFP-FULLFP16:  vcvtb.f16.f32 s0, s0
288}
289
290define float @h2f(i32 %h.coerce) {
291entry:
292  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
293  %0 = bitcast i16 %tmp.0.extract.trunc to half
294  %conv = fpext half %0 to float
295  ret float %conv
296
297; CHECK-LABEL:            h2f:
298; CHECK-HARDFP-FULLFP16:  vcvtb.f32.f16 s0, s0
299}
300
301
302define double @h2d(i32 %h.coerce) {
303entry:
304  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
305  %0 = bitcast i16 %tmp.0.extract.trunc to half
306  %conv = fpext half %0 to double
307  ret double %conv
308
309; CHECK-LABEL:            h2d:
310; CHECK-HARDFP-FULLFP16:  vcvtb.f64.f16 d{{.*}}, s{{.}}
311}
312
313define i32 @d2h(double %d) {
314entry:
315  %conv = fptrunc double %d to half
316  %0 = bitcast half %conv to i16
317  %tmp.0.insert.ext = zext i16 %0 to i32
318  ret i32 %tmp.0.insert.ext
319
320; CHECK-LABEL:            d2h:
321; CHECK-HARDFP-FULLFP16:  vcvtb.f16.f64 s0, d{{.*}}
322}
323
324; TODO:
325; 7.  VCVTA
326; 8.  VCVTM
327; 9.  VCVTN
328; 10. VCVTP
329; 11. VCVTR
330
331; 12. VDIV
332define float @Div(float %a.coerce, float %b.coerce) {
333entry:
334  %0 = bitcast float %a.coerce to i32
335  %tmp.0.extract.trunc = trunc i32 %0 to i16
336  %1 = bitcast i16 %tmp.0.extract.trunc to half
337  %2 = bitcast float %b.coerce to i32
338  %tmp1.0.extract.trunc = trunc i32 %2 to i16
339  %3 = bitcast i16 %tmp1.0.extract.trunc to half
340  %add = fdiv half %1, %3
341  %4 = bitcast half %add to i16
342  %tmp4.0.insert.ext = zext i16 %4 to i32
343  %5 = bitcast i32 %tmp4.0.insert.ext to float
344  ret float %5
345
346; CHECK-LABEL:  Div:
347
348; CHECK-SOFT:  bl  __aeabi_h2f
349; CHECK-SOFT:  bl  __aeabi_h2f
350; CHECK-SOFT:  bl  __aeabi_fdiv
351; CHECK-SOFT:  bl  __aeabi_f2h
352
353; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
354; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
355; CHECK-SOFTFP-VFP3:  vdiv.f32
356; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h
357
358; CHECK-SOFTFP-FP16-DAG:  vmov          [[S0:s[0-9]]], r0
359; CHECK-SOFTFP-FP16-DAG:  vmov          [[S2:s[0-9]]], r1
360; CHECK-SOFTFP-FP16-DAG:  vcvtb.f32.f16 [[S0]], [[S0]]
361; CHECK-SOFTFP-FP16-DAG:  vcvtb.f32.f16 [[S2]], [[S2]]
362; CHECK-SOFTFP-FP16:  vdiv.f32      [[S0]], [[S0]], [[S2]]
363; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
364; CHECK-SOFTFP-FP16:  vmov  r0, s0
365
366; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
367; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
368; CHECK-SOFTFP-FULLFP16:       vdiv.f16  [[S0]], [[S2]], [[S0]]
369; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0
370
371; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
372; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
373; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
374; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
375; CHECK-HARDFP-VFP3:  vdiv.f32
376; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
377; CHECK-HARDFP-VFP3:  vmov  s0, r0
378
379; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
380; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
381; CHECK-HARDFP-FP16:  vdiv.f32  [[S0]], [[S0]], [[S2]]
382; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
383
384; CHECK-HARDFP-FULLFP16:       vdiv.f16  s0, s0, s1
385}
386
387; 13. VFMA
388define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
389entry:
390  %0 = bitcast float %a.coerce to i32
391  %tmp.0.extract.trunc = trunc i32 %0 to i16
392  %1 = bitcast i16 %tmp.0.extract.trunc to half
393  %2 = bitcast float %b.coerce to i32
394  %tmp1.0.extract.trunc = trunc i32 %2 to i16
395  %3 = bitcast i16 %tmp1.0.extract.trunc to half
396  %4 = bitcast float %c.coerce to i32
397  %tmp2.0.extract.trunc = trunc i32 %4 to i16
398  %5 = bitcast i16 %tmp2.0.extract.trunc to half
399  %mul = fmul half %1, %3
400  %add = fadd half %mul, %5
401  %6 = bitcast half %add to i16
402  %tmp4.0.insert.ext = zext i16 %6 to i32
403  %7 = bitcast i32 %tmp4.0.insert.ext to float
404  ret float %7
405
406; CHECK-LABEL:                      VFMA:
407; CHECK-HARDFP-FULLFP16-FAST:       vfma.f16  s2, s0, s1
408; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
409}
410
411; 14. VFMS
412define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
413entry:
414  %0 = bitcast float %a.coerce to i32
415  %tmp.0.extract.trunc = trunc i32 %0 to i16
416  %1 = bitcast i16 %tmp.0.extract.trunc to half
417  %2 = bitcast float %b.coerce to i32
418  %tmp1.0.extract.trunc = trunc i32 %2 to i16
419  %3 = bitcast i16 %tmp1.0.extract.trunc to half
420  %4 = bitcast float %c.coerce to i32
421  %tmp2.0.extract.trunc = trunc i32 %4 to i16
422  %5 = bitcast i16 %tmp2.0.extract.trunc to half
423  %mul = fmul half %1, %3
424  %sub = fsub half %5, %mul
425  %6 = bitcast half %sub to i16
426  %tmp4.0.insert.ext = zext i16 %6 to i32
427  %7 = bitcast i32 %tmp4.0.insert.ext to float
428  ret float %7
429
430; CHECK-LABEL:                      VFMS:
431; CHECK-HARDFP-FULLFP16-FAST:       vfms.f16  s2, s0, s1
432; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
433}
434
435; 15. VFNMA
436define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
437entry:
438  %0 = bitcast float %a.coerce to i32
439  %tmp.0.extract.trunc = trunc i32 %0 to i16
440  %1 = bitcast i16 %tmp.0.extract.trunc to half
441  %2 = bitcast float %b.coerce to i32
442  %tmp1.0.extract.trunc = trunc i32 %2 to i16
443  %3 = bitcast i16 %tmp1.0.extract.trunc to half
444  %4 = bitcast float %c.coerce to i32
445  %tmp2.0.extract.trunc = trunc i32 %4 to i16
446  %5 = bitcast i16 %tmp2.0.extract.trunc to half
447  %mul = fmul half %1, %3
448  %sub = fsub half -0.0, %mul
449  %sub2 = fsub half %sub, %5
450  %6 = bitcast half %sub2 to i16
451  %tmp4.0.insert.ext = zext i16 %6 to i32
452  %7 = bitcast i32 %tmp4.0.insert.ext to float
453  ret float %7
454
455; CHECK-LABEL:                      VFNMA:
456; CHECK-HARDFP-FULLFP16-FAST:       vfnma.f16  s2, s0, s1
457; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
458}
459
460; 16. VFNMS
461define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
462entry:
463  %0 = bitcast float %a.coerce to i32
464  %tmp.0.extract.trunc = trunc i32 %0 to i16
465  %1 = bitcast i16 %tmp.0.extract.trunc to half
466  %2 = bitcast float %b.coerce to i32
467  %tmp1.0.extract.trunc = trunc i32 %2 to i16
468  %3 = bitcast i16 %tmp1.0.extract.trunc to half
469  %4 = bitcast float %c.coerce to i32
470  %tmp2.0.extract.trunc = trunc i32 %4 to i16
471  %5 = bitcast i16 %tmp2.0.extract.trunc to half
472  %mul = fmul half %1, %3
473  %sub2 = fsub half %mul, %5
474  %6 = bitcast half %sub2 to i16
475  %tmp4.0.insert.ext = zext i16 %6 to i32
476  %7 = bitcast i32 %tmp4.0.insert.ext to float
477  ret float %7
478
479; CHECK-LABEL:                      VFNMS:
480; CHECK-HARDFP-FULLFP16-FAST:       vfnms.f16  s2, s0, s1
481; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
482}
483
484; 17. VMAXNM
485; 18. VMINNM
486; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll
487
488; 19. VMLA
489define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
490entry:
491  %0 = bitcast float %a.coerce to i32
492  %tmp.0.extract.trunc = trunc i32 %0 to i16
493  %1 = bitcast i16 %tmp.0.extract.trunc to half
494  %2 = bitcast float %b.coerce to i32
495  %tmp1.0.extract.trunc = trunc i32 %2 to i16
496  %3 = bitcast i16 %tmp1.0.extract.trunc to half
497  %4 = bitcast float %c.coerce to i32
498  %tmp2.0.extract.trunc = trunc i32 %4 to i16
499  %5 = bitcast i16 %tmp2.0.extract.trunc to half
500  %mul = fmul half %1, %3
501  %add = fadd half %5, %mul
502  %6 = bitcast half %add to i16
503  %tmp4.0.insert.ext = zext i16 %6 to i32
504  %7 = bitcast i32 %tmp4.0.insert.ext to float
505  ret float %7
506
507; CHECK-LABEL:                 VMLA:
508; CHECK-HARDFP-FULLFP16:       vmla.f16  s2, s0, s1
509; CHECK-HARDFP-FULLFP16-NEXT:  vmov.f32  s0, s2
510}
511
512; 20. VMLS
513define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
514entry:
515  %0 = bitcast float %a.coerce to i32
516  %tmp.0.extract.trunc = trunc i32 %0 to i16
517  %1 = bitcast i16 %tmp.0.extract.trunc to half
518  %2 = bitcast float %b.coerce to i32
519  %tmp1.0.extract.trunc = trunc i32 %2 to i16
520  %3 = bitcast i16 %tmp1.0.extract.trunc to half
521  %4 = bitcast float %c.coerce to i32
522  %tmp2.0.extract.trunc = trunc i32 %4 to i16
523  %5 = bitcast i16 %tmp2.0.extract.trunc to half
524  %mul = fmul half %1, %3
525  %add = fsub half %5, %mul
526  %6 = bitcast half %add to i16
527  %tmp4.0.insert.ext = zext i16 %6 to i32
528  %7 = bitcast i32 %tmp4.0.insert.ext to float
529  ret float %7
530
531; CHECK-LABEL:                 VMLS:
532; CHECK-HARDFP-FULLFP16:       vmls.f16  s2, s0, s1
533; CHECK-HARDFP-FULLFP16-NEXT:  vmov.f32  s0, s2
534}
535
536; TODO: fix immediates.
537; 21. VMOV (between general-purpose register and half-precision register)
538
539; 22. VMOV (immediate)
540define i32 @movi(i32 %a.coerce) {
541entry:
542  %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
543  %0 = bitcast i16 %tmp.0.extract.trunc to half
544  %add = fadd half %0, 0xHC000
545  %1 = bitcast half %add to i16
546  %tmp2.0.insert.ext = zext i16 %1 to i32
547  ret i32 %tmp2.0.insert.ext
548
549; CHECK-LABEL:            movi:
550; CHECK-HARDFP-FULLFP16:  vmov.f16  s0, #-2.000000e+00
551}
552
553; 23. VMUL
554define float @Mul(float %a.coerce, float %b.coerce) {
555entry:
556  %0 = bitcast float %a.coerce to i32
557  %tmp.0.extract.trunc = trunc i32 %0 to i16
558  %1 = bitcast i16 %tmp.0.extract.trunc to half
559  %2 = bitcast float %b.coerce to i32
560  %tmp1.0.extract.trunc = trunc i32 %2 to i16
561  %3 = bitcast i16 %tmp1.0.extract.trunc to half
562  %add = fmul half %1, %3
563  %4 = bitcast half %add to i16
564  %tmp4.0.insert.ext = zext i16 %4 to i32
565  %5 = bitcast i32 %tmp4.0.insert.ext to float
566  ret float %5
567
568; CHECK-LABEL:  Mul:
569
570; CHECK-SOFT:  bl  __aeabi_h2f
571; CHECK-SOFT:  bl  __aeabi_h2f
572; CHECK-SOFT:  bl  __aeabi_fmul
573; CHECK-SOFT:  bl  __aeabi_f2h
574
575; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
576; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
577; CHECK-SOFTFP-VFP3:  vmul.f32
578; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h
579
580; CHECK-SOFTFP-FP16-DAG:  vmov          [[S0:s[0-9]]], r0
581; CHECK-SOFTFP-FP16-DAG:  vmov          [[S2:s[0-9]]], r1
582; CHECK-SOFTFP-FP16-DAG:  vcvtb.f32.f16 [[S0]], [[S0]]
583; CHECK-SOFTFP-FP16-DAG:  vcvtb.f32.f16 [[S2]], [[S2]]
584; CHECK-SOFTFP-FP16:  vmul.f32      [[S0]], [[S0]], [[S2]]
585; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
586; CHECK-SOFTFP-FP16:  vmov  r0, s0
587
588; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
589; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
590; CHECK-SOFTFP-FULLFP16:       vmul.f16  [[S0]], [[S2]], [[S0]]
591; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0
592
593; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
594; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
595; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
596; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
597; CHECK-HARDFP-VFP3:  vmul.f32
598; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
599; CHECK-HARDFP-VFP3:  vmov  s0, r0
600
601; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
602; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
603; CHECK-HARDFP-FP16:  vmul.f32  [[S0]], [[S0]], [[S2]]
604; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
605
606; CHECK-HARDFP-FULLFP16:       vmul.f16  s0, s0, s1
607}
608
609; 24. VNEG
610define float @Neg(float %a.coerce) {
611entry:
612  %0 = bitcast float %a.coerce to i32
613  %tmp.0.extract.trunc = trunc i32 %0 to i16
614  %1 = bitcast i16 %tmp.0.extract.trunc to half
615  %2 = fsub half -0.000000e+00, %1
616  %3 = bitcast half %2 to i16
617  %tmp4.0.insert.ext = zext i16 %3 to i32
618  %4 = bitcast i32 %tmp4.0.insert.ext to float
619  ret float %4
620
621; CHECK-LABEL:                 Neg:
622; CHECK-HARDFP-FULLFP16:       vneg.f16  s0, s0
623}
624
625; 25. VNMLA
626define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
627entry:
628  %0 = bitcast float %a.coerce to i32
629  %tmp.0.extract.trunc = trunc i32 %0 to i16
630  %1 = bitcast i16 %tmp.0.extract.trunc to half
631  %2 = bitcast float %b.coerce to i32
632  %tmp1.0.extract.trunc = trunc i32 %2 to i16
633  %3 = bitcast i16 %tmp1.0.extract.trunc to half
634  %4 = bitcast float %c.coerce to i32
635  %tmp2.0.extract.trunc = trunc i32 %4 to i16
636  %5 = bitcast i16 %tmp2.0.extract.trunc to half
637  %add = fmul half %1, %3
638  %add2 = fsub half -0.000000e+00, %add
639  %add3 = fsub half %add2, %5
640  %6 = bitcast half %add3 to i16
641  %tmp4.0.insert.ext = zext i16 %6 to i32
642  %7 = bitcast i32 %tmp4.0.insert.ext to float
643  ret float %7
644
645; CHECK-LABEL:            VNMLA:
646; CHECK-HARDFP-FULLFP16:  vnmla.f16 s2, s0, s1
647; CHECK-HARDFP-FULLFP16:  vmov.f32  s0, s2
648}
649
650; 26. VNMLS
651define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
652entry:
653  %0 = bitcast float %a.coerce to i32
654  %tmp.0.extract.trunc = trunc i32 %0 to i16
655  %1 = bitcast i16 %tmp.0.extract.trunc to half
656  %2 = bitcast float %b.coerce to i32
657  %tmp1.0.extract.trunc = trunc i32 %2 to i16
658  %3 = bitcast i16 %tmp1.0.extract.trunc to half
659  %4 = bitcast float %c.coerce to i32
660  %tmp2.0.extract.trunc = trunc i32 %4 to i16
661  %5 = bitcast i16 %tmp2.0.extract.trunc to half
662  %add = fmul half %1, %3
663  %add2 = fsub half %add, %5
664  %6 = bitcast half %add2 to i16
665  %tmp4.0.insert.ext = zext i16 %6 to i32
666  %7 = bitcast i32 %tmp4.0.insert.ext to float
667  ret float %7
668
669; CHECK-LABEL:            VNMLS:
670; CHECK-HARDFP-FULLFP16:  vnmls.f16 s2, s0, s1
671; CHECK-HARDFP-FULLFP16:  vmov.f32  s0, s2
672}
673
674; 27. VNMUL
675define float @NMul(float %a.coerce, float %b.coerce) {
676entry:
677  %0 = bitcast float %a.coerce to i32
678  %tmp.0.extract.trunc = trunc i32 %0 to i16
679  %1 = bitcast i16 %tmp.0.extract.trunc to half
680  %2 = bitcast float %b.coerce to i32
681  %tmp1.0.extract.trunc = trunc i32 %2 to i16
682  %3 = bitcast i16 %tmp1.0.extract.trunc to half
683  %add = fmul half %1, %3
684  %add2 = fsub half -0.0, %add
685  %4 = bitcast half %add2 to i16
686  %tmp4.0.insert.ext = zext i16 %4 to i32
687  %5 = bitcast i32 %tmp4.0.insert.ext to float
688  ret float %5
689
690; CHECK-LABEL:                 NMul:
691; CHECK-HARDFP-FULLFP16:       vnmul.f16  s0, s0, s1
692}
693
694; 35. VSELEQ
695define half @select_cc1(half* %a0)  {
696  %1 = load half, half* %a0
697  %2 = fcmp nsz oeq half %1, 0xH0001
698  %3 = select i1 %2, half 0xHC000, half 0xH0002
699  ret half %3
700
701; CHECK-LABEL:                 select_cc1:
702
703; CHECK-HARDFP-FULLFP16:       vcmp.f16 s6, s0
704; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
705; CHECK-HARDFP-FULLFP16:       vseleq.f16  s0, s{{.}}, s{{.}}
706
707; CHECK-SOFTFP-FP16-A32:       vcmp.f32 s6, s0
708; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
709; CHECK-SOFTFP-FP16-A32-NEXT:  vmoveq.f32 s{{.}}, s{{.}}
710
711; CHECK-SOFTFP-FP16-T32:       vcmp.f32 s6, s0
712; CHECK-SOFTFP-FP16-T32:       vmrs APSR_nzcv, fpscr
713; CHECK-SOFTFP-FP16-T32:       it eq
714; CHECK-SOFTFP-FP16-T32:       vmoveq.f32 s{{.}}, s{{.}}
715}
716
717; FIXME: more tests need to be added for VSELGE and VSELGT.
718; That is, more combinations of immediate operands that can or can't
719; be encoded as an FP16 immediate need to be added here.
720;
721; 36. VSELGE
722define half @select_cc_ge1(half* %a0)  {
723  %1 = load half, half* %a0
724  %2 = fcmp nsz oge half %1, 0xH0001
725  %3 = select i1 %2, half 0xHC000, half 0xH0002
726  ret half %3
727
728; CHECK-LABEL:                 select_cc_ge1:
729
730; CHECK-HARDFP-FULLFP16:       vcmp.f16 s6, s0
731; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
732; CHECK-HARDFP-FULLFP16-NEXT:  vselge.f16 s0, s{{.}}, s{{.}}
733
734; CHECK-SOFTFP-FP16-A32:       vcmp.f32 s6, s0
735; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
736; CHECK-SOFTFP-FP16-A32-NEXT:  vmovge.f32 s{{.}}, s{{.}}
737
738; CHECK-SOFTFP-FP16-T32:       vcmp.f32 s6, s0
739; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
740; CHECK-SOFTFP-FP16-T32-NEXT:  it ge
741; CHECK-SOFTFP-FP16-T32-NEXT:  vmovge.f32 s{{.}}, s{{.}}
742}
743
744define half @select_cc_ge2(half* %a0)  {
745  %1 = load half, half* %a0
746  %2 = fcmp nsz ole half %1, 0xH0001
747  %3 = select i1 %2, half 0xHC000, half 0xH0002
748  ret half %3
749
750; CHECK-LABEL:                 select_cc_ge2:
751
752; CHECK-HARDFP-FULLFP16:       vcmp.f16 s0, s6
753; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
754; CHECK-HARDFP-FULLFP16-NEXT:  vselge.f16 s0, s{{.}}, s{{.}}
755
756; CHECK-SOFTFP-FP16-A32:       vcmp.f32 s6, s0
757; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
758; CHECK-SOFTFP-FP16-A32-NEXT:  vmovls.f32 s{{.}}, s{{.}}
759
760; CHECK-SOFTFP-FP16-T32:       vcmp.f32 s6, s0
761; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
762; CHECK-SOFTFP-FP16-T32-NEXT:  it ls
763; CHECK-SOFTFP-FP16-T32-NEXT:  vmovls.f32 s{{.}}, s{{.}}
764}
765
766define half @select_cc_ge3(half* %a0)  {
767  %1 = load half, half* %a0
768  %2 = fcmp nsz ugt half %1, 0xH0001
769  %3 = select i1 %2, half 0xHC000, half 0xH0002
770  ret half %3
771
772; CHECK-LABEL:                 select_cc_ge3:
773
774; CHECK-HARDFP-FULLFP16:       vcmp.f16 s0, s6
775; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
776; CHECK-HARDFP-FULLFP16-NEXT:  vselge.f16 s0, s{{.}}, s{{.}}
777
778; CHECK-SOFTFP-FP16-A32:       vcmp.f32 s6, s0
779; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
780; CHECK-SOFTFP-FP16-A32-NEXT:  vmovhi.f32 s{{.}}, s{{.}}
781
782; CHECK-SOFTFP-FP16-T32:       vcmp.f32 s6, s0
783; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
784; CHECK-SOFTFP-FP16-T32-NEXT:  it hi
785; CHECK-SOFTFP-FP16-T32-NEXT:  vmovhi.f32 s{{.}}, s{{.}}
786}
787
788define half @select_cc_ge4(half* %a0)  {
789  %1 = load half, half* %a0
790  %2 = fcmp nsz ult half %1, 0xH0001
791  %3 = select i1 %2, half 0xHC000, half 0xH0002
792  ret half %3
793
794; CHECK-LABEL:                 select_cc_ge4:
795
796; CHECK-HARDFP-FULLFP16:       vcmp.f16 s6, s0
797; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
798; CHECK-HARDFP-FULLFP16-NEXT:  vselge.f16 s0, s{{.}}, s{{.}}
799
800; CHECK-SOFTFP-FP16-A32:       vcmp.f32 s6, s0
801; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
802; CHECK-SOFTFP-FP16-A32-NEXT:  vmovlt.f32 s{{.}}, s{{.}}
803
804; CHECK-SOFTFP-FP16-T32:       vcmp.f32 s6, s0
805; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
806; CHECK-SOFTFP-FP16-T32-NEXT:  it lt
807; CHECK-SOFTFP-FP16-T32-NEXT:  vmovlt.f32 s{{.}}, s{{.}}
808}
809
810; 37. VSELGT
811define half @select_cc_gt1(half* %a0)  {
812  %1 = load half, half* %a0
813  %2 = fcmp nsz ogt half %1, 0xH0001
814  %3 = select i1 %2, half 0xHC000, half 0xH0002
815  ret half %3
816
817; CHECK-LABEL:                 select_cc_gt1:
818
819; CHECK-HARDFP-FULLFP16:       vcmp.f16 s6, s0
820; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
821; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16  s0, s{{.}}, s{{.}}
822
823; CHECK-SOFTFP-FP16-A32:       vcmp.f32 s6, s0
824; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
825; CHECK-SOFTFP-FP16-A32-NEXT:  vmovgt.f32 s{{.}}, s{{.}}
826
827; CHECK-SOFTFP-FP16-T32:       vcmp.f32 s6, s0
828; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
829; CHECK-SOFTFP-FP16-T32-NEXT:  it gt
830; CHECK-SOFTFP-FP16-T32-NEXT:  vmovgt.f32 s{{.}}, s{{.}}
831}
832
833define half @select_cc_gt2(half* %a0)  {
834  %1 = load half, half* %a0
835  %2 = fcmp nsz uge half %1, 0xH0001
836  %3 = select i1 %2, half 0xHC000, half 0xH0002
837  ret half %3
838
839; CHECK-LABEL:                 select_cc_gt2:
840
841; CHECK-HARDFP-FULLFP16:       vcmp.f16 s0, s6
842; CHECK-HARDFP-FULLFP16-NEXT:  vmrs  APSR_nzcv, fpscr
843; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16  s0, s{{.}}, s{{.}}
844
845; CHECK-SOFTFP-FP16-A32:       vcmp.f32 s6, s0
846; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
847; CHECK-SOFTFP-FP16-A32-NEXT:  vmovpl.f32 s{{.}}, s{{.}}
848
849; CHECK-SOFTFP-FP16-T32:       vcmp.f32 s6, s0
850; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
851; CHECK-SOFTFP-FP16-T32-NEXT:  it pl
852; CHECK-SOFTFP-FP16-T32-NEXT:  vmovpl.f32 s{{.}}, s{{.}}
853}
854
855define half @select_cc_gt3(half* %a0)  {
856  %1 = load half, half* %a0
857  %2 = fcmp nsz ule half %1, 0xH0001
858  %3 = select i1 %2, half 0xHC000, half 0xH0002
859  ret half %3
860
861; CHECK-LABEL:                 select_cc_gt3:
862
863; CHECK-HARDFP-FULLFP16:       vcmp.f16 s6, s0
864; CHECK-HARDFP-FULLFP16-NEXT:  vmrs  APSR_nzcv, fpscr
865; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16  s0, s{{.}}, s{{.}}
866
867; CHECK-SOFTFP-FP16-A32:       vcmp.f32 s6, s0
868; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
869; CHECK-SOFTFP-FP16-A32-NEXT:  vmovle.f32 s{{.}}, s{{.}}
870
871; CHECK-SOFTFP-FP16-T32:       vcmp.f32 s6, s0
872; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
873; CHECK-SOFTFP-FP16-T32-NEXT:  it le
874; CHECK-SOFTFP-FP16-T32-NEXT:  vmovle.f32 s{{.}}, s{{.}}
875}
876
; NOTE: 'olt' (ordered less-than) variant. Fullfp16 still folds to a
; vcmp.f16/vselgt pair; the softfp fp16 configs predicate the vmov on
; 'mi'. CHECK lines are FileCheck expectations — keep their text exact.
define half @select_cc_gt4(half* %a0)  {
  %1 = load half, half* %a0
  %2 = fcmp nsz olt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL:                 select_cc_gt4:

; CHECK-HARDFP-FULLFP16:       vcmp.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT:  vmrs  APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16  s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32:       vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT:  vmovmi.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32:       vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT:  it mi
; CHECK-SOFTFP-FP16-T32-NEXT:  vmovmi.f32 s{{.}}, s{{.}}
}
898
; 38. VSELVS
; NOTE: 'ueq' (equal OR unordered) requires two conditional selects:
; the fullfp16 expectation below is a vseleq followed by a vselvs, and
; the softfp fp16 variants likewise chain vmoveq + vmovvs. The f32<->half
; bitcast/trunc/zext wrapping models the soft-float half ABI (half passed
; in the low 16 bits of an f32). CHECK lines are FileCheck expectations —
; keep their text (including tabs) exact.
define float @select_cc4(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half

  %2 = fcmp nsz ueq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002

  %4 = bitcast half %3 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL:                 select_cc4:

; CHECK-HARDFP-FULLFP16:       vldr.16	[[S2:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16:       vldr.16	[[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16:       vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
; CHECK-HARDFP-FULLFP16:       vcmp.f16	s0, [[S2]]
; CHECK-HARDFP-FULLFP16-NEXT:  vmrs	APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT:  vseleq.f16	[[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT:  vselvs.f16	s0, [[S6]], [[S0]]

; CHECK-SOFTFP-FP16-A32:       vmov	[[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-A32:       vldr	s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32:       vcvtb.f32.f16	[[S6]], [[S6]]
; CHECK-SOFTFP-FP16-A32:       vmov.f32	[[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-A32:       vcmp.f32	[[S6]], s0
; CHECK-SOFTFP-FP16-A32:       vldr	[[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-A32:       vmrs	APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32:       vmoveq.f32	[[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT:  vmovvs.f32	[[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT:  vcvtb.f16.f32 s0, [[S4]]

; CHECK-SOFTFP-FP16-T32:       vmov	[[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32:       vldr	s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32:       vcvtb.f32.f16	[[S6]], [[S6]]
; CHECK-SOFTFP-FP16-T32:       vldr	[[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-T32:       vcmp.f32	[[S6]], s0
; CHECK-SOFTFP-FP16-T32:       vmov.f32	[[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32:       vmrs	APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32:       it eq
; CHECK-SOFTFP-FP16-T32:       vmoveq.f32	[[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32:       it vs
; CHECK-SOFTFP-FP16-T32-NEXT:  vmovvs.f32	[[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32-NEXT:  vcvtb.f16.f32 s0, [[S4]]
}
948
; 40. VSUB
; NOTE: fsub of two halves passed/returned through the f32<->i16<->half
; bitcast wrapping (soft-float half ABI). Expectations per configuration:
;   - SOFT:           three libcalls (__aeabi_h2f x2, __aeabi_fsub, __aeabi_f2h)
;   - SOFTFP-VFP3:    h2f libcalls around a hardware vsub.f32, then f2h
;   - SOFTFP-FP16:    vcvtb f16->f32, vsub.f32, vcvtb back (no libcalls)
;   - SOFTFP-FULLFP16 and HARDFP-FULLFP16: a single native vsub.f16
; CHECK lines are FileCheck expectations — keep their text exact.
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL:  Sub:

; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_fsub
; CHECK-SOFT:  bl  __aeabi_f2h

; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  vsub.f32
; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h

; CHECK-SOFTFP-FP16-DAG:  vmov          [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16-DAG:  vmov          [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16-DAG:  vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16-DAG:  vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16:  vsub.f32      [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vmov  r0, s0

; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:       vsub.f16  [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0

; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  vsub.f32
; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
; CHECK-HARDFP-VFP3:  vmov  s0, r0

; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16:  vsub.f32  [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16:       vsub.f16  s0, s0, s1
}
1004
; Check for VSTRH with a FCONSTH, this checks that addressing mode
; AddrMode5FP16 is supported.
; NOTE: stores the f16 constant 0xH3C00 (1.0, per the #1.000000e+00 in the
; expectation) to a 2-byte stack slot via a volatile store, reloads it, and
; adds the incoming half. The vstr.16/vldr.16 [sp, #imm] forms are the
; addressing-mode expectations under test; keep the CHECK lines' text exact.
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  store volatile half 0xH3C00, half* %S, align 2
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL:            ThumbAddrMode5FP16

; CHECK-SOFTFP-FULLFP16:  vmov.f16    [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16:  vstr.16     [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:  vmov.f16    [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:  vldr.16     [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:  vadd.f16    s{{.}}, [[S2]], [[S0_2]]
}
1028
; Test function calls to check store/load reg to/from stack
; NOTE: exercised only by the -O0 (fast-isel) RUN lines via the
; CHECK-SPILL-RELOAD prefix: an f16 value must be spilled with vstr.16
; before the call to fn2 and reloaded with vldr.16 immediately after.
; Keep the CHECK lines' text exact (FileCheck matches them literally).
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  store half 0xH7C00, half* %coerce, align 2
  %0 = load i32, i32* %tmp2, align 4
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD:       vstr.16 s0, [sp, #{{.}}]  @ 2-byte Spill
; CHECK-SPILL-RELOAD:  bl  fn2
; CHECK-SPILL-RELOAD-NEXT:  vldr.16 s0, [sp, #{{.}}]  @ 2-byte Reload
}
1047
1048declare dso_local i32 @fn2(...)
1049declare dso_local i32 @fn3(...)
1050