; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s --check-prefix=X32-NOF16C
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=f16c | FileCheck %s --check-prefix=X32-F16C
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=X64-NOF16C
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=f16c | FileCheck %s --check-prefix=X64-F16C

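; Strict-FP (strictfp) lowering of half conversions and arithmetic, checked
; with and without the F16C feature on 32-bit and 64-bit Darwin.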
@a = global half 0xH0000, align 2
@b = global half 0xH0000, align 2
@c = global half 0xH0000, align 2

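; Strict fpext half -> float: a ___extendhfsf2 libcall without F16C,
; vcvtph2ps when F16C is available.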
define float @half_to_float() strictfp {
; X32-NOF16C-LABEL: half_to_float:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_float:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    pushl %eax
; X32-F16C-NEXT:    .cfi_def_cfa_offset 8
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm0
; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X32-F16C-NEXT:    vmovss %xmm0, (%esp)
; X32-F16C-NEXT:    flds (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    popl %eax
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_float:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    movzwl _a(%rip), %edi
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_float:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl _a(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    retq
  %1 = load half, half* @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0
  ret float %2
}

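; Strict fpext half -> double goes through float, then cvtss2sd (or the x87
; stack on 32-bit).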
define double @half_to_double() strictfp {
; X32-NOF16C-LABEL: half_to_double:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_double:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    pushl %eax
; X32-F16C-NEXT:    .cfi_def_cfa_offset 8
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm0
; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X32-F16C-NEXT:    vmovss %xmm0, (%esp)
; X32-F16C-NEXT:    flds (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    popl %eax
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_double:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    movzwl _a(%rip), %edi
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    cvtss2sd %xmm0, %xmm0
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_double:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl _a(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; X64-F16C-NEXT:    retq
  %1 = load half, half* @a, align 2
  %2 = tail call double @llvm.experimental.constrained.fpext.f64.f16(half %1, metadata !"fpexcept.strict") #0
  ret double %2
}

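; Strict fpext half -> x86_fp80: extend to float, then push onto the x87
; stack with flds.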
define x86_fp80 @half_to_fp80() strictfp {
; X32-NOF16C-LABEL: half_to_fp80:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_fp80:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    pushl %eax
; X32-F16C-NEXT:    .cfi_def_cfa_offset 8
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm0
; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X32-F16C-NEXT:    vmovss %xmm0, (%esp)
; X32-F16C-NEXT:    flds (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    popl %eax
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_fp80:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    movzwl _a(%rip), %edi
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp)
; X64-NOF16C-NEXT:    flds {{[0-9]+}}(%rsp)
; X64-NOF16C-NEXT:    wait
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_fp80:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl _a(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-F16C-NEXT:    flds -{{[0-9]+}}(%rsp)
; X64-F16C-NEXT:    wait
; X64-F16C-NEXT:    retq
  %1 = load half, half* @a, align 2
  %2 = tail call x86_fp80 @llvm.experimental.constrained.fpext.f80.f16(half %1, metadata !"fpexcept.strict") #0
  ret x86_fp80 %2
}

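; Strict fptrunc float -> half: a ___truncsfhf2 libcall without F16C,
; vcvtps2ph when F16C is available.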
define void @float_to_half(float %0) strictfp {
; X32-NOF16C-LABEL: float_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    flds {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstps (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncsfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: float_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X32-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X32-F16C-NEXT:    vpextrw $0, %xmm0, _a
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: float_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    callq ___truncsfhf2
; X64-NOF16C-NEXT:    movw %ax, _a(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: float_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT:    vpextrw $0, %xmm0, _a(%rip)
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, half* @a, align 2
  ret void
}

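; Strict fptrunc double -> half stays a ___truncdfhf2 libcall even with F16C:
; narrowing through float first would round twice.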
define void @double_to_half(double %0) strictfp {
; X32-NOF16C-LABEL: double_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    fldl {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstpl (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncdfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: double_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    fldl {{[0-9]+}}(%esp)
; X32-F16C-NEXT:    fstpl (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    calll ___truncdfhf2
; X32-F16C-NEXT:    movw %ax, _a
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: double_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    callq ___truncdfhf2
; X64-NOF16C-NEXT:    movw %ax, _a(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: double_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    pushq %rax
; X64-F16C-NEXT:    .cfi_def_cfa_offset 16
; X64-F16C-NEXT:    callq ___truncdfhf2
; X64-F16C-NEXT:    movw %ax, _a(%rip)
; X64-F16C-NEXT:    popq %rax
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, half* @a, align 2
  ret void
}

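; Strict fptrunc x86_fp80 -> half likewise has no F16C shortcut and calls
; ___truncxfhf2.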
define void @fp80_to_half(x86_fp80 %0) strictfp {
; X32-NOF16C-LABEL: fp80_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $28, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 32
; X32-NOF16C-NEXT:    fldt {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstpt (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncxfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $28, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: fp80_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $28, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 32
; X32-F16C-NEXT:    fldt {{[0-9]+}}(%esp)
; X32-F16C-NEXT:    fstpt (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    calll ___truncxfhf2
; X32-F16C-NEXT:    movw %ax, _a
; X32-F16C-NEXT:    addl $28, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: fp80_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    subq $24, %rsp
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 32
; X64-NOF16C-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-NOF16C-NEXT:    fstpt (%rsp)
; X64-NOF16C-NEXT:    wait
; X64-NOF16C-NEXT:    callq ___truncxfhf2
; X64-NOF16C-NEXT:    movw %ax, _a(%rip)
; X64-NOF16C-NEXT:    addq $24, %rsp
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: fp80_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    subq $24, %rsp
; X64-F16C-NEXT:    .cfi_def_cfa_offset 32
; X64-F16C-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-F16C-NEXT:    fstpt (%rsp)
; X64-F16C-NEXT:    wait
; X64-F16C-NEXT:    callq ___truncxfhf2
; X64-F16C-NEXT:    movw %ax, _a(%rip)
; X64-F16C-NEXT:    addq $24, %rsp
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f80(x86_fp80 %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, half* @a, align 2
  ret void
}

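; A strict half fadd is promoted: both operands are extended to float, added,
; and the result is truncated back to half.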
define void @add() strictfp {
; X32-NOF16C-LABEL: add:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    movzwl _b, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
; X32-NOF16C-NEXT:    faddp %st, %st(1)
; X32-NOF16C-NEXT:    fstps (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncsfhf2
; X32-NOF16C-NEXT:    movw %ax, _c
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: add:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm0
; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X32-F16C-NEXT:    movzwl _b, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm1
; X32-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; X32-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; X32-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X32-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X32-F16C-NEXT:    vpextrw $0, %xmm0, _c
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: add:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    movzwl _a(%rip), %edi
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; X64-NOF16C-NEXT:    movzwl _b(%rip), %edi
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Folded Reload
; X64-NOF16C-NEXT:    callq ___truncsfhf2
; X64-NOF16C-NEXT:    movw %ax, _c(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: add:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl _a(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    movzwl _b(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm1
; X64-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; X64-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; X64-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT:    vpextrw $0, %xmm0, _c(%rip)
; X64-F16C-NEXT:    retq
  %1 = load half, half* @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0
  %3 = load half, half* @b, align 2
  %4 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %3, metadata !"fpexcept.strict") #0
  %5 = tail call float @llvm.experimental.constrained.fadd.f32(float %2, float %4, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %6 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %6, half* @c, align 2
  ret void
}

declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f16(half, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f80(x86_fp80, metadata, metadata)

attributes #0 = { strictfp }