; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefix=SSE41-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefix=SSE41-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefix=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefix=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefix=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefix=AVX-X64

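; Tests strict-FP lowering of the constrained rounding intrinsics (ceil,
; floor, trunc, rint, nearbyint, round, roundeven) to ROUNDSS/ROUNDSD on
; SSE4.1 and VROUNDSS/VROUNDSD on AVX/AVX-512. The immediate operand encodes
; the rounding control: bits 1:0 select the mode (0 = nearest-even, 1 =
; toward -inf, 2 = toward +inf, 3 = toward zero), bit 2 overrides that with
; the dynamic MXCSR rounding mode, and bit 3 suppresses the precision
; (inexact) exception. On i686 the argument arrives on the stack and the
; float/double result is returned in st(0), hence the movss/movsd store and
; flds/fldl reload in the X86 checks; the trailing wait appears to be emitted
; so that pending x87 exceptions are raised before return under
; fpexcept.strict.
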
declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
declare float @llvm.experimental.constrained.floor.f32(float, metadata)
declare double @llvm.experimental.constrained.floor.f64(double, metadata)
declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.round.f32(float, metadata)
declare double @llvm.experimental.constrained.round.f64(double, metadata)
declare float @llvm.experimental.constrained.roundeven.f32(float, metadata)
declare double @llvm.experimental.constrained.roundeven.f64(double, metadata)

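; ceil: immediate $10 = 0b1010, round toward +inf with inexact suppressed.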
define float @fceil32(float %f) #0 {
; SSE41-X86-LABEL: fceil32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $10, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: fceil32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $10, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: fceil32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fceil32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.ceil.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

define double @fceilf64(double %f) #0 {
; SSE41-X86-LABEL: fceilf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $10, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: fceilf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $10, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: fceilf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fceilf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.ceil.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

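; floor: immediate $9 = 0b1001, round toward -inf with inexact suppressed.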
define float @ffloor32(float %f) #0 {
; SSE41-X86-LABEL: ffloor32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $9, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: ffloor32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $9, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: ffloor32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: ffloor32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.floor.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

define double @ffloorf64(double %f) #0 {
; SSE41-X86-LABEL: ffloorf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $9, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: ffloorf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $9, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: ffloorf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: ffloorf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.floor.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

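; trunc: immediate $11 = 0b1011, round toward zero with inexact suppressed.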
define float @ftrunc32(float %f) #0 {
; SSE41-X86-LABEL: ftrunc32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: ftrunc32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: ftrunc32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: ftrunc32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.trunc.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

define double @ftruncf64(double %f) #0 {
; SSE41-X86-LABEL: ftruncf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: ftruncf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: ftruncf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: ftruncf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.trunc.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

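; rint: immediate $4 = 0b0100, dynamic MXCSR rounding mode; bit 3 is clear,
; so the precision (inexact) exception can be raised, as rint requires.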
define float @frint32(float %f) #0 {
; SSE41-X86-LABEL: frint32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $4, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: frint32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $4, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: frint32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: frint32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.rint.f32(
                        float %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %res
}

define double @frintf64(double %f) #0 {
; SSE41-X86-LABEL: frintf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $4, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: frintf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $4, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: frintf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: frintf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.rint.f64(
                        double %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %res
}

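; nearbyint: immediate $12 = 0b1100, dynamic MXCSR rounding mode with
; inexact suppressed; unlike rint, nearbyint never raises inexact.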
define float @fnearbyint32(float %f) #0 {
; SSE41-X86-LABEL: fnearbyint32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $12, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: fnearbyint32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $12, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: fnearbyint32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fnearbyint32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.nearbyint.f32(
                        float %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret float %res
}

define double @fnearbyintf64(double %f) #0 {
; SSE41-X86-LABEL: fnearbyintf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $12, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: fnearbyintf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $12, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: fnearbyintf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fnearbyintf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.nearbyint.f64(
                        double %f,
                        metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret double %res
}

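; round (half away from zero) has no ROUNDSS/ROUNDSD immediate encoding, so
; even SSE4.1/AVX targets fall back to the roundf/round libcalls below.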
define float @fround32(float %f) #0 {
; SSE41-X86-LABEL: fround32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    calll roundf
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: fround32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    pushq %rax
; SSE41-X64-NEXT:    .cfi_def_cfa_offset 16
; SSE41-X64-NEXT:    callq roundf
; SSE41-X64-NEXT:    popq %rax
; SSE41-X64-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: fround32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    calll roundf
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fround32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    pushq %rax
; AVX-X64-NEXT:    .cfi_def_cfa_offset 16
; AVX-X64-NEXT:    callq roundf
; AVX-X64-NEXT:    popq %rax
; AVX-X64-NEXT:    .cfi_def_cfa_offset 8
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.round.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

define double @froundf64(double %f) #0 {
; SSE41-X86-LABEL: froundf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 12
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    calll round
; SSE41-X86-NEXT:    addl $8, %esp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: froundf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    pushq %rax
; SSE41-X64-NEXT:    .cfi_def_cfa_offset 16
; SSE41-X64-NEXT:    callq round
; SSE41-X64-NEXT:    popq %rax
; SSE41-X64-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: froundf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 12
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    calll round
; AVX-X86-NEXT:    addl $8, %esp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: froundf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    pushq %rax
; AVX-X64-NEXT:    .cfi_def_cfa_offset 16
; AVX-X64-NEXT:    callq round
; AVX-X64-NEXT:    popq %rax
; AVX-X64-NEXT:    .cfi_def_cfa_offset 8
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.round.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

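; roundeven: immediate $8 = 0b1000, round to nearest-even with inexact
; suppressed.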
define float @froundeven32(float %f) #0 {
; SSE41-X86-LABEL: froundeven32:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-X86-NEXT:    roundss $8, %xmm0, %xmm0
; SSE41-X86-NEXT:    movss %xmm0, (%esp)
; SSE41-X86-NEXT:    flds (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    popl %eax
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: froundeven32:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundss $8, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: froundeven32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vroundss $8, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    .cfi_def_cfa_offset 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: froundeven32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundss $8, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call float @llvm.experimental.constrained.roundeven.f32(
                        float %f, metadata !"fpexcept.strict") #0
  ret float %res
}

define double @froundevenf64(double %f) #0 {
; SSE41-X86-LABEL: froundevenf64:
; SSE41-X86:       # %bb.0:
; SSE41-X86-NEXT:    pushl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8
; SSE41-X86-NEXT:    .cfi_offset %ebp, -8
; SSE41-X86-NEXT:    movl %esp, %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp
; SSE41-X86-NEXT:    andl $-8, %esp
; SSE41-X86-NEXT:    subl $8, %esp
; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-X86-NEXT:    roundsd $8, %xmm0, %xmm0
; SSE41-X86-NEXT:    movsd %xmm0, (%esp)
; SSE41-X86-NEXT:    fldl (%esp)
; SSE41-X86-NEXT:    wait
; SSE41-X86-NEXT:    movl %ebp, %esp
; SSE41-X86-NEXT:    popl %ebp
; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4
; SSE41-X86-NEXT:    retl
;
; SSE41-X64-LABEL: froundevenf64:
; SSE41-X64:       # %bb.0:
; SSE41-X64-NEXT:    roundsd $8, %xmm0, %xmm0
; SSE41-X64-NEXT:    retq
;
; AVX-X86-LABEL: froundevenf64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_offset 8
; AVX-X86-NEXT:    .cfi_offset %ebp, -8
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vroundsd $8, %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: froundevenf64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vroundsd $8, %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
  %res = call double @llvm.experimental.constrained.roundeven.f64(
                        double %f, metadata !"fpexcept.strict") #0
  ret double %res
}

attributes #0 = { strictfp }