1; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -no-integrated-as | FileCheck %s
2
3; There should be no stack manipulations between the inline asm and ret.
4; CHECK: test1
5; CHECK: InlineAsm End
6; CHECK-NEXT: ret
; Returns, as x86_fp80, the value the inline asm produces in st(0)
; (output constraint "={st(0)}"). The asm takes no inputs.
7define x86_fp80 @test1() {
8        %tmp85 = call x86_fp80 asm sideeffect "fld0", "={st(0)}"()
9        ret x86_fp80 %tmp85
10}
11
12; CHECK: test2
13; CHECK: InlineAsm End
14; CHECK-NEXT: ret
; Same shape as test1, but the st(0) asm output is typed and returned as
; double instead of x86_fp80.
15define double @test2() {
16        %tmp85 = call double asm sideeffect "fld0", "={st(0)}"()
17        ret double %tmp85
18}
19
20; Setting up argument in st(0) should be a single fld.
21; CHECK: test3
22; CHECK: fld
23; CHECK-NEXT: InlineAsm Start
24; Asm consumes stack, nothing should be popped.
25; CHECK: InlineAsm End
26; CHECK-NOT: fstp
27; CHECK: ret
; Passes the x86_fp80 argument %X to the asm in st(0) ("{st(0)}" input).
; The clobber list names ~{st}, and the asm has no outputs.
28define void @test3(x86_fp80 %X) {
29        call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( x86_fp80 %X)
30        ret void
31}
32
33; CHECK: test4
34; CHECK: fld
35; CHECK-NEXT: InlineAsm Start
36; CHECK: InlineAsm End
37; CHECK-NOT: fstp
38; CHECK: ret
; Same as test3, but the st(0) input is a double argument rather than an
; x86_fp80 one. Constraint string and clobbers are identical.
39define void @test4(double %X) {
40        call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( double %X)
41        ret void
42}
43
44; Same as test3/4, but using value from fadd.
45; The fadd can be done in xmm or x87 regs - we don't test that.
46; CHECK: test5
47; CHECK: InlineAsm End
48; CHECK-NOT: fstp
49; CHECK: ret
; Feeds the asm's st(0) input with a computed value (%X + 123.0) instead of
; a plain function argument; constraints match test3/test4.
50define void @test5(double %X) {
51        %Y = fadd double %X, 123.0
52        call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( double %Y)
53        ret void
54}
55
56; CHECK: test6
; Exercises "f"-constrained asm inputs in four consecutive asm calls:
;   foo - the same value (%A) for both operands,
;   bar - two distinct values (%B, %C) with no later use,
;   baz - %D and %E, where %D is used again by the following asm,
;   baz - %D alone, its final use.
57define void @test6(double %A, double %B, double %C,
58                   double %D, double %E) nounwind  {
59entry:
60; Uses the same value twice, should have one fstp after the asm.
61; CHECK: foo
62; CHECK: InlineAsm End
63; CHECK-NEXT: fstp
64; CHECK-NOT: fstp
65	tail call void asm sideeffect "foo $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %A, double %A ) nounwind
66; Uses two different values, should be in st(0)/st(1) and both be popped.
67; CHECK: bar
68; CHECK: InlineAsm End
69; CHECK-NEXT: fstp
70; CHECK-NEXT: fstp
71	tail call void asm sideeffect "bar $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %B, double %C ) nounwind
72; Uses two different values, one of which isn't killed in this asm; it
73; should not be popped after the asm.
74; CHECK: baz
75; CHECK: InlineAsm End
76; CHECK-NEXT: fstp
77; CHECK-NOT: fstp
78	tail call void asm sideeffect "baz $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %D, double %E ) nounwind
79; This is the last use of %D, so it should be popped after.
80; CHECK: baz
81; CHECK: InlineAsm End
82; CHECK-NEXT: fstp
83; CHECK-NOT: fstp
84; CHECK: ret
85	tail call void asm sideeffect "baz $0", "f,~{dirflag},~{fpsr},~{flags}"( double %D ) nounwind
86	ret void
87}
88
89; PR4185
90; Passing a non-killed value to asm in {st}.
91; Make sure it is duped before.
92; asm kills st(0), so we shouldn't pop anything
93; CHECK: testPR4185
94; CHECK: fld %st(0)
95; CHECK: fistpl
96; CHECK-NOT: fstp
97; CHECK: fistpl
98; CHECK-NOT: fstp
99; CHECK: ret
100; A valid alternative would be to remat the constant pool load before each
101; inline asm.
; Passes the same constant (1.000000e+06) to two back-to-back asm calls.
; Each call takes it in {st} and lists ~{st} in its clobbers.
102define void @testPR4185() {
103return:
104	call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06)
105	call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06)
106	ret void
107}
108
109; Passing a non-killed value through asm in {st}.
110; Make sure it is not duped before.
111; Second asm kills st(0), so we shouldn't pop anything
112; CHECK: testPR4185b
113; CHECK-NOT: fld %st(0)
114; CHECK: fistl
115; CHECK-NOT: fstp
116; CHECK: fistpl
117; CHECK-NOT: fstp
118; CHECK: ret
119; A valid alternative would be to remat the constant pool load before each
120; inline asm.
; Variant of testPR4185: the first asm ("fistl") takes the constant in {st}
; without clobbering it; only the second asm ("fistpl") lists ~{st}.
121define void @testPR4185b() {
122return:
123	call void asm sideeffect "fistl $0", "{st}"(double 1.000000e+06)
124	call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06)
125	ret void
126}
127
128; PR4459
129; The return value from ceil must be duped before being consumed by asm.
130; CHECK: testPR4459
131; CHECK: ceil
132; CHECK: fld %st(0)
133; CHECK-NOT: fxch
134; CHECK: fistpl
135; CHECK-NOT: fxch
136; CHECK: fstpt
137; CHECK: test
; Calls @ceil on %a, hands the result to an asm that takes it in {st} and
; clobbers ~{st}, then passes the same result to @test3. The value is
; therefore still needed after the asm.
138define void @testPR4459(x86_fp80 %a) {
139entry:
140	%0 = call x86_fp80 @ceil(x86_fp80 %a)
141	call void asm sideeffect "fistpl $0", "{st},~{st}"( x86_fp80 %0)
142	call void @test3(x86_fp80 %0 )
143        ret void
144}
; External callee used by testPR4459; only this declaration is visible here.
145declare x86_fp80 @ceil(x86_fp80)
146
147; PR4484
148; test1 leaves a value on the stack that is needed after the asm.
149; CHECK: testPR4484
150; CHECK: calll _test1
151; CHECK-NOT: fstp
152; Load %a from the stack after the call to test1
153; CHECK: fldt
154; CHECK-NOT: fxch
155; CHECK: fistpl
156; CHECK-NOT: fstp
157; Set up call to test3.
158; CHECK: fstpt
159; CHECK: test
; Calls @test1 first and keeps its x86_fp80 result (%0) alive across an asm
; that takes the unrelated argument %a in {st} and clobbers ~{st}. %0 is
; then passed to @test3.
160define void @testPR4484(x86_fp80 %a) {
161entry:
162	%0 = call x86_fp80 @test1()
163	call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %a)
164	call void @test3(x86_fp80 %0)
165	ret void
166}
167
168; PR4485
169; CHECK: testPR4485
; Runs the same sequence twice: load an x86_fp80 from %a, multiply it by two
; x86_fp80 constants, and pass the product to an asm in {st} with ~{st}
; clobbered. The second sequence reloads from %a rather than reusing %0.
170define void @testPR4485(x86_fp80* %a) {
171entry:
172	%0 = load x86_fp80* %a, align 16
173	%1 = fmul x86_fp80 %0, 0xK4006B400000000000000
174	%2 = fmul x86_fp80 %1, 0xK4012F424000000000000
175	tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %2)
176	%3 = load x86_fp80* %a, align 16
177	%4 = fmul x86_fp80 %3, 0xK4006B400000000000000
178	%5 = fmul x86_fp80 %4, 0xK4012F424000000000000
179	tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %5)
180	ret void
181}
182
183; An input argument in a fixed position is implicitly popped by the asm only if
184; the input argument is tied to an output register, or it is in the clobber list.
185; The clobber list case is tested above.
186;
187; This doesn't implicitly pop the stack:
188;
189;   void fist1(long double x, int *p) {
190;     asm volatile ("fistl %1" : : "t"(x), "m"(*p));
191;   }
192;
193; CHECK: fist1
194; CHECK: fldt
195; CHECK: fistl (%e
196; CHECK: fstp
197; CHECK: ret
; IR form of the fist1 C example: %x is an input in {st} and %p is an
; indirect memory operand ("*m"). {st} is neither tied to an output nor
; listed as a clobber.
198define void @fist1(x86_fp80 %x, i32* %p) nounwind ssp {
199entry:
200  tail call void asm sideeffect "fistl $1", "{st},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind
201  ret void
202}
203
204; Here, the input operand is tied to an output, which means that it is
205; implicitly popped (and then the output is implicitly pushed).
206;
207;   long double fist2(long double x, int *p) {
208;     long double y;
209;     asm ("fistl %1" : "=&t"(y) : "0"(x), "m"(*p) : "memory");
210;     return y;
211;   }
212;
213; CHECK: fist2
214; CHECK: fldt
215; CHECK: fistl (%e
216; CHECK-NOT: fstp
217; CHECK: ret
; IR form of the fist2 C example: the asm has an early-clobber output in
; {st} ("=&{st}") and %x is tied to it through the "0" constraint. %p is an
; indirect memory operand. The asm result is returned.
218define x86_fp80 @fist2(x86_fp80 %x, i32* %p) nounwind ssp {
219entry:
220  %0 = tail call x86_fp80 asm "fistl $2", "=&{st},0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind
221  ret x86_fp80 %0
222}
223
224; An 'f' constraint is never implicitly popped:
225;
226;   void fucomp1(long double x, long double y) {
227;     asm volatile ("fucomp %1" : : "t"(x), "f"(y) : "st");
228;   }
229; CHECK: fucomp1
230; CHECK: fldt
231; CHECK: fldt
232; CHECK: fucomp %st
233; CHECK: fstp
234; CHECK-NOT: fstp
235; CHECK: ret
; IR form of the fucomp1 C example: %x is passed in {st}, which is also
; clobbered (~{st}); %y uses the generic "f" constraint.
236define void @fucomp1(x86_fp80 %x, x86_fp80 %y) nounwind ssp {
237entry:
238  tail call void asm sideeffect "fucomp $1", "{st},f,~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
239  ret void
240}
241
242; The 'u' constraint is only popped implicitly when clobbered:
243;
244;   void fucomp2(long double x, long double y) {
245;     asm volatile ("fucomp %1" : : "t"(x), "u"(y) : "st");
246;   }
247;
248;   void fucomp3(long double x, long double y) {
249;     asm volatile ("fucompp %1" : : "t"(x), "u"(y) : "st", "st(1)");
250;   }
251;
252; CHECK: fucomp2
253; CHECK: fldt
254; CHECK: fldt
255; CHECK: fucomp %st(1)
256; CHECK: fstp
257; CHECK-NOT: fstp
258; CHECK: ret
259;
260; CHECK: fucomp3
261; CHECK: fldt
262; CHECK: fldt
263; CHECK: fucompp %st(1)
264; CHECK-NOT: fstp
265; CHECK: ret
; IR form of the fucomp2 C example: %x in {st} and %y in {st(1)}, with only
; ~{st} in the clobber list.
266define void @fucomp2(x86_fp80 %x, x86_fp80 %y) nounwind ssp {
267entry:
268  tail call void asm sideeffect "fucomp $1", "{st},{st(1)},~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
269  ret void
270}
; IR form of the fucomp3 C example: same inputs as fucomp2, but the asm is
; "fucompp" and both ~{st} and ~{st(1)} are in the clobber list.
271define void @fucomp3(x86_fp80 %x, x86_fp80 %y) nounwind ssp {
272entry:
273  tail call void asm sideeffect "fucompp $1", "{st},{st(1)},~{st},~{st(1)},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
274  ret void
275}
276
277; One input, two outputs, one dead output.
; Two-float aggregate used as the result type of the two-output "sincos"
; inline asm calls in the sincos tests.
278%complex = type { float, float }
279; CHECK: sincos1
280; CHECK: flds
281; CHECK-NOT: fxch
282; CHECK: sincos
283; CHECK-NOT: fstp
284; CHECK: fstp %st(1)
285; CHECK-NOT: fstp
286; CHECK: ret
; The asm has two outputs (element 0 in {st}, element 1 in {st(1)}) and one
; input tied to output 0. Only element 0 is extracted and returned; element
; 1 is never used.
287define float @sincos1(float %x) nounwind ssp {
288entry:
289  %0 = tail call %complex asm "sincos", "={st},={st(1)},0,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
290  %asmresult = extractvalue %complex %0, 0
291  ret float %asmresult
292}
293
294; Same thing, swapped output operands.
295; CHECK: sincos2
296; CHECK: flds
297; CHECK-NOT: fxch
298; CHECK: sincos
299; CHECK-NOT: fstp
300; CHECK: fstp %st(1)
301; CHECK-NOT: fstp
302; CHECK: ret
; Mirror of sincos1 with the output operands swapped: element 0 is in
; {st(1)}, element 1 is in {st}, and the input is tied to output 1. Element
; 1 (the {st} output) is returned; element 0 is never used.
303define float @sincos2(float %x) nounwind ssp {
304entry:
305  %0 = tail call %complex asm "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
306  %asmresult = extractvalue %complex %0, 1
307  ret float %asmresult
308}
309
310; Clobber st(0) after it was live-out/dead from the previous asm.
311; CHECK: sincos3
312; Load x, make a copy for the second asm.
313; CHECK: flds
314; CHECK: fld %st(0)
315; CHECK: sincos
316; Discard dead result in st(0), bring x to the top.
317; CHECK: fstp %st(0)
318; CHECK: fxch
319; x is now in st(0) for the second asm
320; CHECK: sincos
321; Discard both results.
322; CHECK: fstp
323; CHECK: fstp
324; CHECK: ret
; Issues the same two-output sideeffect asm twice with %x as the tied input
; each time. Only element 0 of the first result (its {st(1)} output) is
; returned; the first call's {st} output and the whole second result (%1)
; are unused.
325define float @sincos3(float %x) nounwind ssp {
326entry:
327  %0 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
328  %1 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
329  %asmresult = extractvalue %complex %0, 0
330  ret float %asmresult
331}
332
333; Pass the same value in two fixed stack slots.
334; CHECK: PR10602
335; CHECK: flds LCPI
336; CHECK: fld %st(0)
337; CHECK: fcomi %st(1), %st(0)
; Passes the same constant (2.000000e+00) as both the {st} and the {st(1)}
; input of one asm. The asm's i32 output is placed in a general register
; ("=r") and returned.
338define i32 @PR10602() nounwind ssp {
339entry:
340  %0 = tail call i32 asm "fcomi $2, $1; pushf; pop $0", "=r,{st},{st(1)},~{dirflag},~{fpsr},~{flags}"(double 2.000000e+00, double 2.000000e+00) nounwind
341  ret i32 %0
342}
343
344; <rdar://problem/16952634>
345; X87 stackifier asserted when there was an ST register defined by an
346; inline-asm instruction and the ST register was live across another
347; inline-asm instruction.
348;
349; INLINEASM <es:frndint> [sideeffect] [attdialect], $0:[regdef], %ST0<imp-def,tied5>, $1:[reguse tiedto:$0], %ST0<tied3>, $2:[clobber], %EFLAGS<earlyclobber,imp-def,dead>
350; INLINEASM <es:fldcw $0> [sideeffect] [mayload] [attdialect], $0:[mem], %EAX<undef>, 1, %noreg, 0, %noreg, $1:[clobber], %EFLAGS<earlyclobber,imp-def,dead>
351; %FP0<def> = COPY %ST0
352
353; CHECK-LABEL: _test_live_st
354; CHECK: ## InlineAsm Start
355; CHECK: frndint
356; CHECK: ## InlineAsm End
357; CHECK: ## InlineAsm Start
358; CHECK: fldcw
359; CHECK: ## InlineAsm End
360
; Types and external global used by test_live_st, which stores an x86_fp80
; into field 1 of @fpu (the lone x86_fp80 after the [8 x x86_fp80] array).
361%struct.fpu_t = type { [8 x x86_fp80], x86_fp80, %struct.anon1, %struct.anon2, i32, i8, [15 x i8] }
362%struct.anon1 = type { i32, i32, i32 }
363%struct.anon2 = type { i32, i32, i32, i32 }
364
365@fpu = external global %struct.fpu_t, align 16
366
367; Function Attrs: ssp
; Loads an x86_fp80 (%0) and branches on %a1 == 1. On the taken path, a
; "frndint" asm rewrites the value through a tied {st} operand, and a second
; asm ("fldcw" with a memory operand) runs before the result is consumed,
; so the first asm's ST output is live across the second asm. The join
; block selects %1 or %0 via phi, converts it to i32 and back to x86_fp80,
; and stores the result into field 1 of @fpu.
368define void @test_live_st(i32 %a1) {
369entry:
370  %0 = load x86_fp80* undef, align 16
371  %cond = icmp eq i32 %a1, 1
372  br i1 %cond, label %sw.bb4.i, label %_Z5tointRKe.exit
373
374sw.bb4.i:
375  %1 = call x86_fp80 asm sideeffect "frndint", "={st},0,~{dirflag},~{fpsr},~{flags}"(x86_fp80 %0)
376  call void asm sideeffect "fldcw $0", "*m,~{dirflag},~{fpsr},~{flags}"(i32* undef)
377  br label %_Z5tointRKe.exit
378
379_Z5tointRKe.exit:
380  %result.0.i = phi x86_fp80 [ %1, %sw.bb4.i ], [ %0, %entry ]
381  %conv.i1814 = fptosi x86_fp80 %result.0.i to i32
382  %conv626 = sitofp i32 %conv.i1814 to x86_fp80
383  store x86_fp80 %conv626, x86_fp80* getelementptr inbounds (%struct.fpu_t* @fpu, i32 0, i32 1)
384  br label %return
385
386return:
387  ret void
388}
389
390; Check that x87 stackifier is correctly rewriting FP registers to ST registers.
391;
392; CHECK-LABEL: _test_operand_rewrite
393; CHECK: ## InlineAsm Start
394; CHECK: foo %st(0), %st(1)
395; CHECK: ## InlineAsm End
396
; The asm has no inputs and two outputs, $0 in {st} and $1 in {st(1)}, both
; referenced in the asm template. The function returns $0 - $1, so both
; outputs are live after the asm.
397define double @test_operand_rewrite() {
398entry:
399  %0 = tail call { double, double } asm sideeffect "foo $0, $1", "={st},={st(1)},~{dirflag},~{fpsr},~{flags}"()
400  %asmresult = extractvalue { double, double } %0, 0
401  %asmresult1 = extractvalue { double, double } %0, 1
402  %sub = fsub double %asmresult, %asmresult1
403  ret double %sub
404}
405