; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-windows < %s | FileCheck %s
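
; Test that we elide copies of arguments into local allocas when the argument's
; incoming stack slot can be used directly, and that we avoid eliding when it
; would be incorrect.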

declare void @addrof_i1(i1*)
declare void @addrof_i32(i32*)
declare void @addrof_i64(i64*)
declare void @addrof_i128(i128*)
declare void @addrof_i32_x3(i32*, i32*, i32*)

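; The simplest case: the store is elided and the address of %x's incoming
; argument slot is passed directly to addrof_i32.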
define void @simple(i32 %x) {
; CHECK-LABEL: simple:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret void
}

; Once the copy is elided, %x shares a stack slot with %x.addr, so we need to
; load %x before calling addrof_i32, which could mutate it in place.

define i32 @use_arg(i32 %x) {
; CHECK-LABEL: use_arg:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret i32 %x
}

; We don't elide the copy for types that need legalization, such as i64 or i1.

define i64 @split_i64(i64 %x) {
; CHECK-LABEL: split_i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i64
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    movl %edi, %edx
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i64, align 4
  store i64 %x, i64* %x.addr, align 4
  call void @addrof_i64(i64* %x.addr)
  ret i64 %x
}

define i1 @i1_arg(i1 %x) {
; CHECK-LABEL: i1_arg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %bl
; CHECK-NEXT:    movl %ebx, %eax
; CHECK-NEXT:    andb $1, %al
; CHECK-NEXT:    movb %al, {{[0-9]+}}(%esp)
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i1
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebx, %eax
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    retl
  %x.addr = alloca i1
  store i1 %x, i1* %x.addr
  call void @addrof_i1(i1* %x.addr)
  ret i1 %x
}

; We can't elide the copy when an i64 is split between registers and memory in
; a fastcc function.

define fastcc i64 @fastcc_split_i64(i64* %p, i64 %x) {
; CHECK-LABEL: fastcc_split_i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl %edx, %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %edx, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i64
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    movl %edi, %edx
; CHECK-NEXT:    addl $8, %esp
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i64, align 4
  store i64 %x, i64* %x.addr, align 4
  call void @addrof_i64(i64* %x.addr)
  ret i64 %x
}

; We can't elide the copy when it would reduce the user-requested alignment.

define void @high_alignment(i32 %x) {
; CHECK-LABEL: high_alignment:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    andl $-128, %esp
; CHECK-NEXT:    subl $128, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebp, %esp
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
entry:
  %x.p = alloca i32, align 128
  store i32 %x, i32* %x.p
  call void @addrof_i32(i32* %x.p)
  ret void
}

; We can't elide the copy when it would reduce the ABI-required alignment.
; FIXME: We should lower the ABI alignment of i64 on Windows, since MSVC
; doesn't guarantee it.

define void @abi_alignment(i64 %x) {
; CHECK-LABEL: abi_alignment:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    andl $-8, %esp
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl 12(%ebp), %ecx
; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i64
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebp, %esp
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
entry:
  %x.p = alloca i64
  store i64 %x, i64* %x.p
  call void @addrof_i64(i64* %x.p)
  ret void
}

; The code we generate for this is unimportant. This is mostly a crash test.

define void @split_i128(i128* %sret, i128 %x) {
; CHECK-LABEL: split_i128:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    andl $-8, %esp
; CHECK-NEXT:    subl $32, %esp
; CHECK-NEXT:    movl 12(%ebp), %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl 16(%ebp), %ebx
; CHECK-NEXT:    movl 20(%ebp), %esi
; CHECK-NEXT:    movl 24(%ebp), %edi
; CHECK-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i128
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl %edi, 12(%eax)
; CHECK-NEXT:    movl %esi, 8(%eax)
; CHECK-NEXT:    movl %ebx, 4(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, (%eax)
; CHECK-NEXT:    leal -12(%ebp), %esp
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i128
  store i128 %x, i128* %x.addr
  call void @addrof_i128(i128* %x.addr)
  store i128 %x, i128* %sret
  ret void
}

; Check that we load all of x, y, and z before the call.

define i32 @three_args(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: three_args:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    pushl %edx
; CHECK-NEXT:    calll _addrof_i32_x3
; CHECK-NEXT:    addl $12, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    retl
entry:
  %z.addr = alloca i32, align 4
  %y.addr = alloca i32, align 4
  %x.addr = alloca i32, align 4
  store i32 %z, i32* %z.addr, align 4
  store i32 %y, i32* %y.addr, align 4
  store i32 %x, i32* %x.addr, align 4
  call void @addrof_i32_x3(i32* %x.addr, i32* %y.addr, i32* %z.addr)
  %s1 = add i32 %x, %y
  %sum = add i32 %s1, %z
  ret i32 %sum
}

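; The alloca is still elided into %x's argument slot: the dead store of %x
; disappears and only %y is copied over the slot before the call.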
define void @two_args_same_alloca(i32 %x, i32 %y) {
; CHECK-LABEL: two_args_same_alloca:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  store i32 %y, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret void
}

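; Don't elide the copy of a pointer to a byval argument: %x is the address of
; caller-owned argument memory, materialized with a lea, not a value with an
; incoming stack slot of its own.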
define void @avoid_byval(i32* byval(i32) %x) {
; CHECK-LABEL: avoid_byval:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %eax
; CHECK-NEXT:    retl
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}

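; Likewise for inalloca arguments, which also live in the argument area.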
define void @avoid_inalloca(i32* inalloca(i32) %x) {
; CHECK-LABEL: avoid_inalloca:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %eax
; CHECK-NEXT:    retl
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}

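; And likewise for preallocated arguments.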
define void @avoid_preallocated(i32* preallocated(i32) %x) {
; CHECK-LABEL: avoid_preallocated:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %eax
; CHECK-NEXT:    retl
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}

; Don't elide the copy when the alloca is escaped with a store.
define void @escape_with_store(i32 %x) {
; CHECK-LABEL: escape_with_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %esp, %ecx
; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $12, %esp
; CHECK-NEXT:    retl
  %x1 = alloca i32
  %x2 = alloca i32*
  store i32* %x1, i32** %x2
  %x3 = load i32*, i32** %x2
  store i32 0, i32* %x3
  store i32 %x, i32* %x1
  call void @addrof_i32(i32* %x1)
  ret void
}

; This test case exposed issues with the use of TokenFactor.

define void @sret_and_elide(i32* sret(i32) %sret, i32 %v) {
; CHECK-LABEL: sret_and_elide:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %edi, (%esi)
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    retl
  %v.p = alloca i32
  store i32 %v, i32* %v.p
  call void @addrof_i32(i32* %v.p)
  store i32 %v, i32* %sret
  ret void
}

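; The i32 store only initializes part of the i64 alloca, so the alloca can't be
; replaced with %x's 4-byte argument slot.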
define void @avoid_partially_initialized_alloca(i32 %x) {
; CHECK-LABEL: avoid_partially_initialized_alloca:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    andl $-8, %esp
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebp, %esp
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
  %a = alloca i64
  %p = bitcast i64* %a to i32*
  store i32 %x, i32* %p
  call void @addrof_i32(i32* %p)
  ret void
}

; Ensure no copy elision happens: the two i3 values fed into the icmp may have
; garbage in the upper bits, so a truncation is needed.

define i1 @use_i3(i3 %a1, i3 %a2) {
; CHECK-LABEL: use_i3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %al
; CHECK-NEXT:    andb $7, %al
; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK-NEXT:    andb $7, %cl
; CHECK-NEXT:    movb %cl, {{[0-9]+}}(%esp)
; CHECK-NEXT:    cmpb %cl, %al
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    popl %ecx
; CHECK-NEXT:    retl
  %tmp = alloca i3
  store i3 %a2, i3* %tmp
  %val = load i3, i3* %tmp
  %res = icmp eq i3 %a1, %val
  ret i1 %res
}