; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-windows < %s | FileCheck %s
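
; Test that we elide copies of arguments into local allocas when the argument's
; incoming stack slot can be used directly, and that we avoid eliding when it
; would be incorrect.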

declare void @addrof_i1(i1*)
declare void @addrof_i32(i32*)
declare void @addrof_i64(i64*)
declare void @addrof_i128(i128*)
declare void @addrof_i32_x3(i32*, i32*, i32*)

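; The simplest case: the store is elided and the address of %x's incoming
; argument slot is passed directly to addrof_i32.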
define void @simple(i32 %x) {
; CHECK-LABEL: simple:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret void
}

; Once the copy is elided, %x shares a stack slot with %x.addr, so we need to
; load %x before calling addrof_i32, which could mutate it in place.

define i32 @use_arg(i32 %x) {
; CHECK-LABEL: use_arg:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret i32 %x
}

; We don't elide the copy for types that need legalization, such as i64 or i1.

define i64 @split_i64(i64 %x) {
; CHECK-LABEL: split_i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i64
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    movl %edi, %edx
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i64, align 4
  store i64 %x, i64* %x.addr, align 4
  call void @addrof_i64(i64* %x.addr)
  ret i64 %x
}

define i1 @i1_arg(i1 %x) {
; CHECK-LABEL: i1_arg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %bl
; CHECK-NEXT:    movl %ebx, %eax
; CHECK-NEXT:    andb $1, %al
; CHECK-NEXT:    movb %al, {{[0-9]+}}(%esp)
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i1
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebx, %eax
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    retl
  %x.addr = alloca i1
  store i1 %x, i1* %x.addr
  call void @addrof_i1(i1* %x.addr)
  ret i1 %x
}

; We can't elide the copy when an i64 is split between registers and memory in
; a fastcc function.

define fastcc i64 @fastcc_split_i64(i64* %p, i64 %x) {
; CHECK-LABEL: fastcc_split_i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl %edx, %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %edx, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i64
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    movl %edi, %edx
; CHECK-NEXT:    addl $8, %esp
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i64, align 4
  store i64 %x, i64* %x.addr, align 4
  call void @addrof_i64(i64* %x.addr)
  ret i64 %x
}

; We can't elide the copy when it would reduce the user-requested alignment.

define void @high_alignment(i32 %x) {
; CHECK-LABEL: high_alignment:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    andl $-128, %esp
; CHECK-NEXT:    subl $128, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebp, %esp
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
entry:
  %x.p = alloca i32, align 128
  store i32 %x, i32* %x.p
  call void @addrof_i32(i32* %x.p)
  ret void
}

; We can't elide the copy when it would reduce the ABI-required alignment.
; FIXME: We should lower the ABI alignment of i64 on Windows, since MSVC
; doesn't guarantee it.

define void @abi_alignment(i64 %x) {
; CHECK-LABEL: abi_alignment:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    andl $-8, %esp
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl 12(%ebp), %ecx
; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i64
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebp, %esp
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
entry:
  %x.p = alloca i64
  store i64 %x, i64* %x.p
  call void @addrof_i64(i64* %x.p)
  ret void
}

; The code we generate for this is unimportant. This is mostly a crash test.

define void @split_i128(i128* %sret, i128 %x) {
; CHECK-LABEL: split_i128:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    andl $-8, %esp
; CHECK-NEXT:    subl $32, %esp
; CHECK-NEXT:    movl 12(%ebp), %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl 16(%ebp), %ebx
; CHECK-NEXT:    movl 20(%ebp), %esi
; CHECK-NEXT:    movl 24(%ebp), %edi
; CHECK-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i128
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl %edi, 12(%eax)
; CHECK-NEXT:    movl %esi, 8(%eax)
; CHECK-NEXT:    movl %ebx, 4(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, (%eax)
; CHECK-NEXT:    leal -12(%ebp), %esp
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i128
  store i128 %x, i128* %x.addr
  call void @addrof_i128(i128* %x.addr)
  store i128 %x, i128* %sret
  ret void
}

; Check that we load all of x, y, and z before the call.

define i32 @three_args(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: three_args:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    pushl %edx
; CHECK-NEXT:    calll _addrof_i32_x3
; CHECK-NEXT:    addl $12, %esp
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    retl
entry:
  %z.addr = alloca i32, align 4
  %y.addr = alloca i32, align 4
  %x.addr = alloca i32, align 4
  store i32 %z, i32* %z.addr, align 4
  store i32 %y, i32* %y.addr, align 4
  store i32 %x, i32* %x.addr, align 4
  call void @addrof_i32_x3(i32* %x.addr, i32* %y.addr, i32* %z.addr)
  %s1 = add i32 %x, %y
  %sum = add i32 %s1, %z
  ret i32 %sum
}

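; The alloca is still elided into %x's argument slot: the dead store of %x
; disappears and only %y is copied over the slot before the call.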
define void @two_args_same_alloca(i32 %x, i32 %y) {
; CHECK-LABEL: two_args_same_alloca:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    retl
entry:
  %x.addr = alloca i32
  store i32 %x, i32* %x.addr
  store i32 %y, i32* %x.addr
  call void @addrof_i32(i32* %x.addr)
  ret void
}

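; Don't elide the copy of a pointer to a byval argument: %x is the address of
; caller-owned argument memory, materialized with a lea, not a value with an
; incoming stack slot of its own.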
define void @avoid_byval(i32* byval(i32) %x) {
; CHECK-LABEL: avoid_byval:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %eax
; CHECK-NEXT:    retl
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}

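; Likewise for inalloca arguments, which also live in the argument area.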
define void @avoid_inalloca(i32* inalloca(i32) %x) {
; CHECK-LABEL: avoid_inalloca:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %eax
; CHECK-NEXT:    retl
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}

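; And likewise for preallocated arguments.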
define void @avoid_preallocated(i32* preallocated(i32) %x) {
; CHECK-LABEL: avoid_preallocated:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    popl %eax
; CHECK-NEXT:    retl
entry:
  %x.p.p = alloca i32*
  store i32* %x, i32** %x.p.p
  call void @addrof_i32(i32* %x)
  ret void
}

; Don't elide the copy when the alloca is escaped with a store.
define void @escape_with_store(i32 %x) {
; CHECK-LABEL: escape_with_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %esp, %ecx
; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    pushl %ecx
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $12, %esp
; CHECK-NEXT:    retl
  %x1 = alloca i32
  %x2 = alloca i32*
  store i32* %x1, i32** %x2
  %x3 = load i32*, i32** %x2
  store i32 0, i32* %x3
  store i32 %x, i32* %x1
  call void @addrof_i32(i32* %x1)
  ret void
}

; This test case exposed issues with the use of TokenFactor.

define void @sret_and_elide(i32* sret(i32) %sret, i32 %v) {
; CHECK-LABEL: sret_and_elide:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %edi, (%esi)
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    retl
  %v.p = alloca i32
  store i32 %v, i32* %v.p
  call void @addrof_i32(i32* %v.p)
  store i32 %v, i32* %sret
  ret void
}

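; The i32 store only initializes part of the i64 alloca, so the alloca can't be
; replaced with %x's 4-byte argument slot.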
define void @avoid_partially_initialized_alloca(i32 %x) {
; CHECK-LABEL: avoid_partially_initialized_alloca:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    movl %esp, %ebp
; CHECK-NEXT:    andl $-8, %esp
; CHECK-NEXT:    subl $8, %esp
; CHECK-NEXT:    movl 8(%ebp), %eax
; CHECK-NEXT:    movl %eax, (%esp)
; CHECK-NEXT:    movl %esp, %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    calll _addrof_i32
; CHECK-NEXT:    addl $4, %esp
; CHECK-NEXT:    movl %ebp, %esp
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
  %a = alloca i64
  %p = bitcast i64* %a to i32*
  store i32 %x, i32* %p
  call void @addrof_i32(i32* %p)
  ret void
}

; Ensure no copy elision happens: the two i3 values fed into the icmp may have
; garbage in the upper bits, so a truncation is needed.

define i1 @use_i3(i3 %a1, i3 %a2) {
; CHECK-LABEL: use_i3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %al
; CHECK-NEXT:    andb $7, %al
; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK-NEXT:    andb $7, %cl
; CHECK-NEXT:    movb %cl, {{[0-9]+}}(%esp)
; CHECK-NEXT:    cmpb %cl, %al
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    popl %ecx
; CHECK-NEXT:    retl
  %tmp = alloca i3
  store i3 %a2, i3* %tmp
  %val = load i3, i3* %tmp
  %res = icmp eq i3 %a1, %val
  ret i1 %res
}