; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X86

; Make sure that flags are properly preserved despite atomic optimizations.
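; In atomic_and_flags_1/2 below, the compare sets EFLAGS and the atomic
; load/add/store sequence is folded into a single memory-destination
; incb/addb, which clobbers EFLAGS; the compare therefore has to be
; re-emitted (the second cmpl in the CHECK lines) before the second branch
; can use it.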

define i32 @atomic_and_flags_1(i8* %p, i32 %a, i32 %b) {
; X64-LABEL: atomic_and_flags_1:
; X64:       # %bb.0:
; X64-NEXT:    cmpl %edx, %esi
; X64-NEXT:    jne .LBB0_3
; X64-NEXT:  # %bb.1: # %L1
; X64-NEXT:    incb (%rdi)
; X64-NEXT:    cmpl %edx, %esi
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:  # %bb.4: # %L3
; X64-NEXT:    movl $3, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB0_3: # %L2
; X64-NEXT:    movl $2, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB0_2: # %L4
; X64-NEXT:    movl $4, %eax
; X64-NEXT:    retq
;
; X86-LABEL: atomic_and_flags_1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    jne .LBB0_3
; X86-NEXT:  # %bb.1: # %L1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    incb (%edx)
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    jne .LBB0_2
; X86-NEXT:  # %bb.4: # %L3
; X86-NEXT:    movl $3, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB0_3: # %L2
; X86-NEXT:    movl $2, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB0_2: # %L4
; X86-NEXT:    movl $4, %eax
; X86-NEXT:    retl
  ; Generate a flags value and use it.
  %cmp = icmp eq i32 %a, %b
  br i1 %cmp, label %L1, label %L2

L1:
  ; The following pattern will get folded.
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, 1 ; This forces the INC instruction to be generated.
  store atomic i8 %2, i8* %p release, align 1

  ; Use the comparison result again. We need to rematerialize the comparison
  ; somehow. This test checks that cmpl gets emitted again, but any
  ; rematerialization would work (the optimizer used to clobber the flags with
  ; the add).
  br i1 %cmp, label %L3, label %L4

L2:
  ret i32 2

L3:
  ret i32 3

L4:
  ret i32 4
}

; Same as above, but using 2 as the immediate to avoid the INC instruction.
define i32 @atomic_and_flags_2(i8* %p, i32 %a, i32 %b) {
; X64-LABEL: atomic_and_flags_2:
; X64:       # %bb.0:
; X64-NEXT:    cmpl %edx, %esi
; X64-NEXT:    jne .LBB1_3
; X64-NEXT:  # %bb.1: # %L1
; X64-NEXT:    addb $2, (%rdi)
; X64-NEXT:    cmpl %edx, %esi
; X64-NEXT:    jne .LBB1_2
; X64-NEXT:  # %bb.4: # %L3
; X64-NEXT:    movl $3, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_3: # %L2
; X64-NEXT:    movl $2, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_2: # %L4
; X64-NEXT:    movl $4, %eax
; X64-NEXT:    retq
;
; X86-LABEL: atomic_and_flags_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    jne .LBB1_3
; X86-NEXT:  # %bb.1: # %L1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    addb $2, (%edx)
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    jne .LBB1_2
; X86-NEXT:  # %bb.4: # %L3
; X86-NEXT:    movl $3, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB1_3: # %L2
; X86-NEXT:    movl $2, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB1_2: # %L4
; X86-NEXT:    movl $4, %eax
; X86-NEXT:    retl
  %cmp = icmp eq i32 %a, %b
  br i1 %cmp, label %L1, label %L2
L1:
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  br i1 %cmp, label %L3, label %L4
L2:
  ret i32 2
L3:
  ret i32 3
L4:
  ret i32 4
}

; PR20841 - ensure we don't reuse the ZF flag from XADD for compares with zero;
; the flags are set for the result of the add (the value stored to memory),
; not the value returned by the atomicrmw add.
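; XADD leaves the old memory value in its register operand while EFLAGS are
; set by the addition, i.e. they describe the new value written back to
; memory, so the compare of the atomicrmw result (the old value) against zero
; has to be recomputed explicitly, as in the X64 output below:
;   lock xaddq %rax, (%rdi)  # %rax <- old value, EFLAGS <- flags of old+1
;   testq %rax, %rax         # ZF recomputed from the returned (old) value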

define zeroext i1 @xadd_cmp0_i64(i64* %x) nounwind {
; X64-LABEL: xadd_cmp0_i64:
; X64:       # %bb.0:
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    lock xaddq %rax, (%rdi)
; X64-NEXT:    testq %rax, %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: xadd_cmp0_i64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl (%esi), %eax
; X86-NEXT:    movl 4(%esi), %edx
; X86-NEXT:    .p2align 4, 0x90
; X86-NEXT:  .LBB2_1: # %atomicrmw.start
; X86-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    addl $1, %ebx
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    lock cmpxchg8b (%esi)
; X86-NEXT:    jne .LBB2_1
; X86-NEXT:  # %bb.2: # %atomicrmw.end
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
  %add = atomicrmw add i64* %x, i64 1 seq_cst
  %cmp = icmp eq i64 %add, 0
  ret i1 %cmp
}
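; Note: i686 has no 64-bit XADD, so the i64 case above is lowered to a
; cmpxchg8b loop; the old value returned in %edx:%eax is tested with
; orl+sete rather than by reusing any flags produced inside the loop.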

define zeroext i1 @xadd_cmp0_i32(i32* %x) nounwind {
; X64-LABEL: xadd_cmp0_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    lock xaddl %eax, (%rdi)
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    retq
;
; X86-LABEL: xadd_cmp0_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $1, %ecx
; X86-NEXT:    lock xaddl %ecx, (%eax)
; X86-NEXT:    testl %ecx, %ecx
; X86-NEXT:    setne %al
; X86-NEXT:    retl
  %add = atomicrmw add i32* %x, i32 1 seq_cst
  %cmp = icmp ne i32 %add, 0
  ret i1 %cmp
}

define zeroext i1 @xadd_cmp0_i16(i16* %x) nounwind {
; X64-LABEL: xadd_cmp0_i16:
; X64:       # %bb.0:
; X64-NEXT:    movw $1, %ax
; X64-NEXT:    lock xaddw %ax, (%rdi)
; X64-NEXT:    testw %ax, %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: xadd_cmp0_i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movw $1, %cx
; X86-NEXT:    lock xaddw %cx, (%eax)
; X86-NEXT:    testw %cx, %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
  %add = atomicrmw add i16* %x, i16 1 seq_cst
  %cmp = icmp eq i16 %add, 0
  ret i1 %cmp
}

define zeroext i1 @xadd_cmp0_i8(i8* %x) nounwind {
; X64-LABEL: xadd_cmp0_i8:
; X64:       # %bb.0:
; X64-NEXT:    movb $1, %al
; X64-NEXT:    lock xaddb %al, (%rdi)
; X64-NEXT:    testb %al, %al
; X64-NEXT:    setne %al
; X64-NEXT:    retq
;
; X86-LABEL: xadd_cmp0_i8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb $1, %cl
; X86-NEXT:    lock xaddb %cl, (%eax)
; X86-NEXT:    testb %cl, %cl
; X86-NEXT:    setne %al
; X86-NEXT:    retl
  %add = atomicrmw add i8* %x, i8 1 seq_cst
  %cmp = icmp ne i8 %add, 0
  ret i1 %cmp
}