; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X86

; Make sure that flags are properly preserved despite atomic optimizations.

define i32 @atomic_and_flags_1(i8* %p, i32 %a, i32 %b) {
; X64-LABEL: atomic_and_flags_1:
; X64:       # %bb.0:
; X64-NEXT:    cmpl %edx, %esi
; X64-NEXT:    jne .LBB0_3
; X64-NEXT:  # %bb.1: # %L1
; X64-NEXT:    incb (%rdi)
; X64-NEXT:    cmpl %edx, %esi
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:  # %bb.4: # %L3
; X64-NEXT:    movl $3, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB0_3: # %L2
; X64-NEXT:    movl $2, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB0_2: # %L4
; X64-NEXT:    movl $4, %eax
; X64-NEXT:    retq
;
; X86-LABEL: atomic_and_flags_1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    jne .LBB0_3
; X86-NEXT:  # %bb.1: # %L1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    incb (%edx)
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    jne .LBB0_2
; X86-NEXT:  # %bb.4: # %L3
; X86-NEXT:    movl $3, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB0_3: # %L2
; X86-NEXT:    movl $2, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB0_2: # %L4
; X86-NEXT:    movl $4, %eax
; X86-NEXT:    retl
  ; Generate flags value, and use it.
  %cmp = icmp eq i32 %a, %b
  br i1 %cmp, label %L1, label %L2

L1:
  ; The following pattern will get folded.
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, 1 ; This forces the INC instruction to be generated.
  store atomic i8 %2, i8* %p release, align 1

  ; Use the comparison result again. We need to rematerialize the comparison
  ; somehow. This test checks that cmpl gets emitted again, but any
  ; rematerialization would work (the optimizer used to clobber the flags with
  ; the add).
  br i1 %cmp, label %L3, label %L4

L2:
  ret i32 2

L3:
  ret i32 3

L4:
  ret i32 4
}

; Same as above, but using 2 as immediate to avoid the INC instruction.
define i32 @atomic_and_flags_2(i8* %p, i32 %a, i32 %b) {
; X64-LABEL: atomic_and_flags_2:
; X64:       # %bb.0:
; X64-NEXT:    cmpl %edx, %esi
; X64-NEXT:    jne .LBB1_3
; X64-NEXT:  # %bb.1: # %L1
; X64-NEXT:    addb $2, (%rdi)
; X64-NEXT:    cmpl %edx, %esi
; X64-NEXT:    jne .LBB1_2
; X64-NEXT:  # %bb.4: # %L3
; X64-NEXT:    movl $3, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_3: # %L2
; X64-NEXT:    movl $2, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_2: # %L4
; X64-NEXT:    movl $4, %eax
; X64-NEXT:    retq
;
; X86-LABEL: atomic_and_flags_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    jne .LBB1_3
; X86-NEXT:  # %bb.1: # %L1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    addb $2, (%edx)
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    jne .LBB1_2
; X86-NEXT:  # %bb.4: # %L3
; X86-NEXT:    movl $3, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB1_3: # %L2
; X86-NEXT:    movl $2, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB1_2: # %L4
; X86-NEXT:    movl $4, %eax
; X86-NEXT:    retl
  %cmp = icmp eq i32 %a, %b
  br i1 %cmp, label %L1, label %L2
L1:
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  br i1 %cmp, label %L3, label %L4
L2:
  ret i32 2
L3:
  ret i32 3
L4:
  ret i32 4
}

; PR20841 - ensure we don't reuse the ZF flag from XADD for compares with zero:
; the flags are set for the result of the add (the new value stored to memory),
; not for the old value returned by the atomicrmw add. For example, adding 1 to
; -1 sets ZF even though the atomicrmw result (-1) is nonzero.

define zeroext i1 @xadd_cmp0_i64(i64* %x) nounwind {
; X64-LABEL: xadd_cmp0_i64:
; X64:       # %bb.0:
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    lock xaddq %rax, (%rdi)
; X64-NEXT:    testq %rax, %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: xadd_cmp0_i64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl (%esi), %eax
; X86-NEXT:    movl 4(%esi), %edx
; X86-NEXT:    .p2align 4, 0x90
; X86-NEXT:  .LBB2_1: # %atomicrmw.start
; X86-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    addl $1, %ebx
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    lock cmpxchg8b (%esi)
; X86-NEXT:    jne .LBB2_1
; X86-NEXT:  # %bb.2: # %atomicrmw.end
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
  %add = atomicrmw add i64* %x, i64 1 seq_cst
  %cmp = icmp eq i64 %add, 0
  ret i1 %cmp
}

define zeroext i1 @xadd_cmp0_i32(i32* %x) nounwind {
; X64-LABEL: xadd_cmp0_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl $1, %eax
; X64-NEXT:    lock xaddl %eax, (%rdi)
; X64-NEXT:    testl %eax, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    retq
;
; X86-LABEL: xadd_cmp0_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $1, %ecx
; X86-NEXT:    lock xaddl %ecx, (%eax)
; X86-NEXT:    testl %ecx, %ecx
; X86-NEXT:    setne %al
; X86-NEXT:    retl
  %add = atomicrmw add i32* %x, i32 1 seq_cst
  %cmp = icmp ne i32 %add, 0
  ret i1 %cmp
}

define zeroext i1 @xadd_cmp0_i16(i16* %x) nounwind {
; X64-LABEL: xadd_cmp0_i16:
; X64:       # %bb.0:
; X64-NEXT:    movw $1, %ax
; X64-NEXT:    lock xaddw %ax, (%rdi)
; X64-NEXT:    testw %ax, %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: xadd_cmp0_i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movw $1, %cx
; X86-NEXT:    lock xaddw %cx, (%eax)
; X86-NEXT:    testw %cx, %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
  %add = atomicrmw add i16* %x, i16 1 seq_cst
  %cmp = icmp eq i16 %add, 0
  ret i1 %cmp
}

define zeroext i1 @xadd_cmp0_i8(i8* %x) nounwind {
; X64-LABEL: xadd_cmp0_i8:
; X64:       # %bb.0:
; X64-NEXT:    movb $1, %al
; X64-NEXT:    lock xaddb %al, (%rdi)
; X64-NEXT:    testb %al, %al
; X64-NEXT:    setne %al
; X64-NEXT:    retq
;
; X86-LABEL: xadd_cmp0_i8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb $1, %cl
; X86-NEXT:    lock xaddb %cl, (%eax)
; X86-NEXT:    testb %cl, %cl
; X86-NEXT:    setne %al
; X86-NEXT:    retl
  %add = atomicrmw add i8* %x, i8 1 seq_cst
  %cmp = icmp ne i8 %add, 0
  ret i1 %cmp
}