; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s

; Shared i128 global: the atomicrmw tests store their result here so the
; checks can observe the value returned in RDX:RAX (Mach-O symbol `_var`).
@var = global i128 0
; An i128 cmpxchg must lower to a single lock cmpxchg16b, with the expected
; value in RDX:RAX and the replacement value in RCX:RBX.
define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
; CHECK-LABEL: val_compare_and_swap:
; CHECK: movq %rsi, %rax
; CHECK: movq %rcx, %rbx
; CHECK: movq %r8, %rcx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)

  %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
  %val = extractvalue { i128, i1 } %pair, 0
  ret i128 %val
}
17
; atomicrmw nand on i128: expands to a cmpxchg16b loop that computes
; ~(old & bits) into RCX:RBX each iteration.
define void @fetch_and_nand(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_nand:
; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG:     movq (%rdi), %rax
; CHECK-DAG:     movq 8(%rdi), %rdx

; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK:         movq %rdx, %rcx
; CHECK:         andq [[INCHI]], %rcx
; CHECK:         movq %rax, %rbx
  ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
; CHECK:         andq %rsi, %rbx
; CHECK:         notq %rbx
; CHECK:         notq %rcx
; CHECK:         lock
; CHECK:         cmpxchg16b (%rdi)
; CHECK:         jne [[LOOP]]

; CHECK:         movq %rax, _var
; CHECK:         movq %rdx, _var+8
  %val = atomicrmw nand i128* %p, i128 %bits release
  store i128 %val, i128* @var, align 16
  ret void
}
42
; atomicrmw or on i128: cmpxchg16b loop ORing the 128-bit operand pairwise
; (low half in RBX, high half in RCX).
define void @fetch_and_or(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_or:
; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG:     movq (%rdi), %rax
; CHECK-DAG:     movq 8(%rdi), %rdx

; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK:         movq %rax, %rbx
  ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
; CHECK:         orq %rsi, %rbx
; CHECK:         movq %rdx, %rcx
; CHECK:         orq [[INCHI]], %rcx
; CHECK:         lock
; CHECK:         cmpxchg16b (%rdi)
; CHECK:         jne [[LOOP]]

; CHECK:         movq %rax, _var
; CHECK:         movq %rdx, _var+8

  %val = atomicrmw or i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
66
; atomicrmw add on i128: 128-bit addition must use add/adc (carry from the
; low half into the high half) inside the cmpxchg16b loop.
define void @fetch_and_add(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_add:
; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG:     movq (%rdi), %rax
; CHECK-DAG:     movq 8(%rdi), %rdx

; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK:         movq %rax, %rbx
  ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
; CHECK:         addq %rsi, %rbx
; CHECK:         movq %rdx, %rcx
; CHECK:         adcq [[INCHI]], %rcx
; CHECK:         lock
; CHECK:         cmpxchg16b (%rdi)
; CHECK:         jne [[LOOP]]

; CHECK:         movq %rax, _var
; CHECK:         movq %rdx, _var+8

  %val = atomicrmw add i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
90
; atomicrmw sub on i128: 128-bit subtraction must use sub/sbb (borrow from
; the low half into the high half) inside the cmpxchg16b loop.
define void @fetch_and_sub(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_sub:
; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG:     movq (%rdi), %rax
; CHECK-DAG:     movq 8(%rdi), %rdx

; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK:         movq %rax, %rbx
  ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
; CHECK:         subq %rsi, %rbx
; CHECK:         movq %rdx, %rcx
; CHECK:         sbbq [[INCHI]], %rcx
; CHECK:         lock
; CHECK:         cmpxchg16b (%rdi)
; CHECK:         jne [[LOOP]]

; CHECK:         movq %rax, _var
; CHECK:         movq %rdx, _var+8

  %val = atomicrmw sub i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
114
; atomicrmw min (signed) on i128: the 128-bit compare is split into an
; unsigned low-half compare (setbe) and a signed high-half compare (setle);
; cmov then selects old vs. incoming value for the cmpxchg16b loop.
define void @fetch_and_min(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_min:
; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG:     movq (%rdi), %rax
; CHECK-DAG:     movq 8(%rdi), %rdx

; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK:         cmpq %rsi, %rax
; CHECK:         setbe [[CMP:%[a-z0-9]+]]
; CHECK:         cmpq [[INCHI]], %rdx
; CHECK:         setle [[HICMP:%[a-z0-9]+]]
; CHECK:         je [[USE_LO:.?LBB[0-9]+_[0-9]+]]

; CHECK:         movb [[HICMP]], [[CMP]]
; CHECK: [[USE_LO]]:
; CHECK:         testb [[CMP]], [[CMP]]
; CHECK:         movq %rsi, %rbx
; CHECK:         cmovneq %rax, %rbx
; CHECK:         movq [[INCHI]], %rcx
; CHECK:         cmovneq %rdx, %rcx
; CHECK:         lock
; CHECK:         cmpxchg16b (%rdi)
; CHECK:         jne [[LOOP]]

; CHECK:         movq %rax, _var
; CHECK:         movq %rdx, _var+8

  %val = atomicrmw min i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
146
; atomicrmw max (signed) on i128: like fetch_and_min but with the opposite
; conditions (setae for the low half, setge for the signed high half).
define void @fetch_and_max(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_max:
; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG:     movq (%rdi), %rax
; CHECK-DAG:     movq 8(%rdi), %rdx

; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK:         cmpq %rsi, %rax
; CHECK:         setae [[CMP:%[a-z0-9]+]]
; CHECK:         cmpq [[INCHI]], %rdx
; CHECK:         setge [[HICMP:%[a-z0-9]+]]
; CHECK:         je [[USE_LO:.?LBB[0-9]+_[0-9]+]]

; CHECK:         movb [[HICMP]], [[CMP]]
; CHECK: [[USE_LO]]:
; CHECK:         testb [[CMP]], [[CMP]]
; CHECK:         movq %rsi, %rbx
; CHECK:         cmovneq %rax, %rbx
; CHECK:         movq [[INCHI]], %rcx
; CHECK:         cmovneq %rdx, %rcx
; CHECK:         lock
; CHECK:         cmpxchg16b (%rdi)
; CHECK:         jne [[LOOP]]

; CHECK:         movq %rax, _var
; CHECK:         movq %rdx, _var+8

  %val = atomicrmw max i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
178
; atomicrmw umin (unsigned) on i128: both halves use unsigned conditions
; (setbe) before the cmov selection and cmpxchg16b loop.
define void @fetch_and_umin(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umin:
; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG:     movq (%rdi), %rax
; CHECK-DAG:     movq 8(%rdi), %rdx

; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK:         cmpq %rsi, %rax
; CHECK:         setbe [[CMP:%[a-z0-9]+]]
; CHECK:         cmpq [[INCHI]], %rdx
; CHECK:         setbe [[HICMP:%[a-z0-9]+]]
; CHECK:         je [[USE_LO:.?LBB[0-9]+_[0-9]+]]

; CHECK:         movb [[HICMP]], [[CMP]]
; CHECK: [[USE_LO]]:
; CHECK:         testb [[CMP]], [[CMP]]
; CHECK:         movq %rsi, %rbx
; CHECK:         cmovneq %rax, %rbx
; CHECK:         movq [[INCHI]], %rcx
; CHECK:         cmovneq %rdx, %rcx
; CHECK:         lock
; CHECK:         cmpxchg16b (%rdi)
; CHECK:         jne [[LOOP]]

; CHECK:         movq %rax, _var
; CHECK:         movq %rdx, _var+8

  %val = atomicrmw umin i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
210
; atomicrmw umax (unsigned) on i128: unsigned conditions (setb/seta) select
; the larger value before the cmpxchg16b loop.
define void @fetch_and_umax(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umax:
; CHECK-DAG:     movq %rdx, [[INCHI:%[a-z0-9]+]]
; CHECK-DAG:     movq (%rdi), %rax
; CHECK-DAG:     movq 8(%rdi), %rdx

; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK:         cmpq %rax, %rsi
; CHECK:         setb [[CMP:%[a-z0-9]+]]
; CHECK:         cmpq [[INCHI]], %rdx
; CHECK:         seta [[HICMP:%[a-z0-9]+]]
; CHECK:         je [[USE_LO:.?LBB[0-9]+_[0-9]+]]

; CHECK:         movb [[HICMP]], [[CMP]]
; CHECK: [[USE_LO]]:
; CHECK:         testb [[CMP]], [[CMP]]
; CHECK:         movq %rsi, %rbx
; CHECK:         cmovneq %rax, %rbx
; CHECK:         movq [[INCHI]], %rcx
; CHECK:         cmovneq %rdx, %rcx
; CHECK:         lock
; CHECK:         cmpxchg16b (%rdi)
; CHECK:         jne [[LOOP]]

; CHECK:         movq %rax, _var
; CHECK:         movq %rdx, _var+8

  %val = atomicrmw umax i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
242
; A seq_cst i128 load is implemented as a cmpxchg16b with a zero
; expected/new value pair (all four regs zeroed), which leaves the loaded
; value in RDX:RAX.
define i128 @atomic_load_seq_cst(i128* %p) {
; CHECK-LABEL: atomic_load_seq_cst:
; CHECK: xorl %eax, %eax
; CHECK: xorl %edx, %edx
; CHECK: xorl %ebx, %ebx
; CHECK: xorl %ecx, %ecx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)

   %r = load atomic i128* %p seq_cst, align 16
   ret i128 %r
}
255
; A monotonic (relaxed) i128 load uses the same zeroed cmpxchg16b sequence
; as the seq_cst case.
define i128 @atomic_load_relaxed(i128* %p) {
; CHECK-LABEL: atomic_load_relaxed:
; CHECK: xorl %eax, %eax
; CHECK: xorl %edx, %edx
; CHECK: xorl %ebx, %ebx
; CHECK: xorl %ecx, %ecx
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)

   %r = load atomic i128* %p monotonic, align 16
   ret i128 %r
}
268
; A seq_cst i128 store expands to a cmpxchg16b loop (no libcall); the
; CHECK-NOT guards against regressing to the __sync_lock_test_and_set_16
; runtime call.
define void @atomic_store_seq_cst(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_seq_cst:
; CHECK:         movq %rdx, %rcx
; CHECK:         movq %rsi, %rbx
; CHECK:         movq (%rdi), %rax
; CHECK:         movq 8(%rdi), %rdx

; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK:         lock
; CHECK:         cmpxchg16b (%rdi)
; CHECK:         jne [[LOOP]]
; CHECK-NOT:     callq ___sync_lock_test_and_set_16

   store atomic i128 %in, i128* %p seq_cst, align 16
   ret void
}
285
; A release i128 store uses the same cmpxchg16b store loop.
define void @atomic_store_release(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_release:
; CHECK:         movq %rdx, %rcx
; CHECK:         movq %rsi, %rbx
; CHECK:         movq (%rdi), %rax
; CHECK:         movq 8(%rdi), %rdx

; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK:         lock
; CHECK:         cmpxchg16b (%rdi)
; CHECK:         jne [[LOOP]]

   store atomic i128 %in, i128* %p release, align 16
   ret void
}
301
; An unordered (relaxed) i128 store also requires the cmpxchg16b store loop,
; since a plain 16-byte store is not atomic.
define void @atomic_store_relaxed(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_relaxed:
; CHECK:         movq %rdx, %rcx
; CHECK:         movq %rsi, %rbx
; CHECK:         movq (%rdi), %rax
; CHECK:         movq 8(%rdi), %rdx

; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
; CHECK:         lock
; CHECK:         cmpxchg16b (%rdi)
; CHECK:         jne [[LOOP]]

   store atomic i128 %in, i128* %p unordered, align 16
   ret void
}
317