; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s


; atom0: a seq_cst `atomicrmw add` on an i32 is expected to produce
; `atom.add.u32`. The value yielded by the atomicrmw is returned so that it
; is used.
; CHECK-LABEL: atom0
define i32 @atom0(i32* %addr, i32 %val) {
; CHECK: atom.add.u32
  %ret = atomicrmw add i32* %addr, i32 %val seq_cst
  ret i32 %ret
}

; atom1: 64-bit counterpart of atom0; a seq_cst `atomicrmw add` on an i64 is
; expected to produce `atom.add.u64`.
; CHECK-LABEL: atom1
define i64 @atom1(i64* %addr, i64 %val) {
; CHECK: atom.add.u64
  %ret = atomicrmw add i64* %addr, i64 %val seq_cst
  ret i64 %ret
}

; atom2: a seq_cst `atomicrmw sub` on an i32. The expected output is a
; `neg.s32` followed (in that order) by an `atom.add.u32`, i.e. the
; subtraction is emitted as an atomic add of the negated operand.
; CHECK-LABEL: atom2
define i32 @atom2(i32* %subr, i32 %val) {
; CHECK: neg.s32
; CHECK: atom.add.u32
  %ret = atomicrmw sub i32* %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom3: 64-bit counterpart of atom2; a seq_cst `atomicrmw sub` on an i64 is
; expected to produce `neg.s64` followed by `atom.add.u64`.
; CHECK-LABEL: atom3
define i64 @atom3(i64* %subr, i64 %val) {
; CHECK: neg.s64
; CHECK: atom.add.u64
  %ret = atomicrmw sub i64* %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom4: a seq_cst `atomicrmw and` on an i32 is expected to produce
; `atom.and.b32`.
; CHECK-LABEL: atom4
define i32 @atom4(i32* %subr, i32 %val) {
; CHECK: atom.and.b32
  %ret = atomicrmw and i32* %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom5: a seq_cst `atomicrmw and` on an i64 is expected to produce
; `atom.and.b64`.
; CHECK-LABEL: atom5
define i64 @atom5(i64* %subr, i64 %val) {
; CHECK: atom.and.b64
  %ret = atomicrmw and i64* %subr, i64 %val seq_cst
  ret i64 %ret
}

;; `atomicrmw nand` is not yet supported, so atom6 and atom7 are kept
;; commented out (and carry no FileCheck directives).
;define i32 @atom6(i32* %subr, i32 %val) {
;  %ret = atomicrmw nand i32* %subr, i32 %val seq_cst
;  ret i32 %ret
;}

;define i64 @atom7(i64* %subr, i64 %val) {
;  %ret = atomicrmw nand i64* %subr, i64 %val seq_cst
;  ret i64 %ret
;}

; atom8: a seq_cst `atomicrmw or` on an i32 is expected to produce
; `atom.or.b32`.
; CHECK-LABEL: atom8
define i32 @atom8(i32* %subr, i32 %val) {
; CHECK: atom.or.b32
  %ret = atomicrmw or i32* %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom9: a seq_cst `atomicrmw or` on an i64 is expected to produce
; `atom.or.b64`.
; CHECK-LABEL: atom9
define i64 @atom9(i64* %subr, i64 %val) {
; CHECK: atom.or.b64
  %ret = atomicrmw or i64* %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom10: a seq_cst `atomicrmw xor` on an i32 is expected to produce
; `atom.xor.b32`.
; CHECK-LABEL: atom10
define i32 @atom10(i32* %subr, i32 %val) {
; CHECK: atom.xor.b32
  %ret = atomicrmw xor i32* %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom11: a seq_cst `atomicrmw xor` on an i64 is expected to produce
; `atom.xor.b64`.
; CHECK-LABEL: atom11
define i64 @atom11(i64* %subr, i64 %val) {
; CHECK: atom.xor.b64
  %ret = atomicrmw xor i64* %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom12: a seq_cst signed `atomicrmw max` on an i32 is expected to produce
; `atom.max.s32`.
; CHECK-LABEL: atom12
define i32 @atom12(i32* %subr, i32 %val) {
; CHECK: atom.max.s32
  %ret = atomicrmw max i32* %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom13: a seq_cst signed `atomicrmw max` on an i64 is expected to produce
; `atom.max.s64`.
; CHECK-LABEL: atom13
define i64 @atom13(i64* %subr, i64 %val) {
; CHECK: atom.max.s64
  %ret = atomicrmw max i64* %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom14: a seq_cst signed `atomicrmw min` on an i32 is expected to produce
; `atom.min.s32`.
; CHECK-LABEL: atom14
define i32 @atom14(i32* %subr, i32 %val) {
; CHECK: atom.min.s32
  %ret = atomicrmw min i32* %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom15: a seq_cst signed `atomicrmw min` on an i64 is expected to produce
; `atom.min.s64`.
; CHECK-LABEL: atom15
define i64 @atom15(i64* %subr, i64 %val) {
; CHECK: atom.min.s64
  %ret = atomicrmw min i64* %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom16: a seq_cst unsigned `atomicrmw umax` on an i32 is expected to produce
; `atom.max.u32`.
; CHECK-LABEL: atom16
define i32 @atom16(i32* %subr, i32 %val) {
; CHECK: atom.max.u32
  %ret = atomicrmw umax i32* %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom17: a seq_cst unsigned `atomicrmw umax` on an i64 is expected to produce
; `atom.max.u64`.
; CHECK-LABEL: atom17
define i64 @atom17(i64* %subr, i64 %val) {
; CHECK: atom.max.u64
  %ret = atomicrmw umax i64* %subr, i64 %val seq_cst
  ret i64 %ret
}

; atom18: a seq_cst unsigned `atomicrmw umin` on an i32 is expected to produce
; `atom.min.u32`.
; CHECK-LABEL: atom18
define i32 @atom18(i32* %subr, i32 %val) {
; CHECK: atom.min.u32
  %ret = atomicrmw umin i32* %subr, i32 %val seq_cst
  ret i32 %ret
}

; atom19: a seq_cst unsigned `atomicrmw umin` on an i64 is expected to produce
; `atom.min.u64`.
; CHECK-LABEL: atom19
define i64 @atom19(i64* %subr, i64 %val) {
; CHECK: atom.min.u64
  %ret = atomicrmw umin i64* %subr, i64 %val seq_cst
  ret i64 %ret
}

; Float atomic-add intrinsic overloaded on a `float*` pointer with no explicit
; address space (the `p0f32` suffix).
declare float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %addr, float %val)

; atomic_add_f32_generic: calling the p0f32 intrinsic is expected to produce
; `atom.add.f32`, i.e. the pattern carries no state-space qualifier.
; CHECK-LABEL: atomic_add_f32_generic
define float @atomic_add_f32_generic(float* %addr, float %val) {
; CHECK: atom.add.f32
  %ret = call float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %addr, float %val)
  ret float %ret
}

; Float atomic-add intrinsic overloaded on a `float addrspace(1)*` pointer
; (the `p1f32` suffix).
declare float @llvm.nvvm.atomic.load.add.f32.p1f32(float addrspace(1)* %addr, float %val)

; atomic_add_f32_addrspace1: with an addrspace(1) pointer the call is expected
; to produce the `.global`-qualified form, `atom.global.add.f32`.
; CHECK-LABEL: atomic_add_f32_addrspace1
define float @atomic_add_f32_addrspace1(float addrspace(1)* %addr, float %val) {
; CHECK: atom.global.add.f32
  %ret = call float @llvm.nvvm.atomic.load.add.f32.p1f32(float addrspace(1)* %addr, float %val)
  ret float %ret
}

; Float atomic-add intrinsic overloaded on a `float addrspace(3)*` pointer
; (the `p3f32` suffix).
declare float @llvm.nvvm.atomic.load.add.f32.p3f32(float addrspace(3)* %addr, float %val)

; atomic_add_f32_addrspace3: with an addrspace(3) pointer the call is expected
; to produce the `.shared`-qualified form, `atom.shared.add.f32`.
; CHECK-LABEL: atomic_add_f32_addrspace3
define float @atomic_add_f32_addrspace3(float addrspace(3)* %addr, float %val) {
; CHECK: atom.shared.add.f32
  %ret = call float @llvm.nvvm.atomic.load.add.f32.p3f32(float addrspace(3)* %addr, float %val)
  ret float %ret
}

; atomic_cmpxchg_i32: a seq_cst/seq_cst `cmpxchg` on an i32 is expected to
; produce `atom.cas.b32`.
; NOTE(review): the cmpxchg result (%pairold) is never used and the function
; returns its own %new argument instead -- confirm this is intentional and
; that the test is not meant to return the previously stored value.
; CHECK-LABEL: atomic_cmpxchg_i32
define i32 @atomic_cmpxchg_i32(i32* %addr, i32 %cmp, i32 %new) {
; CHECK: atom.cas.b32
  %pairold = cmpxchg i32* %addr, i32 %cmp, i32 %new seq_cst seq_cst
  ret i32 %new
}

; atomic_cmpxchg_i64: a seq_cst/seq_cst `cmpxchg` on an i64 is expected to
; produce `atom.cas.b64`.
; NOTE(review): the cmpxchg result (%pairold) is never used and the function
; returns its own %new argument instead -- confirm this is intentional and
; that the test is not meant to return the previously stored value.
; CHECK-LABEL: atomic_cmpxchg_i64
define i64 @atomic_cmpxchg_i64(i64* %addr, i64 %cmp, i64 %new) {
; CHECK: atom.cas.b64
  %pairold = cmpxchg i64* %addr, i64 %cmp, i64 %new seq_cst seq_cst
  ret i64 %new
}
