// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
// RUN:   -triple=amdgcn-amd-amdhsa  | opt -S | FileCheck %s
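
// The functions below exercise the __builtin_amdgcn_atomic_inc32/64 and
// __builtin_amdgcn_atomic_dec32/64 builtins. Each call is expected to lower to
// the matching llvm.amdgcn.atomic.inc/dec intrinsic; as the checks show, the
// trailing i32, i32, and i1 operands carry the atomic ordering, the
// synchronization scope, and whether the source-level access was volatile
// (see the test_volatile_*, test_order*, and test_scope* functions).
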
// CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8, addrspace(5)
// CHECK-NEXT:    [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32**
// CHECK-NEXT:    [[RES:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[RES_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[RES]] to i32*
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* [[TMP0]], i32 [[TMP2]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP3]], i32* [[RES_ASCAST]], align 4
// CHECK-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* [[TMP4]], i32 [[TMP6]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP7]], i32* [[RES_ASCAST]], align 4
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) {
  __UINT32_TYPE__ res;
  res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");

  res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
}

// CHECK-LABEL: @_Z29test_non_volatile_parameter64Py(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i64*, align 8, addrspace(5)
// CHECK-NEXT:    [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64**
// CHECK-NEXT:    [[RES:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT:    [[RES_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[RES]] to i64*
// CHECK-NEXT:    store i64* [[PTR:%.*]], i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* [[TMP0]], i64 [[TMP2]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP3]], i64* [[RES_ASCAST]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
// CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* [[TMP4]], i64 [[TMP6]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP7]], i64* [[RES_ASCAST]], align 8
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) {
  __UINT64_TYPE__ res;
  res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");

  res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
}

// CHECK-LABEL: @_Z25test_volatile_parameter32PVj(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8, addrspace(5)
// CHECK-NEXT:    [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32**
// CHECK-NEXT:    [[RES:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[RES_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[RES]] to i32*
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* [[TMP0]], i32 [[TMP2]], i32 7, i32 2, i1 true)
// CHECK-NEXT:    store i32 [[TMP3]], i32* [[RES_ASCAST]], align 4
// CHECK-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP6:%.*]] = load volatile i32, i32* [[TMP5]], align 4
// CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* [[TMP4]], i32 [[TMP6]], i32 7, i32 2, i1 true)
// CHECK-NEXT:    store i32 [[TMP7]], i32* [[RES_ASCAST]], align 4
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ *ptr) {
  __UINT32_TYPE__ res;
  res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");

  res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
}

// CHECK-LABEL: @_Z25test_volatile_parameter64PVy(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i64*, align 8, addrspace(5)
// CHECK-NEXT:    [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64**
// CHECK-NEXT:    [[RES:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT:    [[RES_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[RES]] to i64*
// CHECK-NEXT:    store i64* [[PTR:%.*]], i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load volatile i64, i64* [[TMP1]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* [[TMP0]], i64 [[TMP2]], i32 7, i32 2, i1 true)
// CHECK-NEXT:    store i64 [[TMP3]], i64* [[RES_ASCAST]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP6:%.*]] = load volatile i64, i64* [[TMP5]], align 8
// CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* [[TMP4]], i64 [[TMP6]], i32 7, i32 2, i1 true)
// CHECK-NEXT:    store i64 [[TMP7]], i64* [[RES_ASCAST]], align 8
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ *ptr) {
  __UINT64_TYPE__ res;
  res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");

  res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
}

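// For variables in the shared (LDS), global, and constant address spaces, the
// checks below show the pointer being addrspacecast to the generic address
// space before it is passed to the intrinsic.
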
// CHECK-LABEL: @_Z13test_shared32v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 [[TMP0]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_shared32() {
  __attribute__((shared)) __UINT32_TYPE__ val;

  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "workgroup");

  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
}

// CHECK-LABEL: @_Z13test_shared64v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 [[TMP0]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_shared64() {
  __attribute__((shared)) __UINT64_TYPE__ val;

  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "workgroup");

  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
}

__attribute__((device)) __UINT32_TYPE__ global_val32;
// CHECK-LABEL: @_Z13test_global32v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 [[TMP0]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_global32() {
  global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");

  global_val32 = __builtin_amdgcn_atomic_dec32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
}

__attribute__((device)) __UINT64_TYPE__ global_val64;
// CHECK-LABEL: @_Z13test_global64v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
// CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 [[TMP0]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_global64() {
  global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");

  global_val64 = __builtin_amdgcn_atomic_dec64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
}

__attribute__((constant)) __UINT32_TYPE__ cval32;
// CHECK-LABEL: @_Z15test_constant32v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[LOCAL_VAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LOCAL_VAL]] to i32*
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 [[TMP0]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP1]], i32* [[LOCAL_VAL_ASCAST]], align 4
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4
// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP3]], i32* [[LOCAL_VAL_ASCAST]], align 4
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_constant32() {
  __UINT32_TYPE__ local_val;

  local_val = __builtin_amdgcn_atomic_inc32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");

  local_val = __builtin_amdgcn_atomic_dec32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");
}

__attribute__((constant)) __UINT64_TYPE__ cval64;
// CHECK-LABEL: @_Z15test_constant64v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[LOCAL_VAL:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT:    [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[LOCAL_VAL]] to i64*
// CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8
// CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 [[TMP0]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP1]], i64* [[LOCAL_VAL_ASCAST]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8
// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP3]], i64* [[LOCAL_VAL_ASCAST]], align 8
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_constant64() {
  __UINT64_TYPE__ local_val;

  local_val = __builtin_amdgcn_atomic_inc64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup");

  local_val = __builtin_amdgcn_atomic_dec64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup");
}

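// test_order32 and test_order64 below check how the C++ atomic orderings map
// to the intrinsic's ordering operand: __ATOMIC_RELAXED -> 2, __ATOMIC_CONSUME
// and __ATOMIC_ACQUIRE -> 4, __ATOMIC_RELEASE -> 5, __ATOMIC_ACQ_REL -> 6,
// __ATOMIC_SEQ_CST -> 7.
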
// CHECK-LABEL: @_Z12test_order32v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP0]], i32 2, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP2]], i32 4, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP4]], i32 4, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP5]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP6]], i32 5, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP7]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP8]], i32 6, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP9]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP10]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP11]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_order32() {
  __attribute__((shared)) __UINT32_TYPE__ val;

  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_RELAXED, "workgroup");

  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_CONSUME, "workgroup");

  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, "workgroup");

  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_RELEASE, "workgroup");

  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQ_REL, "workgroup");

  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
}

// CHECK-LABEL: @_Z12test_order64v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP0]], i32 2, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP2]], i32 4, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP4]], i32 4, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP5]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP6:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP6]], i32 5, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP7]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP8:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP8]], i32 6, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP9]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP10:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP10]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP11]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_order64() {
  __attribute__((shared)) __UINT64_TYPE__ val;

  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_RELAXED, "workgroup");

  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_CONSUME, "workgroup");

  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, "workgroup");

  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_RELEASE, "workgroup");

  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQ_REL, "workgroup");

  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
}

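// test_scope32 and test_scope64 below check how the synchronization scope
// string maps to the intrinsic's scope operand: "" -> 1, "workgroup" -> 2,
// "agent" -> 3, "wavefront" -> 4.
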
// CHECK-LABEL: @_Z12test_scope32v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP0]], i32 7, i32 1, i1 false)
// CHECK-NEXT:    store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP4]], i32 7, i32 3, i1 false)
// CHECK-NEXT:    store i32 [[TMP5]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
// CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP6]], i32 7, i32 4, i1 false)
// CHECK-NEXT:    store i32 [[TMP7]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_scope32() {
  __attribute__((shared)) __UINT32_TYPE__ val;

  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "");

  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");

  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "agent");

  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "wavefront");
}

// CHECK-LABEL: @_Z12test_scope64v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP0]], i32 7, i32 1, i1 false)
// CHECK-NEXT:    store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false)
// CHECK-NEXT:    store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP4]], i32 7, i32 3, i1 false)
// CHECK-NEXT:    store i64 [[TMP5]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP6:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
// CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP6]], i32 7, i32 4, i1 false)
// CHECK-NEXT:    store i64 [[TMP7]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
// CHECK-NEXT:    ret void
//
__attribute__((device)) void test_scope64() {
  __attribute__((shared)) __UINT64_TYPE__ val;

  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "");

  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");

  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "agent");

  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "wavefront");
}