1; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3
4
5; FUNC-LABEL: {{^}}ngroups_x:
6; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
7; EG: MOV [[VAL]], KC0[0].X
8
9; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
10; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
11; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.ngroups.x() to the
; addrspace(1) pointer %out.
define void @ngroups_x(i32 addrspace(1)* %out) {
entry:
  %ngroups = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %ngroups, i32 addrspace(1)* %out
  ret void
}
18
19; FUNC-LABEL: {{^}}ngroups_y:
20; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
21; EG: MOV [[VAL]], KC0[0].Y
22
23; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
24; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
25; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.ngroups.y() to the
; addrspace(1) pointer %out.
define void @ngroups_y(i32 addrspace(1)* %out) {
entry:
  %ngroups = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %ngroups, i32 addrspace(1)* %out
  ret void
}
32
33; FUNC-LABEL: {{^}}ngroups_z:
34; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
35; EG: MOV [[VAL]], KC0[0].Z
36
37; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
38; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
39; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.ngroups.z() to the
; addrspace(1) pointer %out.
define void @ngroups_z(i32 addrspace(1)* %out) {
entry:
  %ngroups = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %ngroups, i32 addrspace(1)* %out
  ret void
}
46
47; FUNC-LABEL: {{^}}global_size_x:
48; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
49; EG: MOV [[VAL]], KC0[0].W
50
51; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
52; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
53; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.global.size.x() to the
; addrspace(1) pointer %out.
define void @global_size_x(i32 addrspace(1)* %out) {
entry:
  %gsize = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %gsize, i32 addrspace(1)* %out
  ret void
}
60
61; FUNC-LABEL: {{^}}global_size_y:
62; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
63; EG: MOV [[VAL]], KC0[1].X
64
65; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
66; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
67; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.global.size.y() to the
; addrspace(1) pointer %out.
define void @global_size_y(i32 addrspace(1)* %out) {
entry:
  %gsize = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %gsize, i32 addrspace(1)* %out
  ret void
}
74
75; FUNC-LABEL: {{^}}global_size_z:
76; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
77; EG: MOV [[VAL]], KC0[1].Y
78
79; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
80; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
81; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.global.size.z() to the
; addrspace(1) pointer %out.
define void @global_size_z(i32 addrspace(1)* %out) {
entry:
  %gsize = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %gsize, i32 addrspace(1)* %out
  ret void
}
88
89; FUNC-LABEL: {{^}}local_size_x:
90; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
91; EG: MOV [[VAL]], KC0[1].Z
92
93; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
94; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
95; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.local.size.x() to the
; addrspace(1) pointer %out.
define void @local_size_x(i32 addrspace(1)* %out) {
entry:
  %lsize = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %lsize, i32 addrspace(1)* %out
  ret void
}
102
103; FUNC-LABEL: {{^}}local_size_y:
104; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
105; EG: MOV [[VAL]], KC0[1].W
106
107; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
108; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
109; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.local.size.y() to the
; addrspace(1) pointer %out.
define void @local_size_y(i32 addrspace(1)* %out) {
entry:
  %lsize = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %lsize, i32 addrspace(1)* %out
  ret void
}
116
117; FUNC-LABEL: {{^}}local_size_z:
118; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
119; EG: MOV [[VAL]], KC0[2].X
120
121; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
122; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
123; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.local.size.z() to the
; addrspace(1) pointer %out.
define void @local_size_z(i32 addrspace(1)* %out) {
entry:
  %lsize = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %lsize, i32 addrspace(1)* %out
  ret void
}
130
131; FUNC-LABEL: {{^}}get_work_dim:
132; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
133; EG: MOV [[VAL]], KC0[2].Z
134
135; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
136; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
137; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.AMDGPU.read.workdim() to the
; addrspace(1) pointer %out.
define void @get_work_dim(i32 addrspace(1)* %out) {
entry:
  %workdim = call i32 @llvm.AMDGPU.read.workdim() #0
  store i32 %workdim, i32 addrspace(1)* %out
  ret void
}
144
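; The tgid values are stored in SGPRs, offset by the number of user SGPRs.
; Currently we always use exactly 2 user SGPRs for the pointer to the
; kernel arguments, but this may change in the future.
; NOTE(review): the tgid checks below expect s4, s5 and s6, i.e. an offset of
; 4 rather than 2 -- confirm whether the user SGPR count stated above is stale.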
148
149; FUNC-LABEL: {{^}}tgid_x:
150; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
151; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.tgid.x() to the
; addrspace(1) pointer %out.
define void @tgid_x(i32 addrspace(1)* %out) {
entry:
  %tgid = call i32 @llvm.r600.read.tgid.x() #0
  store i32 %tgid, i32 addrspace(1)* %out
  ret void
}
158
159; FUNC-LABEL: {{^}}tgid_y:
160; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
161; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.tgid.y() to the
; addrspace(1) pointer %out.
define void @tgid_y(i32 addrspace(1)* %out) {
entry:
  %tgid = call i32 @llvm.r600.read.tgid.y() #0
  store i32 %tgid, i32 addrspace(1)* %out
  ret void
}
168
169; FUNC-LABEL: {{^}}tgid_z:
170; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
171; SI: buffer_store_dword [[VVAL]]
; Stores the value returned by llvm.r600.read.tgid.z() to the
; addrspace(1) pointer %out.
define void @tgid_z(i32 addrspace(1)* %out) {
entry:
  %tgid = call i32 @llvm.r600.read.tgid.z() #0
  store i32 %tgid, i32 addrspace(1)* %out
  ret void
}
178
179; FUNC-LABEL: {{^}}tidig_x:
180; SI: buffer_store_dword v0
; Stores the value returned by llvm.r600.read.tidig.x() to the
; addrspace(1) pointer %out.
define void @tidig_x(i32 addrspace(1)* %out) {
entry:
  %tidig = call i32 @llvm.r600.read.tidig.x() #0
  store i32 %tidig, i32 addrspace(1)* %out
  ret void
}
187
188; FUNC-LABEL: {{^}}tidig_y:
189; SI: buffer_store_dword v1
; Stores the value returned by llvm.r600.read.tidig.y() to the
; addrspace(1) pointer %out.
define void @tidig_y(i32 addrspace(1)* %out) {
entry:
  %tidig = call i32 @llvm.r600.read.tidig.y() #0
  store i32 %tidig, i32 addrspace(1)* %out
  ret void
}
196
197; FUNC-LABEL: {{^}}tidig_z:
198; SI: buffer_store_dword v2
; Stores the value returned by llvm.r600.read.tidig.z() to the
; addrspace(1) pointer %out.
define void @tidig_z(i32 addrspace(1)* %out) {
entry:
  %tidig = call i32 @llvm.r600.read.tidig.z() #0
  store i32 %tidig, i32 addrspace(1)* %out
  ret void
}
205
206declare i32 @llvm.r600.read.ngroups.x() #0
207declare i32 @llvm.r600.read.ngroups.y() #0
208declare i32 @llvm.r600.read.ngroups.z() #0
209
210declare i32 @llvm.r600.read.global.size.x() #0
211declare i32 @llvm.r600.read.global.size.y() #0
212declare i32 @llvm.r600.read.global.size.z() #0
213
214declare i32 @llvm.r600.read.local.size.x() #0
215declare i32 @llvm.r600.read.local.size.y() #0
216declare i32 @llvm.r600.read.local.size.z() #0
217
218declare i32 @llvm.r600.read.tgid.x() #0
219declare i32 @llvm.r600.read.tgid.y() #0
220declare i32 @llvm.r600.read.tgid.z() #0
221
222declare i32 @llvm.r600.read.tidig.x() #0
223declare i32 @llvm.r600.read.tidig.y() #0
224declare i32 @llvm.r600.read.tidig.z() #0
225
226declare i32 @llvm.AMDGPU.read.workdim() #0
227
228attributes #0 = { readnone }
229