1; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7,UNPACKED-TID %s
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,PACKED-TID %s
3
; Leaf callee that reads workitem id x and stores it with a volatile store.
; The checks expect the id to be masked out of v0 with 0x3ff.
; GCN-LABEL: {{^}}use_workitem_id_x:
; GCN: s_waitcnt
; GCN: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v0
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
15
; Leaf callee that reads workitem id y and stores it with a volatile store.
; The checks expect a 10-bit field extract from v0 starting at bit 10.
; GCN-LABEL: {{^}}use_workitem_id_y:
; GCN: s_waitcnt
; GCN: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
27
; Leaf callee that reads workitem id z and stores it with a volatile store.
; The checks expect a 10-bit field extract from v0 starting at bit 20.
; GCN-LABEL: {{^}}use_workitem_id_z:
; GCN: s_waitcnt
; GCN: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
39
; Leaf callee that stores workitem ids x and y.
; Both values are expected to be extracted from v0; the extracts and the two
; stores may appear in any order relative to each other.
; GCN-LABEL: {{^}}use_workitem_id_xy:
; GCN: s_waitcnt
; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}
55
; Leaf callee that stores workitem ids x, y and z.
; All three values are expected to be extracted from v0.
; GCN-LABEL: {{^}}use_workitem_id_xyz:
; GCN: s_waitcnt
; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  store volatile i32 %val2, i32 addrspace(1)* undef
  ret void
}
75
; Leaf callee that stores workitem ids x and z.
; Both values are expected to be extracted from v0.
; GCN-LABEL: {{^}}use_workitem_id_xz:
; GCN: s_waitcnt
; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDX]]
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}
91
; Leaf callee that stores workitem ids y and z.
; Both values are expected to be extracted from v0.
; GCN-LABEL: {{^}}use_workitem_id_yz:
; GCN: s_waitcnt
; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}
107
; Kernel whose only work is calling use_workitem_id_x.
; v0 must not be mentioned on either side of the call, and the emitted
; kernel descriptor must carry .amdhsa_system_vgpr_workitem_id 0.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:

; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0

; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
  call void @use_workitem_id_x()
  ret void
}
119
; Kernel whose only work is calling use_workitem_id_y.
; On the kaveri run the y id arriving in v1 is expected to be shifted left
; by 10 into v0 before the call; otherwise v0/v1 must not be mentioned.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:

; GCN-NOT: v0
; GCN-NOT: v1
; UNPACKED-TID: v_lshlrev_b32_e32 v0, 10, v1
; UNPACKED-TID-NOT: v0
; UNPACKED-TID-NOT: v1
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
  call void @use_workitem_id_y()
  ret void
}
134
; Kernel whose only work is calling use_workitem_id_z.
; On the kaveri run the z id arriving in v2 is expected to be shifted left
; by 20 into v0 before the call.
; NOTE(review): after the shift, the second exclusion names v1 rather than v2
; (the register the shift reads); the y variant re-checks its own source
; register. Confirm whether v2 was intended.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:

; GCN-NOT: v0
; GCN-NOT: v2
; UNPACKED-TID: v_lshlrev_b32_e32 v0, 20, v2
; UNPACKED-TID-NOT: v0
; UNPACKED-TID-NOT: v1
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
  call void @use_workitem_id_z()
  ret void
}
149
; Kernel whose only work is calling use_workitem_id_xy.
; On the kaveri run, y (v1) is expected to be shifted left by 10 and OR-ed
; into v0 before the call.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
; UNPACKED-TID-NOT: v0
; UNPACKED-TID-NOT: v1
; UNPACKED-TID: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
; UNPACKED-TID: v_or_b32_e32 v0, v0, [[IDY]]
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
  call void @use_workitem_id_xy()
  ret void
}
162
; Kernel whose only work is calling use_workitem_id_xz.
; On the kaveri run, z (v2) is expected to be shifted left by 20 and OR-ed
; into v0 before the call.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
; UNPACKED-TID-NOT: v0
; UNPACKED-TID-NOT: v2
; UNPACKED-TID: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
; UNPACKED-TID: v_or_b32_e32 v0, v0, [[IDZ]]
; GCN-NOT: v0
; GCN-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
  call void @use_workitem_id_xz()
  ret void
}
175
; Kernel whose only work is calling use_workitem_id_yz.
; On the kaveri run, y (v1) and z (v2) are expected to be shifted left by 10
; and 20 respectively and OR-ed together into v0 before the call.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
; UNPACKED-TID-NOT: v1
; UNPACKED-TID-NOT: v2
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
; UNPACKED-TID: v_or_b32_e32 v0, [[IDY]], [[IDZ]]
; GCN-NOT: v1
; GCN-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
  call void @use_workitem_id_yz()
  ret void
}
189
; Kernel whose only work is calling use_workitem_id_xyz.
; On the kaveri run, y (v1) and z (v2) are expected to be shifted left by 10
; and 20 respectively and each OR-ed into v0 before the call.
; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
; UNPACKED-TID-NOT: v0
; UNPACKED-TID-NOT: v1
; UNPACKED-TID-NOT: v2
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, [[IDY]]
; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, [[IDZ]]
; GCN-NOT: v0
; GCN-NOT: v1
; GCN-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
  call void @use_workitem_id_xyz()
  ret void
}
206
; Non-kernel function that forwards to use_workitem_id_x.
; v0 must not be mentioned before or after the call.
; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_x() #1 {
  call void @use_workitem_id_x()
  ret void
}
215
; Non-kernel function that forwards to use_workitem_id_y.
; v0 must not be mentioned before or after the call.
; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_y() #1 {
  call void @use_workitem_id_y()
  ret void
}
224
; Non-kernel function that forwards to use_workitem_id_z.
; v0 must not be mentioned before or after the call.
; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_z() #1 {
  call void @use_workitem_id_z()
  ret void
}
233
; Callee with one explicit i32 argument that also reads workitem id x.
; The checks expect the argument to be stored from v0 and the id to be
; masked out of v1.
; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
; GCN: s_waitcnt
; GCN-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v1
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
245
; Callee with one explicit i32 argument that also reads workitem id y.
; The checks expect the argument to be stored from v0 and the id to be
; extracted from v1 (10 bits starting at bit 10).
; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
; GCN: s_waitcnt
; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 10, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
257
; Callee with one explicit i32 argument that also reads workitem id z.
; The checks expect the argument to be stored from v0 and the id to be
; extracted from v1 (10 bits starting at bit 20).
; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
; GCN: s_waitcnt
; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}
269
270
; Kernel calling other_arg_use_workitem_id_x with the constant 555 (0x22b).
; The id in v0 is expected to be copied to v1 so that v0 can hold the
; explicit argument.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:

; GCN: v_mov_b32_e32 v1, v0
; GCN: v_mov_b32_e32 v0, 0x22b
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
  call void @other_arg_use_workitem_id_x(i32 555)
  ret void
}
282
283
; Kernel calling other_arg_use_workitem_id_y with the constant 555 (0x22b).
; The kaveri run expects v1 to be shifted left by 10 in place; the gfx90a
; run expects v0 to be copied to v1. v0 then receives the constant.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:

; UNPACKED-TID: v_lshlrev_b32_e32 v1, 10, v1
; PACKED-TID:   v_mov_b32_e32 v1, v0
; GCN-NOT: v1
; GCN: v_mov_b32_e32 v0, 0x22b
; GCN-NOT: v1
; GCN: s_swappc_b64
; GCN-NOT: v0

; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
  call void @other_arg_use_workitem_id_y(i32 555)
  ret void
}
299
; Kernel calling other_arg_use_workitem_id_z with the constant 555 (0x22b).
; The kaveri run expects v2 shifted left by 20 into v1; the gfx90a run
; expects v0 to be copied to v1. v0 receives the constant.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:

; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 20, v2
; PACKED-TID-DAG: v_mov_b32_e32 v1, v0
; GCN: s_swappc_b64
; GCN-NOT: v0

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
  call void @other_arg_use_workitem_id_z(i32 555)
  ret void
}
313
; Callee taking 32 explicit i32 arguments that also reads workitem id x.
; The checks expect the id to be loaded from the stack at s32 into v32 and
; then masked with 0x3ff before being stored.
; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
; GCN: v_and_b32_e32 v32, 0x3ff, v32
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
; GCN: s_setpc_b64
define void @too_many_args_use_workitem_id_x(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef

  ; Store every explicit argument so that each one stays live.
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef

  ret void
}
365
; Kernel calling too_many_args_use_workitem_id_x with 32 constants.
; The checks expect s32 to be initialised to 0 and v0 to be stored to the
; outgoing stack slot at s32 before the call.
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:

; GCN: s_mov_b32 s32, 0
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
  call void @too_many_args_use_workitem_id_x(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}
385
; Non-kernel caller of too_many_args_use_workitem_id_x that itself takes one
; i32 argument. The checks expect s33 to be set from s32 and v1 to be stored
; to the outgoing stack slot at s32 before the call.
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
; GCN: s_mov_b32 s33, s32
; GCN: buffer_store_dword v1, off, s[0:3], s32{{$}}
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void @too_many_args_use_workitem_id_x(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}
403
; Requires loading and storing to stack slot.
; A 32-argument function forwards all of its arguments to
; too_many_args_use_workitem_id_x. The checks expect the incoming stack value
; to be loaded from s33 into v32 and re-stored at s32 for the callee, with
; v40 spilled and reloaded around the call.
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}}

; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}

; GCN: s_swappc_b64

; GCN: s_addk_i32 s32, 0xfc00{{$}}
; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN: s_setpc_b64
define void @too_many_args_call_too_many_args_use_workitem_id_x(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  call void @too_many_args_use_workitem_id_x(
    i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
    i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
    i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
    i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
  ret void
}
429
; stack layout:
; frame[0] = byval arg32
; frame[1] = stack passed workitem ID x
; frame[2] = VGPR spill slot

; Callee taking 32 i32 arguments plus a byval i32 in addrspace(5), and also
; reading workitem id x. The checks expect the id to be loaded from
; s32 offset 4 and the byval value from s32 (offset 0).
; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
; GFX7: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX90A: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GCN-DAG: s_waitcnt
; GFX7: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX90A: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32,
; GFX7: buffer_load_dword v0, off, s[0:3], s32 glc{{$}}
; GFX90A: buffer_load_dword v0, off, s[0:3], s32 glc{{$}}
; GCN: s_setpc_b64
define void @too_many_args_use_workitem_id_x_byval(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32 addrspace(5)* byval(i32) %arg32) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef

  ; Store every explicit i32 argument so that each one stays live.
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef
  ; Volatile load keeps the byval argument in use.
  %private = load volatile i32, i32 addrspace(5)* %arg32
  ret void
}
490
; Outgoing argument area, as pinned by the checks below:
; sp[0] = byval
; sp[1] = stack passed workitem ID x

; Kernel that writes 999 (0x3e7) to a private alloca and passes it byval,
; after 32 constant arguments, to too_many_args_use_workitem_id_x_byval.
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}

; GCN: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
; GCN: s_movk_i32 s32, 0x400
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4

; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 999, i32 addrspace(5)* %alloca
  call void @too_many_args_use_workitem_id_x_byval(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320,
    i32 addrspace(5)* byval(i32) %alloca)
  ret void
}
523
; Non-kernel variant of the byval caller: writes 999 (0x3e7) to a private
; alloca and passes it byval after 32 constant arguments. The checks expect
; the value to be stored at s33, reloaded from s33 and copied to the
; outgoing slot at s32 before the call.
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GFX7: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
; GFX90A: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 999, i32 addrspace(5)* %alloca
  call void @too_many_args_use_workitem_id_x_byval(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320,
    i32 addrspace(5)* byval(i32) %alloca)
  ret void
}
547
; Callee taking 32 explicit i32 arguments that reads workitem ids x, y and z.
; The checks expect one dword loaded from the stack at s32 into v32, with
; the ids extracted from it.
; NOTE(review): the two directives below spelled with the prefixes GCN90A and
; GCN7 use names that neither command line at the top of this file enables,
; so FileCheck never evaluates them. They look like typos for the GFX90A and
; GFX7 prefixes; confirm against real llc output before renaming them.
; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
; GFX90A: buffer_load_dword v32, off, s[0:3], s32{{$}}
; GFX90A: v_and_b32_e32 v33, 0x3ff, v32
; GFX90A: v_bfe_u32 v34, v32, 10, 10
; GCN90A: v_bfe_u32 v32, v32, 20, 10
; GFX7:   buffer_load_dword v32, off, s[0:3], s32{{$}}
; GFX7:   v_and_b32_e32 v33, 0x3ff, v32
; GFX7:   v_bfe_u32 v33, v32, 10, 10
; GCN7:   v_bfe_u32 v32, v32, 20, 10
; GFX7:   flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v33{{$}}
; GFX7:   flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32{{$}}
; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v33, off{{$}}
; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v34, off{{$}}
; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32, off{{$}}

; GFX7-COUNT-32: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}
; GFX90A-COUNT-32: global_store_dword v{{\[[0-9]+:[0-9]+\]}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @too_many_args_use_workitem_id_xyz(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val0, i32 addrspace(1)* undef
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val1, i32 addrspace(1)* undef
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val2, i32 addrspace(1)* undef

  ; Store every explicit argument so that each one stays live.
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef

  ret void
}
617
; frame[0] = ID { Z, Y, X }

; Kernel calling too_many_args_use_workitem_id_xyz with 32 constants.
; The kaveri run expects v1 and v2 to be shifted by 10 and 20 and OR-ed into
; v0; both runs then expect v0 to be stored to the outgoing slot at s32.
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:

; GCN-DAG: s_mov_b32 s32, 0

; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v2
; PACKED-TID-NOT: v0
; PACKED-TID-NOT: v1
; PACKED-TID-NOT: v2
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
  call void @too_many_args_use_workitem_id_xyz(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}
647
; Callee taking 31 explicit i32 arguments that reads workitem ids x, y and z.
; Despite the function name, the checks below extract x, y and z all from
; v31 and expect ScratchSize 0, so no id component is expected on the stack.
; v31 = workitem ID { Z, Y, X }

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
; GCN-DAG: {{flat|global}}_store_dword v[0:1], [[IDX]]
; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDY]]
; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[IDZ]]

; GCN-COUNT-31: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}
; GCN-NEXT: s_waitcnt
; GCN: s_setpc_b64
; GCN: ScratchSize: 0
define void @too_many_args_use_workitem_id_x_stack_yz(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val0, i32 addrspace(1)* undef
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val1, i32 addrspace(1)* undef
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val2, i32 addrspace(1)* undef

  ; Store every explicit argument so that each one stays live.
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef

  ret void
}
713
; Kernel calling too_many_args_use_workitem_id_x_stack_yz with 31 constants.
; The kaveri run expects v1 and v2 to be shifted by 10 and 20 and combined
; with v0 into v31; the gfx90a run expects a plain copy of v0 into v31.
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:

; GCN-NOT: v0
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, v2
; PACKED-TID: v_mov_b32_e32 v31, v0

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
  call void @too_many_args_use_workitem_id_x_stack_yz(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310)
  ret void
}
739
; Workitem id intrinsics read by the functions above.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

; #0 is attached to the intrinsic declarations; #1 to the function
; definitions in this file. Both set the flat work-group size range to 1,512.
attributes #0 = { nounwind readnone speculatable "amdgpu-flat-work-group-size"="1,512" }
attributes #1 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
746