1; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s
3
; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
; Callee reading the dispatch-packet pointer; the check pins it to s[4:5],
; the first SGPR input pair for a callable function here.
define hidden void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}
12
; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN-NOT: s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
; Kernel calling use_dispatch_ptr: the dispatch-ptr input must be requested in
; the kernel header, and no extra copy into s[4:5] should be emitted (it is
; already in place for the callee).
define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
  call void @use_dispatch_ptr()
  ret void
}
22
; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
; Callee reading the queue pointer; arrives in s[4:5] as its only SGPR input.
define hidden void @use_queue_ptr() #1 {
  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}
31
; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: enable_sgpr_queue_ptr = 1
; GCN-NOT: s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
; Kernel calling use_queue_ptr: queue ptr is enabled in the kernel header and
; needs no shuffling before the call.
define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
  call void @use_queue_ptr()
  ret void
}
41
; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[4:5], 0x10
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
; The local->flat addrspacecast needs the shared-memory aperture base:
; on CI/VI it is loaded from the queue pointer (s[4:5], offset 0x10), so the
; queue ptr is an implicit input; GFX9 reads it with s_getreg_b32 instead.
define hidden void @use_queue_ptr_addrspacecast() #1 {
  %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
  store volatile i32 0, i32* %asc
  ret void
}
54
; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
; CIVI: enable_sgpr_queue_ptr = 1
; CIVI-NOT: s[4:5]
; CIVI-NOT: s4
; CIVI-NOT: s5
; Only CI/VI need the queue ptr for the callee's addrspacecast (GFX9 uses
; s_getreg_b32), so the header check is CIVI-only.
define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
  call void @use_queue_ptr_addrspacecast()
  ret void
}
64
; Not really supported in callable functions.
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0{{$}}
; In a callable function the kernarg segment pointer has no defined value, so
; codegen materializes a null pointer and loads through it.
define hidden void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}
75
; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; The kernel itself still requests the kernarg segment pointer even though the
; callee cannot receive it.
define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
  call void @use_kernarg_segment_ptr()
  ret void
}
82
; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[4:5]
; Callee consuming the 64-bit dispatch id via inline asm; expected in s[4:5].
define hidden void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}
90
; No kernarg segment so that there is a mov to check. With kernarg
; pointer enabled, it happens to end up in the right place anyway.

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: enable_sgpr_dispatch_id = 1
; GCN-NOT: s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
; Kernel takes no arguments (see note above) so the dispatch id input lands
; directly where the callee expects it, with no copies.
define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
  call void @use_dispatch_id()
  ret void
}
103
; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s4
; Callee consuming workgroup id X; expected in s4.
define hidden void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
112
; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN-NOT: s32
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; GCN: ; use s4
; GCN: s_setpc_b64
; Same as use_workgroup_id_x but with a stack object, checking the workgroup
; id (s4) coexists with stack access off s32 without extra SP adjustment.
define hidden void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
126
; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s4
; Callee consuming workgroup id Y; as the only id used, it arrives in s4.
define hidden void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
135
; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s4
; Callee consuming workgroup id Z; as the only id used, it arrives in s4.
define hidden void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
144
; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s4
; GCN: ; use s5
; Two workgroup ids: packed into consecutive SGPRs s4 (X) and s5 (Y).
define hidden void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}
155
; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s4
; GCN: ; use s5
; GCN: ; use s6
; All three workgroup ids: packed into s4 (X), s5 (Y), s6 (Z).
define hidden void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}
169
; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s4
; GCN: ; use s5
; X and Z only: still packed into consecutive SGPRs s4 and s5 (no gap for Y).
define hidden void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}
180
; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s4
; GCN: ; use s5
; Y and Z only: packed into consecutive SGPRs s4 and s5 (no slot for X).
define hidden void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}
191
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; Kernel receives workgroup id X in s6 and must copy it to the callee's s4;
; s[6:7] is then reused to materialize the callee address.
; GCN-NOT: s6
; GCN: s_mov_b32 s4, s6
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, use_workgroup_id_x@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, use_workgroup_id_x@rel32@hi+12
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}
209
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; X is always enabled, so Y arrives in s7 and is copied to the callee's s4.
; GCN: s_mov_b32 s4, s7
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}
222
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; With Y disabled, Z arrives in s7 (right after X) and is copied to s4.
; GCN: s_mov_b32 s4, s7

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}
236
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; X (s6) and Y (s7) are shifted down to the callee's s4/s5.
; GCN: s_mov_b32 s5, s7
; GCN: s_mov_b32 s4, s6

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
  call void @use_workgroup_id_xy()
  ret void
}
251
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; X/Y/Z in s6/s7/s8 are shifted down to the callee's s4/s5/s6.
; GCN: s_mov_b32 s4, s6
; GCN: s_mov_b32 s5, s7
; GCN: s_mov_b32 s6, s8

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
  call void @use_workgroup_id_xyz()
  ret void
}
267
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; Y disabled, so Z arrives in s7: X (s6) -> s4, Z (s7) -> s5.
; GCN: s_mov_b32 s5, s7
; GCN: s_mov_b32 s4, s6

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
  call void @use_workgroup_id_xz()
  ret void
}
282
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; Callee only needs Y and Z: Y (s7) -> s4, Z (s8) -> s5; X is skipped.
; GCN: s_mov_b32 s4, s7
; GCN: s_mov_b32 s5, s8

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
  call void @use_workgroup_id_yz()
  ret void
}
297
; Argument is in right place already
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s4
; GCN: v_readlane_b32 s4, v40, 0
; Function-to-function call: the workgroup id input is already in s4, so the
; only touch of s4 is restoring it from the CSR spill lane afterwards.
define hidden void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}
306
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s4
; GCN: v_readlane_b32 s4, v40, 0
; Same as the X case: the Y id is forwarded in place, no copy into s4.
define hidden void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}
314
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s4
; GCN: v_readlane_b32 s4, v40, 0
; Same as the X case: the Z id is forwarded in place, no copy into s4.
define hidden void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}
322
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s4
; Mixes a normal VGPR argument (v0) with the workgroup id X SGPR input (s4).
define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
332
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s4
; Mixes a normal VGPR argument (v0) with the workgroup id Y SGPR input (s4).
define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
342
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s4
; Mixes a normal VGPR argument (v0) with the workgroup id Z SGPR input (s4).
define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}
352
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; Kernel sets up both the VGPR argument (555 = 0x22b in v0) and the SGPR
; workgroup id (s6 -> s4) before the call.
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s6

; GCN-DAG: s_mov_b32 s32, 0
; GCN-NOT: s4
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
  call void @other_arg_use_workgroup_id_x(i32 555)
  ret void
}
368
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; VGPR argument in v0; Y id arrives in s7 (after always-on X) and moves to s4.
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s7

; GCN-DAG: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
  call void @other_arg_use_workgroup_id_y(i32 555)
  ret void
}
383
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; VGPR argument is materialized; no specific SGPR copy is pinned here.
; GCN-DAG: v_mov_b32_e32 v0, 0x22b

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
  call void @other_arg_use_workgroup_id_z(i32 555)
  ret void
}
397
; GCN-LABEL: {{^}}use_every_sgpr_input:
; Callee using every implicit SGPR input at once; checks the full packed
; layout: dispatch ptr s[4:5], queue ptr s[6:7], implicitarg ptr s[8:9],
; dispatch id s[10:11], workgroup ids X/Y/Z in s12/s13/s14.
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]

; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_every_sgpr_input() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}
438
; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1
; GCN: enable_sgpr_workgroup_info = 0

; GCN: enable_sgpr_private_segment_buffer = 1
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: enable_sgpr_queue_ptr = 1
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: enable_sgpr_dispatch_id = 1
; GCN: enable_sgpr_flat_scratch_init = 1

; With every input enabled, only the workgroup ids need shifting down
; (s14/s15/s16 -> s12/s13/s14); the pointer pairs are already in place.
; GCN: s_mov_b32 s12, s14
; GCN: s_mov_b32 s13, s15
; GCN: s_mov_b32 s14, s16
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}
461
; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; Function-to-function forwarding: every implicit input is already in the
; right register, so no copies through s6-s13 should appear before the call.
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
; GCN: s_or_saveexec_b64 s[16:17], -1
define hidden void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}
480
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; Function that consumes all inputs itself AND calls a callee needing only the
; workgroup ids: the ids (s12/s13/s14) are copied down to the callee's
; s4/s5/s6, while this function's own uses keep the original registers.
; GCN: s_mov_b32 s4, s12
; GCN: s_mov_b32 s5, s13
; GCN: s_mov_b32 s6, s14
; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14

; GCN: s_swappc_b64
define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  call void @use_workgroup_id_xyz()
  ret void
}
522
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; Here the inputs are used AFTER the call, so all of them must be saved to
; callee-saved SGPRs across s_swappc_b64 and read back afterwards: the pointer
; pairs (s[4:5], s[6:7], s[8:9]) and the three workgroup ids (s12/s13/s14).
; GCN-DAG: s_mov_b32 s33, s32
; GCN-DAG: s_addk_i32 s32, 0x400
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7]


; GCN: s_mov_b32 s4, s12
; GCN: s_mov_b32 s5, s13
; GCN: s_mov_b32 s6, s14

; GCN: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[8:9]

; The saved-id registers must differ from the argument copies; the regex
; character classes exclude s4/s5/s6 and s7 respectively.
; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s12
; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-57-9][0-9]*]], s13
; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-68-9][0-9]*]], s14



; GCN: s_swappc_b64

; After the call the saved copies feed the loads and inline-asm uses.
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}}
; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_X]]:[[HI_X]]{{\]}}, 0x0
; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Y]]:[[HI_Y]]{{\]}}, 0x0
; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Z]]:[[HI_Z]]{{\]}}, 0x0
; GCN: ; use
; GCN: ; use [[SAVE_X]]
; GCN: ; use [[SAVE_Y]]
; GCN: ; use [[SAVE_Z]]
define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  call void @use_workgroup_id_xyz()

  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}
584
; Intrinsic declarations for the implicit ABI inputs exercised above.
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0

; #0 marks the intrinsics; #1 (noinline) keeps the calls as real calls so the
; SGPR argument shuffling is actually emitted.
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }
596