1// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
2// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t
3// RUN: llvm-readelf -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
4// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
5
6// READOBJ: Section Headers
7// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
8// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        000100 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64
9
10// READOBJ: Relocation section '.rela.rodata' at offset
11// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
12// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
13// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
14// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310
15
16// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
17// READOBJ: {{[0-9]+}}: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
18// READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
19// READOBJ: {{[0-9]+}}: 0000000000000300  0 FUNC    LOCAL  PROTECTED 2 disabled_user_sgpr
20// READOBJ: {{[0-9]+}}: 00000000000000c0 64 OBJECT  LOCAL  DEFAULT   3 disabled_user_sgpr.kd
21// READOBJ: {{[0-9]+}}: 0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
22// READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
23// READOBJ: {{[0-9]+}}: 0000000000000200  0 FUNC    LOCAL  PROTECTED 2 special_sgpr
24// READOBJ: {{[0-9]+}}: 0000000000000080 64 OBJECT  LOCAL  DEFAULT   3 special_sgpr.kd
25
26// OBJDUMP: Contents of section .rodata
27// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
28// minimal
29// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
30// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
31// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
32// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
33// complete
34// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
35// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
36// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
37// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
38// special_sgpr
39// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
40// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
41// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
42// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000
43// disabled_user_sgpr
44// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
45// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
46// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
47// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000
48
49.text
50// ASM: .text
51
52.amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
53// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
54
55.p2align 8
56.type minimal,@function
57minimal:
58  s_endpgm
59
60.p2align 8
61.type complete,@function
62complete:
63  s_endpgm
64
65.p2align 8
66.type special_sgpr,@function
67special_sgpr:
68  s_endpgm
69
70.p2align 8
71.type disabled_user_sgpr,@function
72disabled_user_sgpr:
73  s_endpgm
74
75.rodata
76// ASM: .rodata
77
78// Test that only specifying required directives is allowed, and that defaulted
79// values are omitted.
80.p2align 6
81.amdhsa_kernel minimal
82  .amdhsa_next_free_vgpr 0
83  .amdhsa_next_free_sgpr 0
84.end_amdhsa_kernel
85
86// ASM: .amdhsa_kernel minimal
87// ASM: .amdhsa_next_free_vgpr 0
88// ASM-NEXT: .amdhsa_next_free_sgpr 0
89// ASM: .end_amdhsa_kernel
90
91// Test that we can specify all available directives with non-default values.
92.p2align 6
93.amdhsa_kernel complete
94  .amdhsa_group_segment_fixed_size 1
95  .amdhsa_private_segment_fixed_size 1
96  .amdhsa_user_sgpr_private_segment_buffer 1
97  .amdhsa_user_sgpr_dispatch_ptr 1
98  .amdhsa_user_sgpr_queue_ptr 1
99  .amdhsa_user_sgpr_kernarg_segment_ptr 1
100  .amdhsa_user_sgpr_dispatch_id 1
101  .amdhsa_user_sgpr_flat_scratch_init 1
102  .amdhsa_user_sgpr_private_segment_size 1
103  .amdhsa_system_sgpr_private_segment_wavefront_offset 1
104  .amdhsa_system_sgpr_workgroup_id_x 0
105  .amdhsa_system_sgpr_workgroup_id_y 1
106  .amdhsa_system_sgpr_workgroup_id_z 1
107  .amdhsa_system_sgpr_workgroup_info 1
108  .amdhsa_system_vgpr_workitem_id 1
109  .amdhsa_next_free_vgpr 9
110  .amdhsa_next_free_sgpr 27
111  .amdhsa_reserve_vcc 0
112  .amdhsa_reserve_flat_scratch 0
113  .amdhsa_reserve_xnack_mask 0
114  .amdhsa_float_round_mode_32 1
115  .amdhsa_float_round_mode_16_64 1
116  .amdhsa_float_denorm_mode_32 1
117  .amdhsa_float_denorm_mode_16_64 0
118  .amdhsa_dx10_clamp 0
119  .amdhsa_ieee_mode 0
120  .amdhsa_fp16_overflow 1
121  .amdhsa_exception_fp_ieee_invalid_op 1
122  .amdhsa_exception_fp_denorm_src 1
123  .amdhsa_exception_fp_ieee_div_zero 1
124  .amdhsa_exception_fp_ieee_overflow 1
125  .amdhsa_exception_fp_ieee_underflow 1
126  .amdhsa_exception_fp_ieee_inexact 1
127  .amdhsa_exception_int_div_zero 1
128.end_amdhsa_kernel
129
130// ASM: .amdhsa_kernel complete
131// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
132// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
133// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
134// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
135// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
136// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
137// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
138// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
139// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
140// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
141// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
142// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
143// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
144// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
145// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
146// ASM-NEXT: .amdhsa_next_free_vgpr 9
147// ASM-NEXT: .amdhsa_next_free_sgpr 27
148// ASM-NEXT: .amdhsa_reserve_vcc 0
149// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
150// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
151// ASM-NEXT: .amdhsa_float_round_mode_32 1
152// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
153// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
154// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
155// ASM-NEXT: .amdhsa_dx10_clamp 0
156// ASM-NEXT: .amdhsa_ieee_mode 0
157// ASM-NEXT: .amdhsa_fp16_overflow 1
158// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
159// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
160// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
161// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
162// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
163// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
164// ASM-NEXT: .amdhsa_exception_int_div_zero 1
165// ASM-NEXT: .end_amdhsa_kernel
166
167// Test that we are including special SGPR usage in the granulated count.
168.p2align 6
169.amdhsa_kernel special_sgpr
170  // Same next_free_sgpr as "complete", but...
171  .amdhsa_next_free_sgpr 27
172  // ...on GFX9 this should require an additional 6 SGPRs, pushing us from
173  // 3 granules to 4
174  .amdhsa_reserve_flat_scratch 1
175
176  .amdhsa_reserve_vcc 0
177  .amdhsa_reserve_xnack_mask 0
178
179  .amdhsa_float_denorm_mode_16_64 0
180  .amdhsa_dx10_clamp 0
181  .amdhsa_ieee_mode 0
182  .amdhsa_next_free_vgpr 0
183.end_amdhsa_kernel
184
185// ASM: .amdhsa_kernel special_sgpr
186// ASM: .amdhsa_next_free_vgpr 0
187// ASM-NEXT: .amdhsa_next_free_sgpr 27
188// ASM-NEXT: .amdhsa_reserve_vcc 0
189// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
190// ASM: .amdhsa_float_denorm_mode_16_64 0
191// ASM-NEXT: .amdhsa_dx10_clamp 0
192// ASM-NEXT: .amdhsa_ieee_mode 0
193// ASM: .end_amdhsa_kernel
194
195// Test that explicitly disabling user_sgpr's does not affect the user_sgpr
196// count, i.e. this should produce the same descriptor as minimal.
197.p2align 6
198.amdhsa_kernel disabled_user_sgpr
199  .amdhsa_user_sgpr_private_segment_buffer 0
200  .amdhsa_next_free_vgpr 0
201  .amdhsa_next_free_sgpr 0
202.end_amdhsa_kernel
203
204// ASM: .amdhsa_kernel disabled_user_sgpr
205// ASM: .amdhsa_next_free_vgpr 0
206// ASM-NEXT: .amdhsa_next_free_sgpr 0
207// ASM: .end_amdhsa_kernel
208
209.section .foo
210
211.byte .amdgcn.gfx_generation_number
212// ASM: .byte 9
213
214.byte .amdgcn.gfx_generation_minor
215// ASM: .byte 0
216
217.byte .amdgcn.gfx_generation_stepping
218// ASM: .byte 4
219
220.byte .amdgcn.next_free_vgpr
221// ASM: .byte 0
222.byte .amdgcn.next_free_sgpr
223// ASM: .byte 0
224
225v_mov_b32_e32 v7, s10
226
227.byte .amdgcn.next_free_vgpr
228// ASM: .byte 8
229.byte .amdgcn.next_free_sgpr
230// ASM: .byte 11
231
232.set .amdgcn.next_free_vgpr, 0
233.set .amdgcn.next_free_sgpr, 0
234
235.byte .amdgcn.next_free_vgpr
236// ASM: .byte 0
237.byte .amdgcn.next_free_sgpr
238// ASM: .byte 0
239
240v_mov_b32_e32 v16, s3
241
242.byte .amdgcn.next_free_vgpr
243// ASM: .byte 17
244.byte .amdgcn.next_free_sgpr
245// ASM: .byte 4
246
247// Metadata
248
249.amdgpu_metadata
250  amdhsa.version:
251    - 3
252    - 0
253  amdhsa.kernels:
254    - .name:       amd_kernel_code_t_test_all
255      .symbol: amd_kernel_code_t_test_all@kd
256      .kernarg_segment_size: 8
257      .group_segment_fixed_size: 16
258      .private_segment_fixed_size: 32
259      .kernarg_segment_align: 64
260      .wavefront_size: 128
261      .sgpr_count: 14
262      .vgpr_count: 40
263      .max_flat_workgroup_size: 256
264    - .name:       amd_kernel_code_t_minimal
265      .symbol: amd_kernel_code_t_minimal@kd
266      .kernarg_segment_size: 8
267      .group_segment_fixed_size: 16
268      .private_segment_fixed_size: 32
269      .kernarg_segment_align: 64
270      .wavefront_size: 128
271      .sgpr_count: 14
272      .vgpr_count: 40
273      .max_flat_workgroup_size: 256
274.end_amdgpu_metadata
275
276// ASM:      	.amdgpu_metadata
277// ASM:      amdhsa.kernels:
278// ASM:        - .group_segment_fixed_size: 16
279// ASM:          .kernarg_segment_align: 64
280// ASM:          .kernarg_segment_size: 8
281// ASM:          .max_flat_workgroup_size: 256
282// ASM:          .name:           amd_kernel_code_t_test_all
283// ASM:          .private_segment_fixed_size: 32
284// ASM:          .sgpr_count:     14
285// ASM:          .symbol:         'amd_kernel_code_t_test_all@kd'
286// ASM:          .vgpr_count:     40
287// ASM:          .wavefront_size: 128
288// ASM:        - .group_segment_fixed_size: 16
289// ASM:          .kernarg_segment_align: 64
290// ASM:          .kernarg_segment_size: 8
291// ASM:          .max_flat_workgroup_size: 256
292// ASM:          .name:           amd_kernel_code_t_minimal
293// ASM:          .private_segment_fixed_size: 32
294// ASM:          .sgpr_count:     14
295// ASM:          .symbol:         'amd_kernel_code_t_minimal@kd'
296// ASM:          .vgpr_count:     40
297// ASM:          .wavefront_size: 128
298// ASM:      amdhsa.version:
299// ASM-NEXT:   - 3
300// ASM-NEXT:   - 0
301// ASM:      	.end_amdgpu_metadata
302