1// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
2// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack -filetype=obj < %s > %t
3// RUN: llvm-readelf -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
4// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
5
6// big endian not supported
7// XFAIL: host-byteorder-big-endian
8
9// READOBJ: Section Headers
10// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
11// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        000100 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64
12
13// READOBJ: Relocation section '.rela.rodata' at offset
14// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
15// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
16// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
17// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310
18
19// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
20// READOBJ: {{[0-9]+}}: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
21// READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
22// READOBJ: {{[0-9]+}}: 0000000000000300  0 FUNC    LOCAL  PROTECTED 2 disabled_user_sgpr
23// READOBJ: {{[0-9]+}}: 00000000000000c0 64 OBJECT  LOCAL  DEFAULT   3 disabled_user_sgpr.kd
24// READOBJ: {{[0-9]+}}: 0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
25// READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
26// READOBJ: {{[0-9]+}}: 0000000000000200  0 FUNC    LOCAL  PROTECTED 2 special_sgpr
27// READOBJ: {{[0-9]+}}: 0000000000000080 64 OBJECT  LOCAL  DEFAULT   3 special_sgpr.kd
28
29// OBJDUMP: Contents of section .rodata
30// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
31// minimal
32// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
33// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
34// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
35// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
36// complete
37// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
38// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
39// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
40// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
41// special_sgpr
42// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
43// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
44// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
45// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000
46// disabled_user_sgpr
47// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
48// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
49// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
50// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000
51
52.text
53// ASM: .text
54
55.amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
56// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
57
58.p2align 8
59.type minimal,@function
60minimal:
61  s_endpgm
62
63.p2align 8
64.type complete,@function
65complete:
66  s_endpgm
67
68.p2align 8
69.type special_sgpr,@function
70special_sgpr:
71  s_endpgm
72
73.p2align 8
74.type disabled_user_sgpr,@function
75disabled_user_sgpr:
76  s_endpgm
77
78.rodata
79// ASM: .rodata
80
81// Test that only specifying required directives is allowed, and that defaulted
82// values are omitted.
83.p2align 6
84.amdhsa_kernel minimal
85  .amdhsa_next_free_vgpr 0
86  .amdhsa_next_free_sgpr 0
87.end_amdhsa_kernel
88
89// ASM: .amdhsa_kernel minimal
90// ASM: .amdhsa_next_free_vgpr 0
91// ASM-NEXT: .amdhsa_next_free_sgpr 0
92// ASM: .end_amdhsa_kernel
93
94// Test that we can specify all available directives with non-default values.
95.p2align 6
96.amdhsa_kernel complete
97  .amdhsa_group_segment_fixed_size 1
98  .amdhsa_private_segment_fixed_size 1
99  .amdhsa_user_sgpr_private_segment_buffer 1
100  .amdhsa_user_sgpr_dispatch_ptr 1
101  .amdhsa_user_sgpr_queue_ptr 1
102  .amdhsa_user_sgpr_kernarg_segment_ptr 1
103  .amdhsa_user_sgpr_dispatch_id 1
104  .amdhsa_user_sgpr_flat_scratch_init 1
105  .amdhsa_user_sgpr_private_segment_size 1
106  .amdhsa_system_sgpr_private_segment_wavefront_offset 1
107  .amdhsa_system_sgpr_workgroup_id_x 0
108  .amdhsa_system_sgpr_workgroup_id_y 1
109  .amdhsa_system_sgpr_workgroup_id_z 1
110  .amdhsa_system_sgpr_workgroup_info 1
111  .amdhsa_system_vgpr_workitem_id 1
112  .amdhsa_next_free_vgpr 9
113  .amdhsa_next_free_sgpr 27
114  .amdhsa_reserve_vcc 0
115  .amdhsa_reserve_flat_scratch 0
116  .amdhsa_reserve_xnack_mask 0
117  .amdhsa_float_round_mode_32 1
118  .amdhsa_float_round_mode_16_64 1
119  .amdhsa_float_denorm_mode_32 1
120  .amdhsa_float_denorm_mode_16_64 0
121  .amdhsa_dx10_clamp 0
122  .amdhsa_ieee_mode 0
123  .amdhsa_fp16_overflow 1
124  .amdhsa_exception_fp_ieee_invalid_op 1
125  .amdhsa_exception_fp_denorm_src 1
126  .amdhsa_exception_fp_ieee_div_zero 1
127  .amdhsa_exception_fp_ieee_overflow 1
128  .amdhsa_exception_fp_ieee_underflow 1
129  .amdhsa_exception_fp_ieee_inexact 1
130  .amdhsa_exception_int_div_zero 1
131.end_amdhsa_kernel
132
133// ASM: .amdhsa_kernel complete
134// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
135// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
136// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
137// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
138// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
139// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
140// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
141// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
142// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
143// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
144// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
145// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
146// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
147// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
148// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
149// ASM-NEXT: .amdhsa_next_free_vgpr 9
150// ASM-NEXT: .amdhsa_next_free_sgpr 27
151// ASM-NEXT: .amdhsa_reserve_vcc 0
152// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
153// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
154// ASM-NEXT: .amdhsa_float_round_mode_32 1
155// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
156// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
157// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
158// ASM-NEXT: .amdhsa_dx10_clamp 0
159// ASM-NEXT: .amdhsa_ieee_mode 0
160// ASM-NEXT: .amdhsa_fp16_overflow 1
161// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
162// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
163// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
164// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
165// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
166// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
167// ASM-NEXT: .amdhsa_exception_int_div_zero 1
168// ASM-NEXT: .end_amdhsa_kernel
169
170// Test that we are including special SGPR usage in the granulated count.
171.p2align 6
172.amdhsa_kernel special_sgpr
173  // Same next_free_sgpr as "complete", but...
174  .amdhsa_next_free_sgpr 27
175  // ...on GFX9 this should require an additional 6 SGPRs, pushing us from
176  // 3 granules to 4
177  .amdhsa_reserve_flat_scratch 1
178
179  .amdhsa_reserve_vcc 0
180  .amdhsa_reserve_xnack_mask 0
181
182  .amdhsa_float_denorm_mode_16_64 0
183  .amdhsa_dx10_clamp 0
184  .amdhsa_ieee_mode 0
185  .amdhsa_next_free_vgpr 0
186.end_amdhsa_kernel
187
188// ASM: .amdhsa_kernel special_sgpr
189// ASM: .amdhsa_next_free_vgpr 0
190// ASM-NEXT: .amdhsa_next_free_sgpr 27
191// ASM-NEXT: .amdhsa_reserve_vcc 0
192// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
193// ASM: .amdhsa_float_denorm_mode_16_64 0
194// ASM-NEXT: .amdhsa_dx10_clamp 0
195// ASM-NEXT: .amdhsa_ieee_mode 0
196// ASM: .end_amdhsa_kernel
197
198// Test that explicitly disabling user_sgpr's does not affect the user_sgpr
199// count, i.e. this should produce the same descriptor as minimal.
200.p2align 6
201.amdhsa_kernel disabled_user_sgpr
202  .amdhsa_user_sgpr_private_segment_buffer 0
203  .amdhsa_next_free_vgpr 0
204  .amdhsa_next_free_sgpr 0
205.end_amdhsa_kernel
206
207// ASM: .amdhsa_kernel disabled_user_sgpr
208// ASM: .amdhsa_next_free_vgpr 0
209// ASM-NEXT: .amdhsa_next_free_sgpr 0
210// ASM: .end_amdhsa_kernel
211
212.section .foo
213
214.byte .amdgcn.gfx_generation_number
215// ASM: .byte 9
216
217.byte .amdgcn.gfx_generation_minor
218// ASM: .byte 0
219
220.byte .amdgcn.gfx_generation_stepping
221// ASM: .byte 4
222
223.byte .amdgcn.next_free_vgpr
224// ASM: .byte 0
225.byte .amdgcn.next_free_sgpr
226// ASM: .byte 0
227
228v_mov_b32_e32 v7, s10
229
230.byte .amdgcn.next_free_vgpr
231// ASM: .byte 8
232.byte .amdgcn.next_free_sgpr
233// ASM: .byte 11
234
235.set .amdgcn.next_free_vgpr, 0
236.set .amdgcn.next_free_sgpr, 0
237
238.byte .amdgcn.next_free_vgpr
239// ASM: .byte 0
240.byte .amdgcn.next_free_sgpr
241// ASM: .byte 0
242
243v_mov_b32_e32 v16, s3
244
245.byte .amdgcn.next_free_vgpr
246// ASM: .byte 17
247.byte .amdgcn.next_free_sgpr
248// ASM: .byte 4
249
250// Metadata
251
252.amdgpu_metadata
253  amdhsa.version:
254    - 3
255    - 0
256  amdhsa.kernels:
257    - .name:       amd_kernel_code_t_test_all
258      .symbol: amd_kernel_code_t_test_all@kd
259      .kernarg_segment_size: 8
260      .group_segment_fixed_size: 16
261      .private_segment_fixed_size: 32
262      .kernarg_segment_align: 64
263      .wavefront_size: 128
264      .sgpr_count: 14
265      .vgpr_count: 40
266      .max_flat_workgroup_size: 256
267    - .name:       amd_kernel_code_t_minimal
268      .symbol: amd_kernel_code_t_minimal@kd
269      .kernarg_segment_size: 8
270      .group_segment_fixed_size: 16
271      .private_segment_fixed_size: 32
272      .kernarg_segment_align: 64
273      .wavefront_size: 128
274      .sgpr_count: 14
275      .vgpr_count: 40
276      .max_flat_workgroup_size: 256
277.end_amdgpu_metadata
278
279// ASM:      	.amdgpu_metadata
280// ASM:      amdhsa.kernels:
281// ASM:        - .group_segment_fixed_size: 16
282// ASM:          .kernarg_segment_align: 64
283// ASM:          .kernarg_segment_size: 8
284// ASM:          .max_flat_workgroup_size: 256
285// ASM:          .name:           amd_kernel_code_t_test_all
286// ASM:          .private_segment_fixed_size: 32
287// ASM:          .sgpr_count:     14
288// ASM:          .symbol:         'amd_kernel_code_t_test_all@kd'
289// ASM:          .vgpr_count:     40
290// ASM:          .wavefront_size: 128
291// ASM:        - .group_segment_fixed_size: 16
292// ASM:          .kernarg_segment_align: 64
293// ASM:          .kernarg_segment_size: 8
294// ASM:          .max_flat_workgroup_size: 256
295// ASM:          .name:           amd_kernel_code_t_minimal
296// ASM:          .private_segment_fixed_size: 32
297// ASM:          .sgpr_count:     14
298// ASM:          .symbol:         'amd_kernel_code_t_minimal@kd'
299// ASM:          .vgpr_count:     40
300// ASM:          .wavefront_size: 128
301// ASM:      amdhsa.version:
302// ASM-NEXT:   - 3
303// ASM-NEXT:   - 0
304// ASM:      	.end_amdgpu_metadata
305