// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s

// READOBJ: Section Headers
// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        0000c0 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64

// READOBJ: Relocation section '.rela.rodata' at offset
// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210

// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
// READOBJ:      0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
// READOBJ-NEXT: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
// READOBJ-NEXT: 0000000000000200  0 FUNC    LOCAL  PROTECTED 2 special_sgpr
// READOBJ-NEXT: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
// READOBJ-NEXT: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
// READOBJ-NEXT: 0000000000000080 64 OBJECT  LOCAL  DEFAULT   3 special_sgpr.kd

// OBJDUMP: Contents of section .rodata
// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
// minimal
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
// special_sgpr
// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000

// Code section: the kernel entry points are emitted here. The assembler is
// expected to echo the section switch in its textual output.
.text
// ASM: .text

// Declare the target this file is assembled for. The string names the same
// processor (gfx1010) and xnack setting that the run lines select through
// -mcpu and -mattr, and it must be echoed unchanged.
.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"

// Entry point of the "minimal" kernel: a body consisting of s_endpgm only.
// The 256-byte alignment (.p2align 8) puts it at offset 0x000 of .text, which
// is the address the symbol table checks expect for this symbol.
.p2align 8
.type minimal,@function
minimal:
  s_endpgm

// Entry point of the "complete" kernel: a body consisting of s_endpgm only.
// The 256-byte alignment (.p2align 8) moves it to offset 0x100 of .text, which
// is the address the symbol table checks expect for this symbol.
.p2align 8
.type complete,@function
complete:
  s_endpgm

// Entry point of the "special_sgpr" kernel: a body consisting of s_endpgm
// only. The 256-byte alignment (.p2align 8) moves it to offset 0x200 of .text,
// which is the address the symbol table checks expect for this symbol.
.p2align 8
.type special_sgpr,@function
special_sgpr:
  s_endpgm

// Read-only data section: the kernel descriptor blocks that follow are emitted
// here. The object-file checks look for the <kernel>.kd symbols and the raw
// descriptor bytes in this section.
.rodata
// ASM: .rodata

// Test that only specifying required directives is allowed, and that defaulted
// values are omitted.
// The descriptor is aligned to 64 bytes (.p2align 6), the same as the 64-byte
// size the symbol table checks expect for minimal.kd.
.p2align 6
.amdhsa_kernel minimal
  .amdhsa_next_free_vgpr 0
  .amdhsa_next_free_sgpr 0
.end_amdhsa_kernel

// Expected textual output: the two directives given above must appear back to
// back. The checks for the opening and closing lines are the looser,
// non-adjacent form.
// ASM: .amdhsa_kernel minimal
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 0
// ASM: .end_amdhsa_kernel

// Test that we can specify all available directives with non-default values.
// The descriptor is aligned to 64 bytes (.p2align 6); the symbol table checks
// expect complete.kd at .rodata offset 0x40 with a size of 64 bytes.
.p2align 6
.amdhsa_kernel complete
  .amdhsa_group_segment_fixed_size 1
  .amdhsa_private_segment_fixed_size 1
  .amdhsa_kernarg_size 8
  .amdhsa_user_sgpr_private_segment_buffer 1
  .amdhsa_user_sgpr_dispatch_ptr 1
  .amdhsa_user_sgpr_queue_ptr 1
  .amdhsa_user_sgpr_kernarg_segment_ptr 1
  .amdhsa_user_sgpr_dispatch_id 1
  .amdhsa_user_sgpr_flat_scratch_init 1
  .amdhsa_user_sgpr_private_segment_size 1
  .amdhsa_wavefront_size32 1
  .amdhsa_system_sgpr_private_segment_wavefront_offset 1
  .amdhsa_system_sgpr_workgroup_id_x 0
  .amdhsa_system_sgpr_workgroup_id_y 1
  .amdhsa_system_sgpr_workgroup_id_z 1
  .amdhsa_system_sgpr_workgroup_info 1
  .amdhsa_system_vgpr_workitem_id 1
  .amdhsa_next_free_vgpr 9
  .amdhsa_next_free_sgpr 27
  .amdhsa_reserve_vcc 0
  .amdhsa_reserve_flat_scratch 0
  .amdhsa_reserve_xnack_mask 1
  .amdhsa_float_round_mode_32 1
  .amdhsa_float_round_mode_16_64 1
  .amdhsa_float_denorm_mode_32 1
  .amdhsa_float_denorm_mode_16_64 0
  .amdhsa_dx10_clamp 0
  .amdhsa_ieee_mode 0
  .amdhsa_fp16_overflow 1
  .amdhsa_workgroup_processor_mode 1
  .amdhsa_memory_ordered 1
  .amdhsa_forward_progress 1
  .amdhsa_exception_fp_ieee_invalid_op 1
  .amdhsa_exception_fp_denorm_src 1
  .amdhsa_exception_fp_ieee_div_zero 1
  .amdhsa_exception_fp_ieee_overflow 1
  .amdhsa_exception_fp_ieee_underflow 1
  .amdhsa_exception_fp_ieee_inexact 1
  .amdhsa_exception_int_div_zero 1
.end_amdhsa_kernel

// Expected textual output: every check after the opening line is the adjacent
// (-NEXT) form, so the printed block must list exactly these directives, in
// this order, with no other line in between.
// ASM: .amdhsa_kernel complete
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_kernarg_size 8
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
// ASM-NEXT: .amdhsa_wavefront_size32 1
// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
// ASM-NEXT: .amdhsa_next_free_vgpr 9
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
// ASM-NEXT: .amdhsa_fp16_overflow 1
// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
// ASM-NEXT: .amdhsa_memory_ordered 1
// ASM-NEXT: .amdhsa_forward_progress 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
// ASM-NEXT: .amdhsa_exception_int_div_zero 1
// ASM-NEXT: .end_amdhsa_kernel

// Test that we are including special SGPR usage in the granulated count.
// NOTE(review): the expected .rodata bytes for this descriptor (row 00b0,
// leading word 00000060) do not appear to carry any SGPR granule count on
// this target. Confirm against the kernel descriptor layout whether this case
// still exercises the granulated count on GFX10.
//
// The directives are written in a different order than the assembler prints
// them: next_free_vgpr comes last here but first in the expected output.
.p2align 6
.amdhsa_kernel special_sgpr
  // Same next_free_sgpr as "complete", but...
  .amdhsa_next_free_sgpr 27
  // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from
  // 3 granules to 4
  .amdhsa_reserve_flat_scratch 1

  .amdhsa_reserve_vcc 0
  .amdhsa_reserve_xnack_mask 1

  .amdhsa_float_denorm_mode_16_64 0
  .amdhsa_dx10_clamp 0
  .amdhsa_ieee_mode 0
  .amdhsa_next_free_vgpr 0
.end_amdhsa_kernel

// Expected textual output: two adjacent groups of directives, each introduced
// by a non-adjacent check. reserve_flat_scratch is not among the checked
// lines.
// ASM: .amdhsa_kernel special_sgpr
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
// ASM: .end_amdhsa_kernel

// Exercise the assembler-maintained .amdgcn.* symbols in a separate section.
// Each .byte prints the current value of a symbol so the textual output can be
// checked against it.
.section .foo

// The generation number symbol evaluates to 10 for this target.
.byte .amdgcn.gfx_generation_number
// ASM: .byte 10

// Both next-free symbols are expected to read 0 at this point.
.byte .amdgcn.next_free_vgpr
// ASM: .byte 0
.byte .amdgcn.next_free_sgpr
// ASM: .byte 0

// Referencing v7 and s10 raises the symbols to 8 and 11, i.e. one past the
// highest register index used so far.
v_mov_b32_e32 v7, s10

.byte .amdgcn.next_free_vgpr
// ASM: .byte 8
.byte .amdgcn.next_free_sgpr
// ASM: .byte 11

// The symbols can be reset with a plain .set, after which they read 0 again.
.set .amdgcn.next_free_vgpr, 0
.set .amdgcn.next_free_sgpr, 0

.byte .amdgcn.next_free_vgpr
// ASM: .byte 0
.byte .amdgcn.next_free_sgpr
// ASM: .byte 0

// Tracking resumes after the reset: v16 and s3 give 17 and 4.
v_mov_b32_e32 v16, s3

.byte .amdgcn.next_free_vgpr
// ASM: .byte 17
.byte .amdgcn.next_free_sgpr
// ASM: .byte 4
