1// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
2// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -filetype=obj < %s > %t
3// RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
4// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
5
6// big endian not supported
7// XFAIL: host-byteorder-big-endian
8
9// READOBJ: Section Headers
10// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
11// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        0000c0 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64
12
13// READOBJ: Relocation section '.rela.rodata' at offset
14// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
15// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
16// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
17
18// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
19// READOBJ: {{[0-9]+}}: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
20// READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
21// READOBJ: {{[0-9]+}}: 0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
22// READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
23// READOBJ: {{[0-9]+}}: 0000000000000200  0 FUNC    LOCAL  PROTECTED 2 special_sgpr
24// READOBJ: {{[0-9]+}}: 0000000000000080 64 OBJECT  LOCAL  DEFAULT   3 special_sgpr.kd
25
26// OBJDUMP: Contents of section .rodata
27// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
28// minimal
29// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
30// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
31// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
32// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
33// complete
34// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
35// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
36// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
37// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
38// special_sgpr
39// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
40// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
41// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
42// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
43
// Code section: three kernel entry points, each consisting of a single
// s_endpgm. The relocation checks near the top of the file expect the
// .rodata contents emitted further down to reference this section.
44.text
45// ASM: .text
46
// Target string mirroring the -mcpu=gfx1010 -mattr=+xnack options given on
// the llvm-mc command lines; it is expected to be echoed back unchanged.
47.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
48// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
49
// Every entry point is aligned to 2^8 = 256 bytes. This is what the symbol
// table checks rely on: minimal, complete and special_sgpr are expected at
// offsets 0x000, 0x100 and 0x200, and .text is expected to have alignment 256.
50.p2align 8
51.type minimal,@function
52minimal:
53  s_endpgm
54
55.p2align 8
56.type complete,@function
57complete:
58  s_endpgm
59
60.p2align 8
61.type special_sgpr,@function
62special_sgpr:
63  s_endpgm
64
// Read-only data section that receives the kernel descriptors. The section
// header check at the top expects it to total 0xc0 bytes with alignment 64.
65.rodata
66// ASM: .rodata
67
68// Test that only specifying required directives is allowed, and that defaulted
69// values are omitted.
// .p2align 6 gives 2^6 = 64-byte alignment, matching the 64-byte <name>.kd
// objects that the symbol table checks expect at 0x00, 0x40 and 0x80.
70.p2align 6
71.amdhsa_kernel minimal
72  .amdhsa_next_free_vgpr 0
73  .amdhsa_next_free_sgpr 0
74.end_amdhsa_kernel
75
// Expected textual output: the two directives given above, adjacent to each
// other, inside the kernel block.
76// ASM: .amdhsa_kernel minimal
77// ASM: .amdhsa_next_free_vgpr 0
78// ASM-NEXT: .amdhsa_next_free_sgpr 0
79// ASM: .end_amdhsa_kernel
80
81// Test that we can specify all available directives with non-default values.
// The object-dump checks at the top expect this descriptor at 0x0040 and
// expect it to begin with two little-endian 32-bit words of value 1 --
// presumably the two *_segment_fixed_size fields set below; confirm against
// the kernel descriptor layout before relying on this.
82.p2align 6
83.amdhsa_kernel complete
84  .amdhsa_group_segment_fixed_size 1
85  .amdhsa_private_segment_fixed_size 1
86  .amdhsa_user_sgpr_private_segment_buffer 1
87  .amdhsa_user_sgpr_dispatch_ptr 1
88  .amdhsa_user_sgpr_queue_ptr 1
89  .amdhsa_user_sgpr_kernarg_segment_ptr 1
90  .amdhsa_user_sgpr_dispatch_id 1
91  .amdhsa_user_sgpr_flat_scratch_init 1
92  .amdhsa_user_sgpr_private_segment_size 1
93  .amdhsa_wavefront_size32 1
94  .amdhsa_system_sgpr_private_segment_wavefront_offset 1
95  .amdhsa_system_sgpr_workgroup_id_x 0
96  .amdhsa_system_sgpr_workgroup_id_y 1
97  .amdhsa_system_sgpr_workgroup_id_z 1
98  .amdhsa_system_sgpr_workgroup_info 1
99  .amdhsa_system_vgpr_workitem_id 1
100  .amdhsa_next_free_vgpr 9
101  .amdhsa_next_free_sgpr 27
102  .amdhsa_reserve_vcc 0
103  .amdhsa_reserve_flat_scratch 0
104  .amdhsa_reserve_xnack_mask 0
105  .amdhsa_float_round_mode_32 1
106  .amdhsa_float_round_mode_16_64 1
107  .amdhsa_float_denorm_mode_32 1
108  .amdhsa_float_denorm_mode_16_64 0
109  .amdhsa_dx10_clamp 0
110  .amdhsa_ieee_mode 0
111  .amdhsa_fp16_overflow 1
112  .amdhsa_workgroup_processor_mode 1
113  .amdhsa_memory_ordered 1
114  .amdhsa_forward_progress 1
115  .amdhsa_exception_fp_ieee_invalid_op 1
116  .amdhsa_exception_fp_denorm_src 1
117  .amdhsa_exception_fp_ieee_div_zero 1
118  .amdhsa_exception_fp_ieee_overflow 1
119  .amdhsa_exception_fp_ieee_underflow 1
120  .amdhsa_exception_fp_ieee_inexact 1
121  .amdhsa_exception_int_div_zero 1
122.end_amdhsa_kernel
123
// Expected textual output: every directive above, with the same value and in
// the same order. Each check must match the line directly after the previous
// match, so no extra or missing directive is tolerated anywhere in the block.
124// ASM: .amdhsa_kernel complete
125// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
126// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
127// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
128// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
129// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
130// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
131// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
132// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
133// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
134// ASM-NEXT: .amdhsa_wavefront_size32 1
135// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
136// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
137// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
138// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
139// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
140// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
141// ASM-NEXT: .amdhsa_next_free_vgpr 9
142// ASM-NEXT: .amdhsa_next_free_sgpr 27
143// ASM-NEXT: .amdhsa_reserve_vcc 0
144// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
145// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
146// ASM-NEXT: .amdhsa_float_round_mode_32 1
147// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
148// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
149// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
150// ASM-NEXT: .amdhsa_dx10_clamp 0
151// ASM-NEXT: .amdhsa_ieee_mode 0
152// ASM-NEXT: .amdhsa_fp16_overflow 1
153// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
154// ASM-NEXT: .amdhsa_memory_ordered 1
155// ASM-NEXT: .amdhsa_forward_progress 1
156// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
157// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
158// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
159// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
160// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
161// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
162// ASM-NEXT: .amdhsa_exception_int_div_zero 1
163// ASM-NEXT: .end_amdhsa_kernel
164
165// Test that we are including special SGPR usage in the granulated count.
166.p2align 6
167.amdhsa_kernel special_sgpr
168  // Same next_free_sgpr as "complete", but...
169  .amdhsa_next_free_sgpr 27
170  // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from
171  // 3 granules to 4
  // NOTE(review): the wording above looks inherited from the GFX9 variant of
  // this test. The object-dump checks at the top expect 00000060 for this
  // descriptor at 0x00b0 versus 015001e4 for "complete" at 0x0070, which does
  // not obviously show a larger SGPR granule count here -- confirm whether
  // that field is still populated on this target and reword if it is not.
172  .amdhsa_reserve_flat_scratch 1
173
174  .amdhsa_reserve_vcc 0
175  .amdhsa_reserve_xnack_mask 0
176
177  .amdhsa_float_denorm_mode_16_64 0
178  .amdhsa_dx10_clamp 0
179  .amdhsa_ieee_mode 0
  // Given last here, yet the checks below expect it to be printed before
  // next_free_sgpr, i.e. the output order does not follow the input order.
180  .amdhsa_next_free_vgpr 0
181.end_amdhsa_kernel
182
// Expected textual output. reserve_xnack_mask must directly follow
// reserve_vcc, so the reserve_flat_scratch value of 1 given above is not
// expected to be echoed back (presumably because 1 is its default).
183// ASM: .amdhsa_kernel special_sgpr
184// ASM: .amdhsa_next_free_vgpr 0
185// ASM-NEXT: .amdhsa_next_free_sgpr 27
186// ASM-NEXT: .amdhsa_reserve_vcc 0
187// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
188// ASM: .amdhsa_float_denorm_mode_16_64 0
189// ASM-NEXT: .amdhsa_dx10_clamp 0
190// ASM-NEXT: .amdhsa_ieee_mode 0
191// ASM: .end_amdhsa_kernel
192
// Scratch section used to observe the assembler-maintained .amdgcn.* symbols
// by emitting their current values as single bytes.
193.section .foo
194
// Expected to evaluate to 10 for the gfx1010 target selected above.
195.byte .amdgcn.gfx_generation_number
196// ASM: .byte 10
197
// Both register-tracking symbols are expected to read 0 before any
// instruction in this file has named a numbered VGPR or SGPR.
198.byte .amdgcn.next_free_vgpr
199// ASM: .byte 0
200.byte .amdgcn.next_free_sgpr
201// ASM: .byte 0
202
// Using v7 and s10 is expected to raise the symbols to 8 and 11, i.e. one
// past the index of each register used.
203v_mov_b32_e32 v7, s10
204
205.byte .amdgcn.next_free_vgpr
206// ASM: .byte 8
207.byte .amdgcn.next_free_sgpr
208// ASM: .byte 11
209
// The symbols can be overwritten by hand; after resetting them to 0 they are
// expected to read back as 0.
210.set .amdgcn.next_free_vgpr, 0
211.set .amdgcn.next_free_sgpr, 0
212
213.byte .amdgcn.next_free_vgpr
214// ASM: .byte 0
215.byte .amdgcn.next_free_sgpr
216// ASM: .byte 0
217
// Tracking resumes from the reset values: v16 and s3 are expected to yield
// 17 and 4. The SGPR result of 4 is lower than the earlier 11, which shows
// the manual reset took effect rather than a running maximum being kept.
218v_mov_b32_e32 v16, s3
219
220.byte .amdgcn.next_free_vgpr
221// ASM: .byte 17
222.byte .amdgcn.next_free_sgpr
223// ASM: .byte 4
224