/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 * Copyright 2020 Valve Corporation
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
25 
26 #ifndef AC_SQTT_H
27 #define AC_SQTT_H
28 
29 #include <stdint.h>
30 #include <stdbool.h>
31 
32 #include <assert.h>
33 #include "c11_compat.h"
34 #include "ac_rgp.h"
35 
/* Declared elsewhere; only pointers to these types appear in this header,
 * so incomplete (forward) declarations are enough. */
struct radeon_cmdbuf;
struct radeon_info;
38 
39 struct ac_thread_trace_data {
40    struct radeon_cmdbuf *start_cs[2];
41    struct radeon_cmdbuf *stop_cs[2];
42    /* struct radeon_winsys_bo or struct pb_buffer */
43    void *bo;
44    void *ptr;
45    uint32_t buffer_size;
46    int start_frame;
47    char *trigger_file;
48 
49    struct rgp_code_object rgp_code_object;
50    struct rgp_loader_events rgp_loader_events;
51    struct rgp_pso_correlation rgp_pso_correlation;
52 };
53 
/* Shift amount for SQTT buffer alignment: 1 << 12 == 4096.
 * NOTE(review): presumably buffer addresses/sizes are programmed in units of
 * 4 KiB -- confirm against the users of this macro. */
#define SQTT_BUFFER_ALIGN_SHIFT 12
55 
/**
 * Three-dword status record for one thread trace.
 *
 * The third dword is shared by two names through an anonymous union, so
 * gfx9_write_counter and gfx10_dropped_cntr read and write the same storage.
 * NOTE(review): presumably which name applies depends on the GPU generation
 * -- confirm against the code that consumes this struct.
 */
struct ac_thread_trace_info {
   uint32_t cur_offset;
   uint32_t trace_status;
   union {
      uint32_t gfx9_write_counter;
      uint32_t gfx10_dropped_cntr;
   };
};
64 
65 struct ac_thread_trace_se {
66    struct ac_thread_trace_info info;
67    void *data_ptr;
68    uint32_t shader_engine;
69    uint32_t compute_unit;
70 };
71 
72 struct ac_thread_trace {
73    struct ac_thread_trace_data *data;
74    uint32_t num_traces;
75    struct ac_thread_trace_se traces[4];
76 };
77 
/*
 * Thread-trace buffer layout helpers. The definitions are not in this
 * header, so the descriptions below are inferred from the signatures only.
 */

/* NOTE(review): presumably the offset of the info record for shader engine
 * `se` inside the trace buffer -- confirm against the definition. */
uint64_t
ac_thread_trace_get_info_offset(unsigned se);

/* NOTE(review): presumably the offset of the trace data for shader engine
 * `se`; takes the GPU info and trace state as read-only inputs. */
uint64_t
ac_thread_trace_get_data_offset(const struct radeon_info *rad_info,
                                const struct ac_thread_trace_data *data, unsigned se);

/* NOTE(review): presumably ac_thread_trace_get_info_offset() expressed as a
 * GPU virtual address relative to `va` -- confirm. */
uint64_t
ac_thread_trace_get_info_va(uint64_t va, unsigned se);

/* NOTE(review): presumably ac_thread_trace_get_data_offset() expressed as a
 * GPU virtual address relative to `va` -- confirm. */
uint64_t
ac_thread_trace_get_data_va(const struct radeon_info *rad_info,
                            const struct ac_thread_trace_data *data, uint64_t va, unsigned se);

/* Returns a bool derived from the trace state and one info record.
 * NOTE(review): rad_info is non-const here, unlike the offset helpers above;
 * confirm whether the definition really writes through it before changing
 * the signature. */
bool
ac_is_thread_trace_complete(struct radeon_info *rad_info,
                            const struct ac_thread_trace_data *data,
                            const struct ac_thread_trace_info *info);

/* Returns a 32-bit size computed from one info record.
 * NOTE(review): units (bytes vs. KiB) are not visible from this header;
 * rad_info is non-const here as well -- see the note above. */
uint32_t
ac_get_expected_buffer_size(struct radeon_info *rad_info,
                            const struct ac_thread_trace_info *info);
99 
/**
 * Identifiers for RGP SQ thread-tracing markers (Table 1)
 *
 * All sixteen values 0x0..0xF are enumerated, which is exactly the range of
 * the 4-bit `identifier` bit-field at the start of every marker struct in
 * this header.
 */
enum rgp_sqtt_marker_identifier
{
   RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0,
   RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1,
   RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2,
   RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3,
   RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4,
   RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5,
   RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6,
   RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7,
   RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8,
   RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9,
   RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB,
   RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF
};
122 
/**
 * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2)
 *
 * Four dwords. Each dword is an anonymous union, so it can be accessed either
 * through its named field(s) or as a raw 32-bit value via dwordNN.
 */
struct rgp_sqtt_marker_cb_start {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t queue : 5;
      }; /* 4 + 3 + 20 + 5 = 32 bits */
      uint32_t dword01;
   };
   union {
      uint32_t device_id_low;
      uint32_t dword02;
   };
   union {
      uint32_t device_id_high;
      uint32_t dword03;
   };
   union {
      uint32_t queue_flags;
      uint32_t dword04;
   };
};

/* Catches any padding or width change that would alter the 16-byte layout. */
static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16,
              "rgp_sqtt_marker_cb_start doesn't match RGP spec");
152 
/**
 * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3)
 *
 * Three dwords. Each dword is an anonymous union, so it can be accessed
 * either through its named field(s) or as a raw 32-bit value via dwordNN.
 */
struct rgp_sqtt_marker_cb_end {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t reserved : 5;
      }; /* 4 + 3 + 20 + 5 = 32 bits */
      uint32_t dword01;
   };
   union {
      uint32_t device_id_low;
      uint32_t dword02;
   };
   union {
      uint32_t device_id_high;
      uint32_t dword03;
   };
};

/* Catches any padding or width change that would alter the 12-byte layout. */
static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12,
              "rgp_sqtt_marker_cb_end doesn't match RGP spec");
179 
/**
 * API types used in RGP SQ thread-tracing markers for the "General API"
 * packet.
 *
 * Values are contiguous from 0 to 45; ApiInvalid is the all-ones sentinel.
 * Note that 0xffffffff is outside the range of int, so the enum relies on
 * the compiler accepting out-of-int-range enumerators (GCC and Clang do).
 */
enum rgp_sqtt_marker_general_api_type
{
   ApiCmdBindPipeline = 0,
   ApiCmdBindDescriptorSets = 1,
   ApiCmdBindIndexBuffer = 2,
   ApiCmdBindVertexBuffers = 3,
   ApiCmdDraw = 4,
   ApiCmdDrawIndexed = 5,
   ApiCmdDrawIndirect = 6,
   ApiCmdDrawIndexedIndirect = 7,
   ApiCmdDrawIndirectCountAMD = 8,
   ApiCmdDrawIndexedIndirectCountAMD = 9,
   ApiCmdDispatch = 10,
   ApiCmdDispatchIndirect = 11,
   ApiCmdCopyBuffer = 12,
   ApiCmdCopyImage = 13,
   ApiCmdBlitImage = 14,
   ApiCmdCopyBufferToImage = 15,
   ApiCmdCopyImageToBuffer = 16,
   ApiCmdUpdateBuffer = 17,
   ApiCmdFillBuffer = 18,
   ApiCmdClearColorImage = 19,
   ApiCmdClearDepthStencilImage = 20,
   ApiCmdClearAttachments = 21,
   ApiCmdResolveImage = 22,
   ApiCmdWaitEvents = 23,
   ApiCmdPipelineBarrier = 24,
   ApiCmdBeginQuery = 25,
   ApiCmdEndQuery = 26,
   ApiCmdResetQueryPool = 27,
   ApiCmdWriteTimestamp = 28,
   ApiCmdCopyQueryPoolResults = 29,
   ApiCmdPushConstants = 30,
   ApiCmdBeginRenderPass = 31,
   ApiCmdNextSubpass = 32,
   ApiCmdEndRenderPass = 33,
   ApiCmdExecuteCommands = 34,
   ApiCmdSetViewport = 35,
   ApiCmdSetScissor = 36,
   ApiCmdSetLineWidth = 37,
   ApiCmdSetDepthBias = 38,
   ApiCmdSetBlendConstants = 39,
   ApiCmdSetDepthBounds = 40,
   ApiCmdSetStencilCompareMask = 41,
   ApiCmdSetStencilWriteMask = 42,
   ApiCmdSetStencilReference = 43,
   ApiCmdDrawIndirectCount = 44,
   ApiCmdDrawIndexedIndirectCount = 45,
   ApiInvalid = 0xffffffff
};
234 
/**
 * RGP SQ thread-tracing marker for a "General API" instrumentation packet.
 *
 * A single dword, accessible either field-by-field or raw via dword01.
 * api_type is only 20 bits wide, so the ApiInvalid sentinel (0xffffffff)
 * cannot be stored in it without truncation.
 */
struct rgp_sqtt_marker_general_api {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t api_type : 20;
         uint32_t is_end : 1;
         uint32_t reserved : 4;
      }; /* 4 + 3 + 20 + 1 + 4 = 32 bits */
      uint32_t dword01;
   };
};

/* Catches any padding or width change that would alter the 4-byte layout. */
static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4,
              "rgp_sqtt_marker_general_api doesn't match RGP spec");
253 
/**
 * API types used in RGP SQ thread-tracing markers (Table 16).
 *
 * Values are contiguous from 0 to 28; EventInvalid is the all-ones sentinel.
 * Note that 0xffffffff is outside the range of int, so the enum relies on
 * the compiler accepting out-of-int-range enumerators (GCC and Clang do).
 */
enum rgp_sqtt_marker_event_type
{
   EventCmdDraw = 0,
   EventCmdDrawIndexed = 1,
   EventCmdDrawIndirect = 2,
   EventCmdDrawIndexedIndirect = 3,
   EventCmdDrawIndirectCountAMD = 4,
   EventCmdDrawIndexedIndirectCountAMD = 5,
   EventCmdDispatch = 6,
   EventCmdDispatchIndirect = 7,
   EventCmdCopyBuffer = 8,
   EventCmdCopyImage = 9,
   EventCmdBlitImage = 10,
   EventCmdCopyBufferToImage = 11,
   EventCmdCopyImageToBuffer = 12,
   EventCmdUpdateBuffer = 13,
   EventCmdFillBuffer = 14,
   EventCmdClearColorImage = 15,
   EventCmdClearDepthStencilImage = 16,
   EventCmdClearAttachments = 17,
   EventCmdResolveImage = 18,
   EventCmdWaitEvents = 19,
   EventCmdPipelineBarrier = 20,
   EventCmdResetQueryPool = 21,
   EventCmdCopyQueryPoolResults = 22,
   EventRenderPassColorClear = 23,
   EventRenderPassDepthStencilClear = 24,
   EventRenderPassResolve = 25,
   EventInternalUnknown = 26,
   EventCmdDrawIndirectCount = 27,
   EventCmdDrawIndexedIndirectCount = 28,
   EventInvalid = 0xffffffff
};
290 
/**
 * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4)
 *
 * Three dwords, each accessible field-by-field or raw via dwordNN.
 * api_type is 24 bits wide, so the EventInvalid sentinel (0xffffffff)
 * cannot be stored in it without truncation.
 */
struct rgp_sqtt_marker_event {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t api_type : 24;
         uint32_t has_thread_dims : 1;
      }; /* 4 + 3 + 24 + 1 = 32 bits */
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t cb_id : 20;
         uint32_t vertex_offset_reg_idx : 4;
         uint32_t instance_offset_reg_idx : 4;
         uint32_t draw_index_reg_idx : 4;
      }; /* 20 + 4 + 4 + 4 = 32 bits */
      uint32_t dword02;
   };
   union {
      uint32_t cmd_id;
      uint32_t dword03;
   };
};

/* Catches any padding or width change that would alter the 12-byte layout. */
static_assert(sizeof(struct rgp_sqtt_marker_event) == 12,
              "rgp_sqtt_marker_event doesn't match RGP spec");
321 
322 /**
323  * Per-dispatch specific marker where workgroup dims are included.
324  */
325 struct rgp_sqtt_marker_event_with_dims {
326    struct rgp_sqtt_marker_event event;
327    uint32_t thread_x;
328    uint32_t thread_y;
329    uint32_t thread_z;
330 };
331 
332 static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24,
333               "rgp_sqtt_marker_event_with_dims doesn't match RGP spec");
334 
/**
 * "Barrier Start" RGP SQTT instrumentation marker (Table 5)
 *
 * Two dwords, each accessible field-by-field or raw via dwordNN.
 */
struct rgp_sqtt_marker_barrier_start {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t reserved : 5;
      }; /* 4 + 3 + 20 + 5 = 32 bits */
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t driver_reason : 31;
         uint32_t internal : 1;
      }; /* 31 + 1 = 32 bits */
      uint32_t dword02;
   };
};

/* Catches any padding or width change that would alter the 8-byte layout. */
static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8,
              "rgp_sqtt_marker_barrier_start doesn't match RGP spec");
359 
/**
 * "Barrier End" RGP SQTT instrumentation marker (Table 6)
 *
 * Two dwords, each accessible field-by-field or raw via dwordNN. Apart from
 * the common header fields and num_layout_transitions, every field is a
 * single-bit flag.
 */
struct rgp_sqtt_marker_barrier_end {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t cb_id : 20;
         uint32_t wait_on_eop_ts : 1;
         uint32_t vs_partial_flush : 1;
         uint32_t ps_partial_flush : 1;
         uint32_t cs_partial_flush : 1;
         uint32_t pfp_sync_me : 1;
      }; /* 4 + 3 + 20 + 5 flags = 32 bits */
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t sync_cp_dma : 1;
         uint32_t inval_tcp : 1;
         uint32_t inval_sqI : 1;
         uint32_t inval_sqK : 1;
         uint32_t flush_tcc : 1;
         uint32_t inval_tcc : 1;
         uint32_t flush_cb : 1;
         uint32_t inval_cb : 1;
         uint32_t flush_db : 1;
         uint32_t inval_db : 1;
         uint32_t num_layout_transitions : 16;
         uint32_t inval_gl1 : 1;
         uint32_t reserved : 5;
      }; /* 10 flags + 16 + 1 + 5 = 32 bits */
      uint32_t dword02;
   };
};

/* Catches any padding or width change that would alter the 8-byte layout. */
static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8,
              "rgp_sqtt_marker_barrier_end doesn't match RGP spec");
399 
/**
 * "Layout Transition" RGP SQTT instrumentation marker (Table 7)
 *
 * Two dwords, each accessible field-by-field or raw via dwordNN. The first
 * dword carries eight single-bit flags after the common header fields; the
 * second dword is entirely reserved.
 */
struct rgp_sqtt_marker_layout_transition {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t depth_stencil_expand : 1;
         uint32_t htile_hiz_range_expand : 1;
         uint32_t depth_stencil_resummarize : 1;
         uint32_t dcc_decompress : 1;
         uint32_t fmask_decompress : 1;
         uint32_t fast_clear_eliminate : 1;
         uint32_t fmask_color_expand : 1;
         uint32_t init_mask_ram : 1;
         uint32_t reserved1 : 17;
      }; /* 4 + 3 + 8 flags + 17 = 32 bits */
      uint32_t dword01;
   };
   union {
      struct {
         uint32_t reserved2 : 32;
      };
      uint32_t dword02;
   };
};

/* Catches any padding or width change that would alter the 8-byte layout. */
static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8,
              "rgp_sqtt_marker_layout_transition doesn't match RGP spec");
430 
431 
/**
 * "User Event" RGP SQTT instrumentation marker (Table 8)
 *
 * A single dword, accessible field-by-field or raw via dword01.
 * NOTE(review): data_type presumably holds an
 * enum rgp_sqtt_marker_user_event_type value -- confirm against the writer.
 */
struct rgp_sqtt_marker_user_event {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t reserved0 : 8;
         uint32_t data_type : 8;
         uint32_t reserved1 : 12;
      }; /* 4 + 8 + 8 + 12 = 32 bits */
      uint32_t dword01;
   };
};

/**
 * User event marker followed by one extra dword, `length`.
 * NOTE(review): presumably the length of a string payload that follows the
 * marker -- the unit and what it counts are not visible in this header.
 */
struct rgp_sqtt_marker_user_event_with_length {
   struct rgp_sqtt_marker_user_event user_event;
   uint32_t length;
};

/* Catches any padding or width change that would alter the layouts. */
static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4,
              "rgp_sqtt_marker_user_event doesn't match RGP spec");
static_assert(sizeof(struct rgp_sqtt_marker_user_event_with_length) == 8,
              "rgp_sqtt_marker_user_event_with_length doesn't match RGP spec");
453 
/**
 * Kinds of user event. Only the first enumerator has an explicit value, so
 * the rest follow consecutively (Pop = 1, Push = 2, ObjectName = 3).
 * NOTE(review): presumably stored in rgp_sqtt_marker_user_event.data_type --
 * confirm against the code that emits the marker.
 */
enum rgp_sqtt_marker_user_event_type
{
   UserEventTrigger = 0,
   UserEventPop,
   UserEventPush,
   UserEventObjectName,
};
461 
/**
 * "Pipeline bind" RGP SQTT instrumentation marker (Table 12)
 *
 * Three dwords. The first is accessible field-by-field or raw via dword01;
 * the last two hold the 64-bit API PSO hash as two 32-bit halves, reachable
 * either as api_pso_hash[0..1] or as dword02/dword03.
 */
struct rgp_sqtt_marker_pipeline_bind {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t ext_dwords : 3;
         uint32_t bind_point : 1;
         uint32_t cb_id : 20;
         uint32_t reserved : 4;
      }; /* 4 + 3 + 1 + 20 + 4 = 32 bits */
      uint32_t dword01;
   };
   union {
      uint32_t api_pso_hash[2];
      struct {
         uint32_t dword02;
         uint32_t dword03;
      };
   };
};

/* Catches any padding or width change that would alter the 12-byte layout. */
static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12,
              "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec");
487 
488 
/*
 * RGP bookkeeping helpers. The definitions are not in this header, so the
 * descriptions below are inferred from the signatures only.
 */

/* Takes the trace state (mutable) and a 64-bit pipeline hash.
 * NOTE(review): presumably records a PSO correlation entry in
 * thread_trace_data->rgp_pso_correlation and returns true on success --
 * confirm against the definition. */
bool ac_sqtt_add_pso_correlation(struct ac_thread_trace_data *thread_trace_data,
                                 uint64_t pipeline_hash);

/* Takes the trace state (mutable), a 64-bit pipeline hash and a 64-bit base
 * address.
 * NOTE(review): presumably records a code-object load event in
 * thread_trace_data->rgp_loader_events and returns true on success --
 * confirm against the definition. */
bool ac_sqtt_add_code_object_loader_event(struct ac_thread_trace_data *thread_trace_data,
                                          uint64_t pipeline_hash,
                                          uint64_t base_address);
495 
496 #endif
497