1 /*
2  * Copyright 2020 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  */
25 
26 
27 #include "si_pipe.h"
28 #include "si_build_pm4.h"
29 #include "si_compute.h"
30 
31 #include "ac_rgp.h"
32 #include "ac_sqtt.h"
33 #include "util/u_memory.h"
34 #include "tgsi/tgsi_from_mesa.h"
35 
/* Forward declaration: the definition lives further down in this file, but
 * si_thread_trace_start() and si_thread_trace_stop() already call it. */
static void
si_emit_spi_config_cntl(struct si_context* sctx,
                        struct radeon_cmdbuf *cs, bool enable);
39 
40 static bool
si_thread_trace_init_bo(struct si_context * sctx)41 si_thread_trace_init_bo(struct si_context *sctx)
42 {
43    unsigned max_se = sctx->screen->info.max_se;
44    struct radeon_winsys *ws = sctx->ws;
45    uint64_t size;
46 
47    /* The buffer size and address need to be aligned in HW regs. Align the
48     * size as early as possible so that we do all the allocation & addressing
49     * correctly. */
50    sctx->thread_trace->buffer_size = align64(sctx->thread_trace->buffer_size,
51                                              1u << SQTT_BUFFER_ALIGN_SHIFT);
52 
53    /* Compute total size of the thread trace BO for all SEs. */
54    size = align64(sizeof(struct ac_thread_trace_info) * max_se,
55                   1 << SQTT_BUFFER_ALIGN_SHIFT);
56    size += sctx->thread_trace->buffer_size * (uint64_t)max_se;
57 
58    sctx->thread_trace->bo =
59       ws->buffer_create(ws, size, 4096,
60                         RADEON_DOMAIN_VRAM,
61                         RADEON_FLAG_NO_INTERPROCESS_SHARING |
62                         RADEON_FLAG_GTT_WC |
63                         RADEON_FLAG_NO_SUBALLOC);
64    if (!sctx->thread_trace->bo)
65       return false;
66 
67    return true;
68 }
69 
70 static bool
si_se_is_disabled(struct si_context * sctx,unsigned se)71 si_se_is_disabled(struct si_context* sctx, unsigned se)
72 {
73    /* No active CU on the SE means it is disabled. */
74    return sctx->screen->info.cu_mask[se][0] == 0;
75 }
76 
77 
/* Emit the packets that program the SQ thread trace registers of every
 * enabled shader engine, then start the trace.
 *
 * \param sctx                context owning the thread trace state and BO
 * \param cs                  command stream the packets are written to
 * \param queue_family_index  ring type: RING_COMPUTE starts the trace through
 *                            COMPUTE_THREAD_TRACE_ENABLE, every other value
 *                            through a THREAD_TRACE_START event
 */
static void
si_emit_thread_trace_start(struct si_context* sctx,
                           struct radeon_cmdbuf *cs,
                           uint32_t queue_family_index)
{
   struct si_screen *sscreen = sctx->screen;
   /* The size registers take the buffer size in aligned units, not bytes. */
   uint32_t shifted_size = sctx->thread_trace->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
   unsigned max_se = sscreen->info.max_se;

   radeon_begin(cs);

   for (unsigned se = 0; se < max_se; se++) {
      /* Address of this SE's data buffer, expressed in aligned units like
       * the size above. */
      uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
      uint64_t data_va = ac_thread_trace_get_data_va(&sctx->screen->info, sctx->thread_trace, va, se);
      uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;

      if (si_se_is_disabled(sctx, se))
         continue;

      /* Target SEx and SH0. */
      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
                             S_030800_SE_INDEX(se) |
                             S_030800_SH_INDEX(0) |
                             S_030800_INSTANCE_BROADCAST_WRITES(1));

      /* Select the first active CUs.
       * ffs() is 1-based and returns 0 only for an empty mask, which cannot
       * happen here because disabled SEs were skipped above.
       * NOTE(review): the 1-based value is used directly as CU/WGP selector
       * below - confirm this is intended. */
      int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);

      if (sctx->chip_class >= GFX10) {
         /* Order seems important for the following 2 registers. */
         radeon_set_privileged_config_reg(R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
                                          S_008D04_SIZE(shifted_size) |
                                          S_008D04_BASE_HI(shifted_va >> 32));

         radeon_set_privileged_config_reg(R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);

         /* On GFX10+ the selector is a WGP index, derived as CU index / 2. */
         int wgp = first_active_cu / 2;
         radeon_set_privileged_config_reg(R_008D14_SQ_THREAD_TRACE_MASK,
                                          S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
                                          S_008D14_SA_SEL(0) |
                                          S_008D14_WGP_SEL(wgp) |
                                          S_008D14_SIMD_SEL(0));

         radeon_set_privileged_config_reg(R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
                      S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC |
                                           V_008D18_REG_INCLUDE_SHDEC |
                                           V_008D18_REG_INCLUDE_GFXUDEC |
                                           V_008D18_REG_INCLUDE_CONTEXT |
                                           V_008D18_REG_INCLUDE_COMP |
                                           V_008D18_REG_INCLUDE_CONFIG) |
                      S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));

         /* Should be emitted last (it enables thread traces). */
         radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
                                          S_008D1C_MODE(1) |
                                          S_008D1C_HIWATER(5) |
                                          S_008D1C_UTIL_TIMER(1) |
                                          S_008D1C_RT_FREQ(2) | /* 4096 clk */
                                          S_008D1C_DRAW_EVENT_EN(1) |
                                          S_008D1C_REG_STALL_EN(1) |
                                          S_008D1C_SPI_STALL_EN(1) |
                                          S_008D1C_SQ_STALL_EN(1) |
                                          S_008D1C_REG_DROP_ON_STALL(0) |
                                          S_008D1C_LOWATER_OFFSET(
                                             sctx->chip_class >= GFX10_3 ? 4 : 0));
      } else {
         /* Order seems important for the following 4 registers. */
         radeon_set_uconfig_reg(R_030CDC_SQ_THREAD_TRACE_BASE2,
                                S_030CDC_ADDR_HI(shifted_va >> 32));

         radeon_set_uconfig_reg(R_030CC0_SQ_THREAD_TRACE_BASE, shifted_va);

         radeon_set_uconfig_reg(R_030CC4_SQ_THREAD_TRACE_SIZE,
                                S_030CC4_SIZE(shifted_size));

         radeon_set_uconfig_reg(R_030CD4_SQ_THREAD_TRACE_CTRL,
                                S_030CD4_RESET_BUFFER(1));

         uint32_t thread_trace_mask = S_030CC8_CU_SEL(first_active_cu) |
                                      S_030CC8_SH_SEL(0) |
                                      S_030CC8_SIMD_EN(0xf) |
                                      S_030CC8_VM_ID_MASK(0) |
                                      S_030CC8_REG_STALL_EN(1) |
                                      S_030CC8_SPI_STALL_EN(1) |
                                      S_030CC8_SQ_STALL_EN(1);

         radeon_set_uconfig_reg(R_030CC8_SQ_THREAD_TRACE_MASK,
                                thread_trace_mask);

         /* Trace all tokens and registers. */
         radeon_set_uconfig_reg(R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
                                S_030CCC_TOKEN_MASK(0xbfff) |
                                S_030CCC_REG_MASK(0xff) |
                                S_030CCC_REG_DROP_ON_STALL(0));

         /* Enable SQTT perf counters for all CUs. */
         radeon_set_uconfig_reg(R_030CD0_SQ_THREAD_TRACE_PERF_MASK,
                                S_030CD0_SH0_MASK(0xffff) |
                                S_030CD0_SH1_MASK(0xffff));

         radeon_set_uconfig_reg(R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2, 0xffffffff);

         radeon_set_uconfig_reg(R_030CEC_SQ_THREAD_TRACE_HIWATER,
                                S_030CEC_HIWATER(4));

         if (sctx->chip_class == GFX9) {
            /* Reset thread trace status errors. */
            radeon_set_uconfig_reg(R_030CE8_SQ_THREAD_TRACE_STATUS,
                                   S_030CE8_UTC_ERROR(0));
         }

         /* Enable the thread trace mode. */
         uint32_t thread_trace_mode =
            S_030CD8_MASK_PS(1) |
            S_030CD8_MASK_VS(1) |
            S_030CD8_MASK_GS(1) |
            S_030CD8_MASK_ES(1) |
            S_030CD8_MASK_HS(1) |
            S_030CD8_MASK_LS(1) |
            S_030CD8_MASK_CS(1) |
            S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
            S_030CD8_MODE(1);

         if (sctx->chip_class == GFX9) {
            /* Count SQTT traffic in TCC perf counters. */
            thread_trace_mode |= S_030CD8_TC_PERF_EN(1);
         }

         radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE,
                                thread_trace_mode);
      }
   }

   /* Restore global broadcasting. */
   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
                          S_030800_SE_BROADCAST_WRITES(1) |
                             S_030800_SH_BROADCAST_WRITES(1) |
                             S_030800_INSTANCE_BROADCAST_WRITES(1));

   /* Start the thread trace with a different event based on the queue. */
   if (queue_family_index == RING_COMPUTE) {
      radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
                        S_00B878_THREAD_TRACE_ENABLE(1));
   } else {
      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
   }
   radeon_end();
}
227 
/* Registers that si_copy_thread_trace_info_regs() reads back on GFX9.
 * Entry i is stored into DWORD i of the per-SE info struct. */
static const uint32_t gfx9_thread_trace_info_regs[] =
{
   R_030CE4_SQ_THREAD_TRACE_WPTR,
   R_030CE8_SQ_THREAD_TRACE_STATUS,
   R_030CF0_SQ_THREAD_TRACE_CNTR,
};
234 
/* Registers that si_copy_thread_trace_info_regs() reads back on GFX10 and
 * GFX10.3. Entry i is stored into DWORD i of the per-SE info struct. */
static const uint32_t gfx10_thread_trace_info_regs[] =
{
   R_008D10_SQ_THREAD_TRACE_WPTR,
   R_008D20_SQ_THREAD_TRACE_STATUS,
   R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
};
241 
242 static void
si_copy_thread_trace_info_regs(struct si_context * sctx,struct radeon_cmdbuf * cs,unsigned se_index)243 si_copy_thread_trace_info_regs(struct si_context* sctx,
244              struct radeon_cmdbuf *cs,
245              unsigned se_index)
246 {
247    const uint32_t *thread_trace_info_regs = NULL;
248 
249    switch (sctx->chip_class) {
250    case GFX10_3:
251    case GFX10:
252       thread_trace_info_regs = gfx10_thread_trace_info_regs;
253       break;
254    case GFX9:
255       thread_trace_info_regs = gfx9_thread_trace_info_regs;
256       break;
257    default:
258       unreachable("Unsupported chip_class");
259    }
260 
261    /* Get the VA where the info struct is stored for this SE. */
262    uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
263    uint64_t info_va = ac_thread_trace_get_info_va(va, se_index);
264 
265    radeon_begin(cs);
266 
267    /* Copy back the info struct one DWORD at a time. */
268    for (unsigned i = 0; i < 3; i++) {
269       radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
270       radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
271                   COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
272                   COPY_DATA_WR_CONFIRM);
273       radeon_emit(thread_trace_info_regs[i] >> 2);
274       radeon_emit(0); /* unused */
275       radeon_emit((info_va + i * 4));
276       radeon_emit((info_va + i * 4) >> 32);
277    }
278    radeon_end();
279 }
280 
281 
282 
/* Emit the packets that stop the thread trace and, for every enabled shader
 * engine, disable the trace mode, wait for the hardware to go idle and copy
 * the info registers into the thread trace BO.
 *
 * \param sctx                context owning the thread trace state and BO
 * \param cs                  command stream the packets are written to
 * \param queue_family_index  ring type: RING_COMPUTE stops the trace through
 *                            COMPUTE_THREAD_TRACE_ENABLE, every other value
 *                            through a THREAD_TRACE_STOP event
 */
static void
si_emit_thread_trace_stop(struct si_context *sctx,
                          struct radeon_cmdbuf *cs,
                          uint32_t queue_family_index)
{
   unsigned max_se = sctx->screen->info.max_se;

   radeon_begin(cs);

   /* Stop the thread trace with a different event based on the queue. */
   if (queue_family_index == RING_COMPUTE) {
      radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
                        S_00B878_THREAD_TRACE_ENABLE(0));
   } else {
      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0));
   }

   /* The FINISH event is emitted for both queue types. */
   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
   radeon_end();

   for (unsigned se = 0; se < max_se; se++) {
      if (si_se_is_disabled(sctx, se))
         continue;

      radeon_begin(cs);

      /* Target SEi and SH0. */
      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
                             S_030800_SE_INDEX(se) |
                             S_030800_SH_INDEX(0) |
                             S_030800_INSTANCE_BROADCAST_WRITES(1));

      if (sctx->chip_class >= GFX10) {
         /* Make sure to wait for the trace buffer. */
         radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
         radeon_emit(WAIT_REG_MEM_NOT_EQUAL); /* wait until the masked register differs from the reference value, i.e. FINISH_DONE is set */
         radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2);  /* register */
         radeon_emit(0);
         radeon_emit(0); /* reference value */
         radeon_emit(S_008D20_FINISH_DONE(1)); /* mask */
         radeon_emit(4); /* poll interval */

         /* Disable the thread trace mode. */
         radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
                                          S_008D1C_MODE(0));

         /* Wait for thread trace completion. */
         radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
         radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the masked register equals the reference value, i.e. BUSY is clear */
         radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2);  /* register */
         radeon_emit(0);
         radeon_emit(0); /* reference value */
         radeon_emit(S_008D20_BUSY(1)); /* mask */
         radeon_emit(4); /* poll interval */
      } else {
         /* Disable the thread trace mode. */
         radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE,
                                S_030CD8_MODE(0));

         /* Wait for thread trace completion. */
         radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
         radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the masked register equals the reference value, i.e. BUSY is clear */
         radeon_emit(R_030CE8_SQ_THREAD_TRACE_STATUS >> 2);  /* register */
         radeon_emit(0);
         radeon_emit(0); /* reference value */
         radeon_emit(S_030CE8_BUSY(1)); /* mask */
         radeon_emit(4); /* poll interval */
      }
      radeon_end();

      /* Save WPTR/STATUS/counter of this SE while it is still selected. */
      si_copy_thread_trace_info_regs(sctx, cs, se);
   }

   /* Restore global broadcasting. */
   radeon_begin_again(cs);
   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
                          S_030800_SE_BROADCAST_WRITES(1) |
                             S_030800_SH_BROADCAST_WRITES(1) |
                             S_030800_INSTANCE_BROADCAST_WRITES(1));
   radeon_end();
}
366 
367 static void
si_thread_trace_start(struct si_context * sctx,int family,struct radeon_cmdbuf * cs)368 si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
369 {
370    struct radeon_winsys *ws = sctx->ws;
371 
372    radeon_begin(cs);
373 
374    switch (family) {
375       case RING_GFX:
376          radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
377          radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
378          radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
379          break;
380       case RING_COMPUTE:
381          radeon_emit(PKT3(PKT3_NOP, 0, 0));
382          radeon_emit(0);
383          break;
384    }
385    radeon_end();
386 
387    ws->cs_add_buffer(cs,
388                      sctx->thread_trace->bo,
389                      RADEON_USAGE_READWRITE,
390                      RADEON_DOMAIN_VRAM,
391                      0);
392 
393    si_cp_dma_wait_for_idle(sctx, cs);
394 
395    /* Make sure to wait-for-idle before starting SQTT. */
396    sctx->flags |=
397       SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
398       SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
399       SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME;
400    sctx->emit_cache_flush(sctx, cs);
401 
402    si_inhibit_clockgating(sctx, cs, true);
403 
404    /* Enable SQG events that collects thread trace data. */
405    si_emit_spi_config_cntl(sctx, cs, true);
406 
407    si_emit_thread_trace_start(sctx, cs, family);
408 }
409 
410 static void
si_thread_trace_stop(struct si_context * sctx,int family,struct radeon_cmdbuf * cs)411 si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
412 {
413    struct radeon_winsys *ws = sctx->ws;
414 
415    radeon_begin(cs);
416 
417    switch (family) {
418       case RING_GFX:
419          radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
420          radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
421          radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
422          break;
423       case RING_COMPUTE:
424          radeon_emit(PKT3(PKT3_NOP, 0, 0));
425          radeon_emit(0);
426          break;
427    }
428    radeon_end();
429 
430    ws->cs_add_buffer(cs,
431                      sctx->thread_trace->bo,
432                      RADEON_USAGE_READWRITE,
433                      RADEON_DOMAIN_VRAM,
434                      0);
435 
436    si_cp_dma_wait_for_idle(sctx, cs);
437 
438    /* Make sure to wait-for-idle before stopping SQTT. */
439    sctx->flags |=
440       SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
441       SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
442       SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME;
443    sctx->emit_cache_flush(sctx, cs);
444 
445    si_emit_thread_trace_stop(sctx, cs, family);
446 
447    /* Restore previous state by disabling SQG events. */
448    si_emit_spi_config_cntl(sctx, cs, false);
449 
450    si_inhibit_clockgating(sctx, cs, false);
451 }
452 
453 
454 static void
si_thread_trace_init_cs(struct si_context * sctx)455 si_thread_trace_init_cs(struct si_context *sctx)
456 {
457    struct radeon_winsys *ws = sctx->ws;
458 
459    /* Thread trace start CS (only handles RING_GFX). */
460    sctx->thread_trace->start_cs[RING_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
461    if (!ws->cs_create(sctx->thread_trace->start_cs[RING_GFX],
462                       sctx->ctx, RING_GFX, NULL, NULL, 0)) {
463       free(sctx->thread_trace->start_cs[RING_GFX]);
464       sctx->thread_trace->start_cs[RING_GFX] = NULL;
465       return;
466    }
467 
468    si_thread_trace_start(sctx, RING_GFX, sctx->thread_trace->start_cs[RING_GFX]);
469 
470    /* Thread trace stop CS. */
471    sctx->thread_trace->stop_cs[RING_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
472    if (!ws->cs_create(sctx->thread_trace->stop_cs[RING_GFX],
473                       sctx->ctx, RING_GFX, NULL, NULL, 0)) {
474       free(sctx->thread_trace->start_cs[RING_GFX]);
475       sctx->thread_trace->start_cs[RING_GFX] = NULL;
476       free(sctx->thread_trace->stop_cs[RING_GFX]);
477       sctx->thread_trace->stop_cs[RING_GFX] = NULL;
478       return;
479    }
480 
481    si_thread_trace_stop(sctx, RING_GFX, sctx->thread_trace->stop_cs[RING_GFX]);
482 }
483 
484 static void
si_begin_thread_trace(struct si_context * sctx,struct radeon_cmdbuf * rcs)485 si_begin_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
486 {
487    struct radeon_cmdbuf *cs = sctx->thread_trace->start_cs[RING_GFX];
488    sctx->ws->cs_flush(cs, 0, NULL);
489 }
490 
491 static void
si_end_thread_trace(struct si_context * sctx,struct radeon_cmdbuf * rcs)492 si_end_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
493 {
494    struct radeon_cmdbuf *cs = sctx->thread_trace->stop_cs[RING_GFX];
495    sctx->ws->cs_flush(cs, 0, &sctx->last_sqtt_fence);
496 }
497 
498 static bool
si_get_thread_trace(struct si_context * sctx,struct ac_thread_trace * thread_trace)499 si_get_thread_trace(struct si_context *sctx,
500                     struct ac_thread_trace *thread_trace)
501 {
502    unsigned max_se = sctx->screen->info.max_se;
503 
504    memset(thread_trace, 0, sizeof(*thread_trace));
505    thread_trace->num_traces = max_se;
506 
507    sctx->thread_trace->ptr = sctx->ws->buffer_map(sctx->ws, sctx->thread_trace->bo,
508                                                           NULL,
509                                                           PIPE_MAP_READ);
510 
511    if (!sctx->thread_trace->ptr)
512       return false;
513 
514    void *thread_trace_ptr = sctx->thread_trace->ptr;
515 
516    for (unsigned se = 0; se < max_se; se++) {
517       uint64_t info_offset = ac_thread_trace_get_info_offset(se);
518       uint64_t data_offset = ac_thread_trace_get_data_offset(&sctx->screen->info, sctx->thread_trace, se);
519       void *info_ptr = thread_trace_ptr + info_offset;
520       void *data_ptr = thread_trace_ptr + data_offset;
521       struct ac_thread_trace_info *info =
522          (struct ac_thread_trace_info *)info_ptr;
523 
524       struct ac_thread_trace_se thread_trace_se = {0};
525 
526       if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) {
527          uint32_t expected_size =
528             ac_get_expected_buffer_size(&sctx->screen->info, info);
529          uint32_t available_size = (info->cur_offset * 32) / 1024;
530 
531          fprintf(stderr, "Failed to get the thread trace "
532                  "because the buffer is too small. The "
533                  "hardware needs %d KB but the "
534                  "buffer size is %d KB.\n",
535                  expected_size, available_size);
536          fprintf(stderr, "Please update the buffer size with "
537                  "AMD_THREAD_TRACE_BUFFER_SIZE=<size_in_kbytes>\n");
538          return false;
539       }
540 
541       thread_trace_se.data_ptr = data_ptr;
542       thread_trace_se.info = *info;
543       thread_trace_se.shader_engine = se;
544 
545       int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
546 
547       /* For GFX10+ compute_unit really means WGP */
548       thread_trace_se.compute_unit =
549          sctx->screen->info.chip_class >= GFX10 ? (first_active_cu / 2) : first_active_cu;
550 
551       thread_trace->traces[se] = thread_trace_se;
552    }
553 
554    thread_trace->data = sctx->thread_trace;
555    return true;
556 }
557 
558 
559 bool
si_init_thread_trace(struct si_context * sctx)560 si_init_thread_trace(struct si_context *sctx)
561 {
562    static bool warn_once = true;
563    if (warn_once) {
564       fprintf(stderr, "*************************************************\n");
565       fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
566       fprintf(stderr, "*************************************************\n");
567       warn_once = false;
568    }
569 
570    sctx->thread_trace = CALLOC_STRUCT(ac_thread_trace_data);
571 
572    if (sctx->chip_class < GFX8) {
573       fprintf(stderr, "GPU hardware not supported: refer to "
574               "the RGP documentation for the list of "
575               "supported GPUs!\n");
576       return false;
577    }
578 
579    if (sctx->chip_class > GFX10_3) {
580       fprintf(stderr, "radeonsi: Thread trace is not supported "
581               "for that GPU!\n");
582       return false;
583    }
584 
585    /* Default buffer size set to 1MB per SE. */
586    sctx->thread_trace->buffer_size = debug_get_num_option("AMD_THREAD_TRACE_BUFFER_SIZE", 1024) * 1024;
587    sctx->thread_trace->start_frame = 10;
588 
589    const char *trigger = getenv("AMD_THREAD_TRACE_TRIGGER");
590    if (trigger) {
591       sctx->thread_trace->start_frame = atoi(trigger);
592       if (sctx->thread_trace->start_frame <= 0) {
593          /* This isn't a frame number, must be a file */
594          sctx->thread_trace->trigger_file = strdup(trigger);
595          sctx->thread_trace->start_frame = -1;
596       }
597    }
598 
599    if (!si_thread_trace_init_bo(sctx))
600       return false;
601 
602    list_inithead(&sctx->thread_trace->rgp_pso_correlation.record);
603    simple_mtx_init(&sctx->thread_trace->rgp_pso_correlation.lock, mtx_plain);
604 
605    list_inithead(&sctx->thread_trace->rgp_loader_events.record);
606    simple_mtx_init(&sctx->thread_trace->rgp_loader_events.lock, mtx_plain);
607 
608    list_inithead(&sctx->thread_trace->rgp_code_object.record);
609    simple_mtx_init(&sctx->thread_trace->rgp_code_object.lock, mtx_plain);
610 
611    si_thread_trace_init_cs(sctx);
612 
613    sctx->sqtt_next_event = EventInvalid;
614 
615    return true;
616 }
617 
618 void
si_destroy_thread_trace(struct si_context * sctx)619 si_destroy_thread_trace(struct si_context *sctx)
620 {
621    struct si_screen *sscreen = sctx->screen;
622    struct pb_buffer *bo = sctx->thread_trace->bo;
623    radeon_bo_reference(sctx->screen->ws, &bo, NULL);
624 
625    if (sctx->thread_trace->trigger_file)
626       free(sctx->thread_trace->trigger_file);
627 
628    sscreen->ws->cs_destroy(sctx->thread_trace->start_cs[RING_GFX]);
629    sscreen->ws->cs_destroy(sctx->thread_trace->stop_cs[RING_GFX]);
630 
631    struct rgp_pso_correlation *pso_correlation = &sctx->thread_trace->rgp_pso_correlation;
632    struct rgp_loader_events *loader_events = &sctx->thread_trace->rgp_loader_events;
633    struct rgp_code_object *code_object = &sctx->thread_trace->rgp_code_object;
634    list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
635                             &pso_correlation->record, list) {
636       list_del(&record->list);
637       free(record);
638    }
639    simple_mtx_destroy(&sctx->thread_trace->rgp_pso_correlation.lock);
640 
641    list_for_each_entry_safe(struct rgp_loader_events_record, record,
642                             &loader_events->record, list) {
643       list_del(&record->list);
644       free(record);
645    }
646    simple_mtx_destroy(&sctx->thread_trace->rgp_loader_events.lock);
647 
648    list_for_each_entry_safe(struct rgp_code_object_record, record,
649              &code_object->record, list) {
650       uint32_t mask = record->shader_stages_mask;
651       int i;
652 
653       /* Free the disassembly. */
654       while (mask) {
655          i = u_bit_scan(&mask);
656          free(record->shader_data[i].code);
657       }
658       list_del(&record->list);
659       free(record);
660    }
661    simple_mtx_destroy(&sctx->thread_trace->rgp_code_object.lock);
662 
663    free(sctx->thread_trace);
664    sctx->thread_trace = NULL;
665 }
666 
667 static uint64_t num_frames = 0;
668 
669 void
si_handle_thread_trace(struct si_context * sctx,struct radeon_cmdbuf * rcs)670 si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
671 {
672    /* Should we enable SQTT yet? */
673    if (!sctx->thread_trace_enabled) {
674       bool frame_trigger = num_frames == sctx->thread_trace->start_frame;
675       bool file_trigger = false;
676       if (sctx->thread_trace->trigger_file &&
677           access(sctx->thread_trace->trigger_file, W_OK) == 0) {
678          if (unlink(sctx->thread_trace->trigger_file) == 0) {
679             file_trigger = true;
680          } else {
681             /* Do not enable tracing if we cannot remove the file,
682              * because by then we'll trace every frame.
683              */
684             fprintf(stderr, "radeonsi: could not remove thread trace trigger file, ignoring\n");
685          }
686       }
687 
688       if (frame_trigger || file_trigger) {
689          /* Wait for last submission */
690          sctx->ws->fence_wait(sctx->ws, sctx->last_gfx_fence, PIPE_TIMEOUT_INFINITE);
691 
692          /* Start SQTT */
693          si_begin_thread_trace(sctx, rcs);
694 
695          sctx->thread_trace_enabled = true;
696          sctx->thread_trace->start_frame = -1;
697 
698          /* Force shader update to make sure si_sqtt_describe_pipeline_bind is called
699           * for the current "pipeline".
700           */
701          sctx->do_update_shaders = true;
702       }
703    } else {
704       struct ac_thread_trace thread_trace = {0};
705 
706       /* Stop SQTT */
707       si_end_thread_trace(sctx, rcs);
708       sctx->thread_trace_enabled = false;
709       sctx->thread_trace->start_frame = -1;
710       assert (sctx->last_sqtt_fence);
711 
712       /* Wait for SQTT to finish and read back the bo */
713       if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) &&
714           si_get_thread_trace(sctx, &thread_trace)) {
715          ac_dump_rgp_capture(&sctx->screen->info, &thread_trace);
716       } else {
717          fprintf(stderr, "Failed to read the trace\n");
718       }
719    }
720 
721    num_frames++;
722 }
723 
724 
725 static void
si_emit_thread_trace_userdata(struct si_context * sctx,struct radeon_cmdbuf * cs,const void * data,uint32_t num_dwords)726 si_emit_thread_trace_userdata(struct si_context* sctx,
727                               struct radeon_cmdbuf *cs,
728                               const void *data, uint32_t num_dwords)
729 {
730    const uint32_t *dwords = (uint32_t *)data;
731 
732    radeon_begin(cs);
733 
734    while (num_dwords > 0) {
735       uint32_t count = MIN2(num_dwords, 2);
736 
737       /* Without the perfctr bit the CP might not always pass the
738        * write on correctly. */
739       radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count, sctx->chip_class >= GFX10);
740 
741       radeon_emit_array(dwords, count);
742 
743       dwords += count;
744       num_dwords -= count;
745    }
746    radeon_end();
747 }
748 
749 static void
si_emit_spi_config_cntl(struct si_context * sctx,struct radeon_cmdbuf * cs,bool enable)750 si_emit_spi_config_cntl(struct si_context* sctx,
751            struct radeon_cmdbuf *cs, bool enable)
752 {
753    radeon_begin(cs);
754 
755    if (sctx->chip_class >= GFX9) {
756       uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) |
757                                  S_031100_EXP_PRIORITY_ORDER(3) |
758                                  S_031100_ENABLE_SQG_TOP_EVENTS(enable) |
759                                  S_031100_ENABLE_SQG_BOP_EVENTS(enable);
760 
761       if (sctx->chip_class >= GFX10)
762          spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);
763 
764       radeon_set_uconfig_reg(R_031100_SPI_CONFIG_CNTL, spi_config_cntl);
765    } else {
766       /* SPI_CONFIG_CNTL is a protected register on GFX6-GFX8. */
767       radeon_set_privileged_config_reg(R_009100_SPI_CONFIG_CNTL,
768                                        S_009100_ENABLE_SQG_TOP_EVENTS(enable) |
769                                        S_009100_ENABLE_SQG_BOP_EVENTS(enable));
770    }
771    radeon_end();
772 }
773 
774 static uint32_t num_events = 0;
775 void
si_sqtt_write_event_marker(struct si_context * sctx,struct radeon_cmdbuf * rcs,enum rgp_sqtt_marker_event_type api_type,uint32_t vertex_offset_user_data,uint32_t instance_offset_user_data,uint32_t draw_index_user_data)776 si_sqtt_write_event_marker(struct si_context* sctx, struct radeon_cmdbuf *rcs,
777                            enum rgp_sqtt_marker_event_type api_type,
778                            uint32_t vertex_offset_user_data,
779                            uint32_t instance_offset_user_data,
780                            uint32_t draw_index_user_data)
781 {
782    struct rgp_sqtt_marker_event marker = {0};
783 
784    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
785    marker.api_type = api_type == EventInvalid ? EventCmdDraw : api_type;
786    marker.cmd_id = num_events++;
787    marker.cb_id = 0;
788 
789    if (vertex_offset_user_data == UINT_MAX ||
790        instance_offset_user_data == UINT_MAX) {
791       vertex_offset_user_data = 0;
792       instance_offset_user_data = 0;
793    }
794 
795    if (draw_index_user_data == UINT_MAX)
796       draw_index_user_data = vertex_offset_user_data;
797 
798    marker.vertex_offset_reg_idx = vertex_offset_user_data;
799    marker.instance_offset_reg_idx = instance_offset_user_data;
800    marker.draw_index_reg_idx = draw_index_user_data;
801 
802    si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
803 
804    sctx->sqtt_next_event = EventInvalid;
805 }
806 
807 void
si_write_event_with_dims_marker(struct si_context * sctx,struct radeon_cmdbuf * rcs,enum rgp_sqtt_marker_event_type api_type,uint32_t x,uint32_t y,uint32_t z)808 si_write_event_with_dims_marker(struct si_context* sctx, struct radeon_cmdbuf *rcs,
809                                 enum rgp_sqtt_marker_event_type api_type,
810                                 uint32_t x, uint32_t y, uint32_t z)
811 {
812    struct rgp_sqtt_marker_event_with_dims marker = {0};
813 
814    marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
815    marker.event.api_type = api_type;
816    marker.event.cmd_id = num_events++;
817    marker.event.cb_id = 0;
818    marker.event.has_thread_dims = 1;
819 
820    marker.thread_x = x;
821    marker.thread_y = y;
822    marker.thread_z = z;
823 
824    si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
825    sctx->sqtt_next_event = EventInvalid;
826 }
827 
828 void
si_sqtt_describe_barrier_start(struct si_context * sctx,struct radeon_cmdbuf * rcs)829 si_sqtt_describe_barrier_start(struct si_context* sctx, struct radeon_cmdbuf *rcs)
830 {
831    struct rgp_sqtt_marker_barrier_start marker = {0};
832 
833    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
834    marker.cb_id = 0;
835    marker.dword02 = 0xC0000000 + 10; /* RGP_BARRIER_INTERNAL_BASE */
836 
837    si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
838 }
839 
840 void
si_sqtt_describe_barrier_end(struct si_context * sctx,struct radeon_cmdbuf * rcs,unsigned flags)841 si_sqtt_describe_barrier_end(struct si_context* sctx, struct radeon_cmdbuf *rcs,
842                             unsigned flags)
843 {
844    struct rgp_sqtt_marker_barrier_end marker = {0};
845 
846    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
847    marker.cb_id = 0;
848 
849    if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH)
850       marker.vs_partial_flush = true;
851    if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH)
852       marker.ps_partial_flush = true;
853    if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH)
854       marker.cs_partial_flush = true;
855 
856    if (flags & SI_CONTEXT_PFP_SYNC_ME)
857       marker.pfp_sync_me = true;
858 
859    if (flags & SI_CONTEXT_INV_VCACHE)
860       marker.inval_tcp = true;
861    if (flags & SI_CONTEXT_INV_ICACHE)
862       marker.inval_sqI = true;
863    if (flags & SI_CONTEXT_INV_SCACHE)
864       marker.inval_sqK = true;
865    if (flags & SI_CONTEXT_INV_L2)
866       marker.inval_tcc = true;
867 
868    if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
869       marker.inval_cb = true;
870       marker.flush_cb = true;
871    }
872    if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
873       marker.inval_db = true;
874       marker.flush_db = true;
875    }
876 
877    si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
878 }
879 
880 void
si_write_user_event(struct si_context * sctx,struct radeon_cmdbuf * rcs,enum rgp_sqtt_marker_user_event_type type,const char * str,int len)881 si_write_user_event(struct si_context* sctx, struct radeon_cmdbuf *rcs,
882                     enum rgp_sqtt_marker_user_event_type type,
883                     const char *str, int len)
884 {
885    if (type == UserEventPop) {
886       assert (str == NULL);
887       struct rgp_sqtt_marker_user_event marker = { 0 };
888       marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
889       marker.data_type = type;
890 
891       si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
892    } else {
893       assert (str != NULL);
894       struct rgp_sqtt_marker_user_event_with_length marker = { 0 };
895       marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
896       marker.user_event.data_type = type;
897       len = MIN2(1024, len);
898       marker.length = align(len, 4);
899 
900       uint8_t *buffer = alloca(sizeof(marker) + marker.length);
901       memcpy(buffer, &marker, sizeof(marker));
902       memcpy(buffer + sizeof(marker), str, len);
903       buffer[sizeof(marker) + len - 1] = '\0';
904 
905       si_emit_thread_trace_userdata(sctx, rcs, buffer, sizeof(marker) / 4 + marker.length / 4);
906    }
907 }
908 
909 
910 bool
si_sqtt_pipeline_is_registered(struct ac_thread_trace_data * thread_trace_data,uint64_t pipeline_hash)911 si_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data,
912                                uint64_t pipeline_hash)
913 {
914    simple_mtx_lock(&thread_trace_data->rgp_pso_correlation.lock);
915    list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
916              &thread_trace_data->rgp_pso_correlation.record, list) {
917       if (record->pipeline_hash[0] == pipeline_hash) {
918          simple_mtx_unlock(&thread_trace_data->rgp_pso_correlation.lock);
919          return true;
920       }
921 
922    }
923    simple_mtx_unlock(&thread_trace_data->rgp_pso_correlation.lock);
924 
925    return false;
926 }
927 
928 
929 
930 static enum rgp_hardware_stages
si_sqtt_pipe_to_rgp_shader_stage(struct si_shader_key * key,enum pipe_shader_type stage)931 si_sqtt_pipe_to_rgp_shader_stage(struct si_shader_key* key, enum pipe_shader_type stage)
932 {
933    switch (stage) {
934    case PIPE_SHADER_VERTEX:
935       if (key->as_ls)
936          return RGP_HW_STAGE_LS;
937       else if (key->as_es)
938          return RGP_HW_STAGE_ES;
939       else if (key->as_ngg)
940          return RGP_HW_STAGE_GS;
941       else
942          return RGP_HW_STAGE_VS;
943    case PIPE_SHADER_TESS_CTRL:
944       return RGP_HW_STAGE_HS;
945    case PIPE_SHADER_TESS_EVAL:
946       if (key->as_es)
947          return RGP_HW_STAGE_ES;
948       else if (key->as_ngg)
949          return RGP_HW_STAGE_GS;
950       else
951          return RGP_HW_STAGE_VS;
952    case PIPE_SHADER_GEOMETRY:
953       return RGP_HW_STAGE_GS;
954    case PIPE_SHADER_FRAGMENT:
955       return RGP_HW_STAGE_PS;
956    case PIPE_SHADER_COMPUTE:
957       return RGP_HW_STAGE_CS;
958    default:
959       unreachable("invalid mesa shader stage");
960    }
961 }
962 
963 static bool
si_sqtt_add_code_object(struct si_context * sctx,uint64_t pipeline_hash,bool is_compute)964 si_sqtt_add_code_object(struct si_context* sctx,
965                         uint64_t pipeline_hash,
966                         bool is_compute)
967 {
968    struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
969    struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
970    struct rgp_code_object_record *record;
971 
972    record = malloc(sizeof(struct rgp_code_object_record));
973    if (!record)
974       return false;
975 
976    record->shader_stages_mask = 0;
977    record->num_shaders_combined = 0;
978    record->pipeline_hash[0] = pipeline_hash;
979    record->pipeline_hash[1] = pipeline_hash;
980 
981    for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
982       struct si_shader *shader;
983       enum rgp_hardware_stages hw_stage;
984 
985       if (is_compute) {
986          if (i != PIPE_SHADER_COMPUTE)
987             continue;
988          shader = &sctx->cs_shader_state.program->shader;
989          hw_stage = RGP_HW_STAGE_CS;
990       } else if (i != PIPE_SHADER_COMPUTE) {
991          if (!sctx->shaders[i].cso || !sctx->shaders[i].current)
992             continue;
993          shader = sctx->shaders[i].current;
994          hw_stage = si_sqtt_pipe_to_rgp_shader_stage(&shader->key, i);
995       } else {
996          continue;
997       }
998 
999       uint8_t *code = malloc(shader->binary.uploaded_code_size);
1000       if (!code) {
1001          free(record);
1002          return false;
1003       }
1004       memcpy(code, shader->binary.uploaded_code, shader->binary.uploaded_code_size);
1005 
1006       uint64_t va = shader->bo->gpu_address;
1007       unsigned gl_shader_stage = tgsi_processor_to_shader_stage(i);
1008       record->shader_data[gl_shader_stage].hash[0] = _mesa_hash_data(code, shader->binary.uploaded_code_size);
1009       record->shader_data[gl_shader_stage].hash[1] = record->shader_data[gl_shader_stage].hash[0];
1010       record->shader_data[gl_shader_stage].code_size = shader->binary.uploaded_code_size;
1011       record->shader_data[gl_shader_stage].code = code;
1012       record->shader_data[gl_shader_stage].vgpr_count = shader->config.num_vgprs;
1013       record->shader_data[gl_shader_stage].sgpr_count = shader->config.num_sgprs;
1014       record->shader_data[gl_shader_stage].base_address = va & 0xffffffffffff;
1015       record->shader_data[gl_shader_stage].elf_symbol_offset = 0;
1016       record->shader_data[gl_shader_stage].hw_stage = hw_stage;
1017       record->shader_data[gl_shader_stage].is_combined = false;
1018       record->shader_data[gl_shader_stage].scratch_memory_size = shader->config.scratch_bytes_per_wave;
1019       record->shader_data[gl_shader_stage].wavefront_size = si_get_shader_wave_size(shader);
1020 
1021       record->shader_stages_mask |= 1 << gl_shader_stage;
1022       record->num_shaders_combined++;
1023    }
1024 
1025    simple_mtx_lock(&code_object->lock);
1026    list_addtail(&record->list, &code_object->record);
1027    code_object->record_count++;
1028    simple_mtx_unlock(&code_object->lock);
1029 
1030    return true;
1031 }
1032 
1033 bool
si_sqtt_register_pipeline(struct si_context * sctx,uint64_t pipeline_hash,uint64_t base_address,bool is_compute)1034 si_sqtt_register_pipeline(struct si_context* sctx, uint64_t pipeline_hash, uint64_t base_address, bool is_compute)
1035 {
1036    struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
1037 
1038    assert (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline_hash));
1039 
1040    bool result = ac_sqtt_add_pso_correlation(thread_trace_data, pipeline_hash);
1041    if (!result)
1042       return false;
1043 
1044    result = ac_sqtt_add_code_object_loader_event(thread_trace_data, pipeline_hash, base_address);
1045    if (!result)
1046       return false;
1047 
1048    return si_sqtt_add_code_object(sctx, pipeline_hash, is_compute);
1049 }
1050 
1051 void
si_sqtt_describe_pipeline_bind(struct si_context * sctx,uint64_t pipeline_hash,int bind_point)1052 si_sqtt_describe_pipeline_bind(struct si_context* sctx,
1053                                uint64_t pipeline_hash,
1054                                int bind_point)
1055 {
1056    struct rgp_sqtt_marker_pipeline_bind marker = {0};
1057    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
1058 
1059    if (likely(!sctx->thread_trace_enabled)) {
1060       return;
1061    }
1062 
1063    marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
1064    marker.cb_id = 0;
1065    marker.bind_point = bind_point;
1066    marker.api_pso_hash[0] = pipeline_hash;
1067    marker.api_pso_hash[1] = pipeline_hash >> 32;
1068 
1069    si_emit_thread_trace_userdata(sctx, cs, &marker, sizeof(marker) / 4);
1070 }
1071