1 /*
2  * Copyright © 2021 Google, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <perfetto.h>
25 
26 #include "util/u_perfetto.h"
27 
28 #include "freedreno_tracepoints.h"
29 
/* Perfetto clock id used to tag GPU timestamps; assigned in OnStart(). */
static uint32_t gpu_clock_id = 0;

/* CPU (boot-time) timestamp, in ns, before which no new clock sync is sent. */
static uint64_t next_clock_sync_ns = 0;

/**
 * GPU timestamp (in ns) recorded when a clock_sync was emitted; it stays
 * zero until the first clock_sync has gone out.
 *
 * The first clock_sync will carry a *later* timestamp than the first GPU
 * traces, since it is captured from the CPU *after* the first GPU
 * tracepoints happen.  To avoid confusing perfetto we need to drop the
 * GPU traces with timestamps before this.
 */
static uint64_t sync_gpu_ts = 0;
41 
/**
 * Incremental state attached to the data source.  `was_cleared` starts out
 * true and is reset to false by stage_end() once the render-stage
 * descriptors have been sent.
 */
struct FdRenderpassIncrementalState {
   bool was_cleared{true};
};
45 
46 struct FdRenderpassTraits : public perfetto::DefaultDataSourceTraits {
47    using IncrementalStateType = FdRenderpassIncrementalState;
48 };
49 
50 class FdRenderpassDataSource : public perfetto::DataSource<FdRenderpassDataSource, FdRenderpassTraits> {
51 public:
OnSetup(const SetupArgs &)52    void OnSetup(const SetupArgs &) override
53    {
54       // Use this callback to apply any custom configuration to your data source
55       // based on the TraceConfig in SetupArgs.
56    }
57 
OnStart(const StartArgs &)58    void OnStart(const StartArgs &) override
59    {
60       // This notification can be used to initialize the GPU driver, enable
61       // counters, etc. StartArgs will contains the DataSourceDescriptor,
62       // which can be extended.
63       u_trace_perfetto_start();
64       PERFETTO_LOG("Tracing started");
65 
66       /* Note: clock_id's below 128 are reserved.. for custom clock sources,
67        * using the hash of a namespaced string is the recommended approach.
68        * See: https://perfetto.dev/docs/concepts/clock-sync
69        */
70       gpu_clock_id =
71          _mesa_hash_string("org.freedesktop.mesa.freedreno") | 0x80000000;
72    }
73 
OnStop(const StopArgs &)74    void OnStop(const StopArgs &) override
75    {
76       PERFETTO_LOG("Tracing stopped");
77 
78       // Undo any initialization done in OnStart.
79       u_trace_perfetto_stop();
80       // TODO we should perhaps block until queued traces are flushed?
81 
82       Trace([](FdRenderpassDataSource::TraceContext ctx) {
83          auto packet = ctx.NewTracePacket();
84          packet->Finalize();
85          ctx.Flush();
86       });
87    }
88 };
89 
90 PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
91 PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
92 
93 static void
send_descriptors(FdRenderpassDataSource::TraceContext & ctx,uint64_t ts_ns)94 send_descriptors(FdRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
95 {
96    PERFETTO_LOG("Sending renderstage descriptors");
97 
98    auto packet = ctx.NewTracePacket();
99 
100    packet->set_timestamp(0);
101 //   packet->set_timestamp(ts_ns);
102 //   packet->set_timestamp_clock_id(gpu_clock_id);
103 
104    auto event = packet->set_gpu_render_stage_event();
105    event->set_gpu_id(0);
106 
107    auto spec = event->set_specifications();
108 
109    for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
110       auto desc = spec->add_hw_queue();
111 
112       desc->set_name(queues[i].name);
113       desc->set_description(queues[i].desc);
114    }
115 
116    for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
117       auto desc = spec->add_stage();
118 
119       desc->set_name(stages[i].name);
120       if (stages[i].desc)
121          desc->set_description(stages[i].desc);
122    }
123 }
124 
125 static void
stage_start(struct pipe_context * pctx,uint64_t ts_ns,enum fd_stage_id stage)126 stage_start(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
127 {
128    struct fd_context *ctx = fd_context(pctx);
129    struct fd_perfetto_state *p = &ctx->perfetto;
130 
131    p->start_ts[stage] = ts_ns;
132 }
133 
134 static void
stage_end(struct pipe_context * pctx,uint64_t ts_ns,enum fd_stage_id stage)135 stage_end(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
136 {
137    struct fd_context *ctx = fd_context(pctx);
138    struct fd_perfetto_state *p = &ctx->perfetto;
139 
140    /* If we haven't managed to calibrate the alignment between GPU and CPU
141     * timestamps yet, then skip this trace, otherwise perfetto won't know
142     * what to do with it.
143     */
144    if (!sync_gpu_ts)
145       return;
146 
147    FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
148       if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
149          send_descriptors(tctx, p->start_ts[stage]);
150          state->was_cleared = false;
151       }
152 
153       auto packet = tctx.NewTracePacket();
154 
155       packet->set_timestamp(p->start_ts[stage]);
156       packet->set_timestamp_clock_id(gpu_clock_id);
157 
158       auto event = packet->set_gpu_render_stage_event();
159       event->set_event_id(0); // ???
160       event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
161       event->set_duration(ts_ns - p->start_ts[stage]);
162       event->set_stage_id(stage);
163       event->set_context((uintptr_t)pctx);
164 
165       /* The "surface" meta-stage has extra info about render target: */
166       if (stage == SURFACE_STAGE_ID) {
167 
168          event->set_submission_id(p->submit_id);
169 
170          if (p->cbuf0_format) {
171             auto data = event->add_extra_data();
172 
173             data->set_name("color0 format");
174             data->set_value(util_format_short_name(p->cbuf0_format));
175          }
176 
177          if (p->zs_format) {
178             auto data = event->add_extra_data();
179 
180             data->set_name("zs format");
181             data->set_value(util_format_short_name(p->zs_format));
182          }
183 
184          {
185             auto data = event->add_extra_data();
186 
187             data->set_name("width");
188             data->set_value(std::to_string(p->width));
189          }
190 
191          {
192             auto data = event->add_extra_data();
193 
194             data->set_name("height");
195             data->set_value(std::to_string(p->height));
196          }
197 
198          {
199             auto data = event->add_extra_data();
200 
201             data->set_name("MSAA");
202             data->set_value(std::to_string(p->samples));
203          }
204 
205          {
206             auto data = event->add_extra_data();
207 
208             data->set_name("MRTs");
209             data->set_value(std::to_string(p->mrts));
210          }
211 
212          // "renderMode"
213          // "surfaceID"
214 
215          if (p->nbins) {
216             auto data = event->add_extra_data();
217 
218             data->set_name("numberOfBins");
219             data->set_value(std::to_string(p->nbins));
220          }
221 
222          if (p->binw) {
223             auto data = event->add_extra_data();
224 
225             data->set_name("binWidth");
226             data->set_value(std::to_string(p->binw));
227          }
228 
229          if (p->binh) {
230             auto data = event->add_extra_data();
231 
232             data->set_name("binHeight");
233             data->set_value(std::to_string(p->binh));
234          }
235       }
236    });
237 }
238 
239 #ifdef __cplusplus
240 extern "C" {
241 #endif
242 
243 void
fd_perfetto_init(void)244 fd_perfetto_init(void)
245 {
246    util_perfetto_init();
247 
248    perfetto::DataSourceDescriptor dsd;
249    dsd.set_name("gpu.renderstages.msm");
250    FdRenderpassDataSource::Register(dsd);
251 }
252 
253 static void
sync_timestamp(struct fd_context * ctx)254 sync_timestamp(struct fd_context *ctx)
255 {
256    uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
257    uint64_t gpu_ts;
258 
259    if (cpu_ts < next_clock_sync_ns)
260       return;
261 
262    if (fd_pipe_get_param(ctx->pipe, FD_TIMESTAMP, &gpu_ts)) {
263       PERFETTO_ELOG("Could not sync CPU and GPU clocks");
264       return;
265    }
266 
267    /* convert GPU ts into ns: */
268    gpu_ts = ctx->ts_to_ns(gpu_ts);
269 
270    FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
271       auto packet = tctx.NewTracePacket();
272 
273       packet->set_timestamp(cpu_ts);
274 
275       auto event = packet->set_clock_snapshot();
276 
277       {
278          auto clock = event->add_clocks();
279 
280          clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
281          clock->set_timestamp(cpu_ts);
282       }
283 
284       {
285          auto clock = event->add_clocks();
286 
287          clock->set_clock_id(gpu_clock_id);
288          clock->set_timestamp(gpu_ts);
289       }
290 
291       sync_gpu_ts = gpu_ts;
292       next_clock_sync_ns = cpu_ts + 30000000;
293    });
294 }
295 
296 static void
emit_submit_id(struct fd_context * ctx)297 emit_submit_id(struct fd_context *ctx)
298 {
299    FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
300       auto packet = tctx.NewTracePacket();
301 
302       packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
303 
304       auto event = packet->set_vulkan_api_event();
305       auto submit = event->set_vk_queue_submit();
306 
307       submit->set_submission_id(ctx->submit_count);
308    });
309 }
310 
311 void
fd_perfetto_submit(struct fd_context * ctx)312 fd_perfetto_submit(struct fd_context *ctx)
313 {
314    sync_timestamp(ctx);
315    emit_submit_id(ctx);
316 }
317 
318 /*
319  * Trace callbacks, called from u_trace once the timestamps from GPU have been
320  * collected.
321  */
322 
323 void
fd_start_render_pass(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_start_render_pass * payload)324 fd_start_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
325                      const void *flush_data,
326                      const struct trace_start_render_pass *payload)
327 {
328    stage_start(pctx, ts_ns, SURFACE_STAGE_ID);
329 
330    struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;
331 
332    p->submit_id = payload->submit_id;
333    p->cbuf0_format = payload->cbuf0_format;
334    p->zs_format = payload->zs_format;
335    p->width = payload->width;
336    p->height = payload->height;
337    p->mrts = payload->mrts;
338    p->samples = payload->samples;
339    p->nbins = payload->nbins;
340    p->binw = payload->binw;
341    p->binh = payload->binh;
342 }
343 
344 void
fd_end_render_pass(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_end_render_pass * payload)345 fd_end_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
346                    const void *flush_data,
347                    const struct trace_end_render_pass *payload)
348 {
349    stage_end(pctx, ts_ns, SURFACE_STAGE_ID);
350 }
351 
352 void
fd_start_binning_ib(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_start_binning_ib * payload)353 fd_start_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
354                     const void *flush_data,
355                     const struct trace_start_binning_ib *payload)
356 {
357    stage_start(pctx, ts_ns, BINNING_STAGE_ID);
358 }
359 
360 void
fd_end_binning_ib(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_end_binning_ib * payload)361 fd_end_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
362                   const void *flush_data,
363                   const struct trace_end_binning_ib *payload)
364 {
365    stage_end(pctx, ts_ns, BINNING_STAGE_ID);
366 }
367 
368 void
fd_start_draw_ib(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_start_draw_ib * payload)369 fd_start_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
370                  const void *flush_data,
371                  const struct trace_start_draw_ib *payload)
372 {
373    stage_start(
374       pctx, ts_ns,
375       fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
376 }
377 
378 void
fd_end_draw_ib(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_end_draw_ib * payload)379 fd_end_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
380                const void *flush_data,
381                const struct trace_end_draw_ib *payload)
382 {
383    stage_end(
384       pctx, ts_ns,
385       fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
386 }
387 
388 void
fd_start_blit(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_start_blit * payload)389 fd_start_blit(struct pipe_context *pctx, uint64_t ts_ns,
390               const void *flush_data,
391               const struct trace_start_blit *payload)
392 {
393    stage_start(pctx, ts_ns, BLIT_STAGE_ID);
394 }
395 
396 void
fd_end_blit(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_end_blit * payload)397 fd_end_blit(struct pipe_context *pctx, uint64_t ts_ns,
398             const void *flush_data,
399             const struct trace_end_blit *payload)
400 {
401    stage_end(pctx, ts_ns, BLIT_STAGE_ID);
402 }
403 
404 void
fd_start_compute(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_start_compute * payload)405 fd_start_compute(struct pipe_context *pctx, uint64_t ts_ns,
406                  const void *flush_data,
407                  const struct trace_start_compute *payload)
408 {
409    stage_start(pctx, ts_ns, COMPUTE_STAGE_ID);
410 }
411 
412 void
fd_end_compute(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_end_compute * payload)413 fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns,
414                const void *flush_data,
415                const struct trace_end_compute *payload)
416 {
417    stage_end(pctx, ts_ns, COMPUTE_STAGE_ID);
418 }
419 
420 void
fd_start_clear_restore(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_start_clear_restore * payload)421 fd_start_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
422                        const void *flush_data,
423                        const struct trace_start_clear_restore *payload)
424 {
425    stage_start(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
426 }
427 
428 void
fd_end_clear_restore(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_end_clear_restore * payload)429 fd_end_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
430                      const void *flush_data,
431                      const struct trace_end_clear_restore *payload)
432 {
433    stage_end(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
434 }
435 
436 void
fd_start_resolve(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_start_resolve * payload)437 fd_start_resolve(struct pipe_context *pctx, uint64_t ts_ns,
438                  const void *flush_data,
439                  const struct trace_start_resolve *payload)
440 {
441    stage_start(pctx, ts_ns, RESOLVE_STAGE_ID);
442 }
443 
444 void
fd_end_resolve(struct pipe_context * pctx,uint64_t ts_ns,const void * flush_data,const struct trace_end_resolve * payload)445 fd_end_resolve(struct pipe_context *pctx, uint64_t ts_ns,
446                const void *flush_data,
447                const struct trace_end_resolve *payload)
448 {
449    stage_end(pctx, ts_ns, RESOLVE_STAGE_ID);
450 }
451 
452 #ifdef __cplusplus
453 }
454 #endif
455