1 /*
2  * Copyright © 2020 Google, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <inttypes.h>
25 
26 #include "util/list.h"
27 #include "util/u_debug.h"
28 #include "util/u_inlines.h"
29 #include "util/u_fifo.h"
30 #include "util/u_vector.h"
31 
32 #include "u_trace.h"
33 
34 #define __NEEDS_TRACE_PRIV
35 #include "u_trace_priv.h"
36 
37 #define PAYLOAD_BUFFER_SIZE 0x100
38 #define TIMESTAMP_BUF_SIZE 0x1000
39 #define TRACES_PER_CHUNK   (TIMESTAMP_BUF_SIZE / sizeof(uint64_t))
40 
41 #ifdef HAVE_PERFETTO
42 int ut_perfetto_enabled;
43 
44 /**
45  * Global list of contexts, so we can defer starting the queue until
46  * perfetto tracing is started.
47  *
48  * TODO locking
49  */
50 struct list_head ctx_list = { &ctx_list, &ctx_list };
51 #endif
52 
53 struct u_trace_payload_buf {
54    uint32_t refcount;
55 
56    uint8_t *buf;
57    uint8_t *next;
58    uint8_t *end;
59 };
60 
61 struct u_trace_event {
62    const struct u_tracepoint *tp;
63    const void *payload;
64 };
65 
66 /**
67  * A "chunk" of trace-events and corresponding timestamp buffer.  As
68  * trace events are emitted, additional trace chucks will be allocated
69  * as needed.  When u_trace_flush() is called, they are transferred
70  * from the u_trace to the u_trace_context queue.
71  */
72 struct u_trace_chunk {
73    struct list_head node;
74 
75    struct u_trace_context *utctx;
76 
77    /* The number of traces this chunk contains so far: */
78    unsigned num_traces;
79 
80    /* table of trace events: */
81    struct u_trace_event traces[TRACES_PER_CHUNK];
82 
83    /* table of driver recorded 64b timestamps, index matches index
84     * into traces table
85     */
86    void *timestamps;
87 
88    /* Array of u_trace_payload_buf referenced by traces[] elements.
89     */
90    struct u_vector payloads;
91 
92    /* Current payload buffer being written. */
93    struct u_trace_payload_buf *payload;
94 
95    struct util_queue_fence fence;
96 
97    bool last;          /* this chunk is last in batch */
98    bool eof;           /* this chunk is last in frame */
99 
100    void *flush_data; /* assigned by u_trace_flush */
101 
102    /**
103     * Several chunks reference a single flush_data instance thus only
104     * one chunk should be designated to free the data.
105     */
106    bool free_flush_data;
107 };
108 
109 static struct u_trace_payload_buf *
u_trace_payload_buf_create(void)110 u_trace_payload_buf_create(void)
111 {
112    struct u_trace_payload_buf *payload =
113       malloc(sizeof(*payload) + PAYLOAD_BUFFER_SIZE);
114 
115    p_atomic_set(&payload->refcount, 1);
116 
117    payload->buf = (uint8_t *) (payload + 1);
118    payload->end = payload->buf + PAYLOAD_BUFFER_SIZE;
119    payload->next = payload->buf;
120 
121    return payload;
122 }
123 
124 static struct u_trace_payload_buf *
u_trace_payload_buf_ref(struct u_trace_payload_buf * payload)125 u_trace_payload_buf_ref(struct u_trace_payload_buf *payload)
126 {
127    p_atomic_inc(&payload->refcount);
128    return payload;
129 }
130 
131 static void
u_trace_payload_buf_unref(struct u_trace_payload_buf * payload)132 u_trace_payload_buf_unref(struct u_trace_payload_buf *payload)
133 {
134    if (p_atomic_dec_zero(&payload->refcount))
135       free(payload);
136 }
137 
138 static void
free_chunk(void * ptr)139 free_chunk(void *ptr)
140 {
141    struct u_trace_chunk *chunk = ptr;
142 
143    chunk->utctx->delete_timestamp_buffer(chunk->utctx, chunk->timestamps);
144 
145    /* Unref payloads attached to this chunk. */
146    struct u_trace_payload_buf **payload;
147    u_vector_foreach(payload, &chunk->payloads)
148       u_trace_payload_buf_unref(*payload);
149    u_vector_finish(&chunk->payloads);
150 
151    list_del(&chunk->node);
152    free(chunk);
153 }
154 
155 static void
free_chunks(struct list_head * chunks)156 free_chunks(struct list_head *chunks)
157 {
158    while (!list_is_empty(chunks)) {
159       struct u_trace_chunk *chunk = list_first_entry(chunks,
160             struct u_trace_chunk, node);
161       free_chunk(chunk);
162    }
163 }
164 
165 static struct u_trace_chunk *
get_chunk(struct u_trace * ut,size_t payload_size)166 get_chunk(struct u_trace *ut, size_t payload_size)
167 {
168    struct u_trace_chunk *chunk;
169 
170    assert(payload_size <= PAYLOAD_BUFFER_SIZE);
171 
172    /* do we currently have a non-full chunk to append msgs to? */
173    if (!list_is_empty(&ut->trace_chunks)) {
174            chunk = list_last_entry(&ut->trace_chunks,
175                            struct u_trace_chunk, node);
176            /* Can we store a new trace in the chunk? */
177            if (chunk->num_traces < TRACES_PER_CHUNK) {
178               /* If no payload required, nothing else to check. */
179               if (payload_size <= 0)
180                  return chunk;
181 
182               /* If the payload buffer has space for the payload, we're good.
183                */
184               if (chunk->payload &&
185                   (chunk->payload->end - chunk->payload->next) >= payload_size)
186                  return chunk;
187 
188               /* If we don't have enough space in the payload buffer, can we
189                * allocate a new one?
190                */
191               struct u_trace_payload_buf **buf = u_vector_add(&chunk->payloads);
192               *buf = u_trace_payload_buf_create();
193               chunk->payload = *buf;
194               return chunk;
195            }
196            /* we need to expand to add another chunk to the batch, so
197             * the current one is no longer the last one of the batch:
198             */
199            chunk->last = false;
200    }
201 
202    /* .. if not, then create a new one: */
203    chunk = calloc(1, sizeof(*chunk));
204 
205    chunk->utctx = ut->utctx;
206    chunk->timestamps = ut->utctx->create_timestamp_buffer(ut->utctx, TIMESTAMP_BUF_SIZE);
207    chunk->last = true;
208    u_vector_init(&chunk->payloads, 4, sizeof(struct u_trace_payload_buf *));
209    if (payload_size > 0) {
210       struct u_trace_payload_buf **buf = u_vector_add(&chunk->payloads);
211       *buf = u_trace_payload_buf_create();
212       chunk->payload = *buf;
213    }
214 
215    list_addtail(&chunk->node, &ut->trace_chunks);
216 
217    return chunk;
218 }
219 
220 DEBUG_GET_ONCE_BOOL_OPTION(trace, "GPU_TRACE", false)
221 DEBUG_GET_ONCE_FILE_OPTION(trace_file, "GPU_TRACEFILE", NULL, "w")
222 
223 static FILE *
get_tracefile(void)224 get_tracefile(void)
225 {
226    static FILE *tracefile = NULL;
227    static bool firsttime = true;
228 
229    if (firsttime) {
230       tracefile = debug_get_option_trace_file();
231       if (!tracefile && debug_get_option_trace()) {
232          tracefile = stdout;
233       }
234 
235       firsttime = false;
236    }
237 
238    return tracefile;
239 }
240 
241 static void
queue_init(struct u_trace_context * utctx)242 queue_init(struct u_trace_context *utctx)
243 {
244    if (utctx->queue.jobs)
245       return;
246 
247    bool ret = util_queue_init(&utctx->queue, "traceq", 256, 1,
248                               UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY |
249                               UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL);
250    assert(ret);
251 
252    if (!ret)
253       utctx->out = NULL;
254 }
255 
256 void
u_trace_context_init(struct u_trace_context * utctx,void * pctx,u_trace_create_ts_buffer create_timestamp_buffer,u_trace_delete_ts_buffer delete_timestamp_buffer,u_trace_record_ts record_timestamp,u_trace_read_ts read_timestamp,u_trace_delete_flush_data delete_flush_data)257 u_trace_context_init(struct u_trace_context *utctx,
258       void *pctx,
259       u_trace_create_ts_buffer  create_timestamp_buffer,
260       u_trace_delete_ts_buffer  delete_timestamp_buffer,
261       u_trace_record_ts         record_timestamp,
262       u_trace_read_ts           read_timestamp,
263       u_trace_delete_flush_data delete_flush_data)
264 {
265    utctx->pctx = pctx;
266    utctx->create_timestamp_buffer = create_timestamp_buffer;
267    utctx->delete_timestamp_buffer = delete_timestamp_buffer;
268    utctx->record_timestamp = record_timestamp;
269    utctx->read_timestamp = read_timestamp;
270    utctx->delete_flush_data = delete_flush_data;
271 
272    utctx->last_time_ns = 0;
273    utctx->first_time_ns = 0;
274    utctx->frame_nr = 0;
275 
276    list_inithead(&utctx->flushed_trace_chunks);
277 
278    utctx->out = get_tracefile();
279 
280 #ifdef HAVE_PERFETTO
281    list_add(&utctx->node, &ctx_list);
282 #endif
283 
284    if (!u_trace_context_tracing(utctx))
285       return;
286 
287    queue_init(utctx);
288 }
289 
290 void
u_trace_context_fini(struct u_trace_context * utctx)291 u_trace_context_fini(struct u_trace_context *utctx)
292 {
293 #ifdef HAVE_PERFETTO
294    list_del(&utctx->node);
295 #endif
296    if (!utctx->queue.jobs)
297       return;
298    util_queue_finish(&utctx->queue);
299    util_queue_destroy(&utctx->queue);
300    fflush(utctx->out);
301    free_chunks(&utctx->flushed_trace_chunks);
302 }
303 
304 #ifdef HAVE_PERFETTO
305 void
u_trace_perfetto_start(void)306 u_trace_perfetto_start(void)
307 {
308    list_for_each_entry (struct u_trace_context, utctx, &ctx_list, node)
309       queue_init(utctx);
310    ut_perfetto_enabled++;
311 }
312 
313 void
u_trace_perfetto_stop(void)314 u_trace_perfetto_stop(void)
315 {
316    assert(ut_perfetto_enabled > 0);
317    ut_perfetto_enabled--;
318 }
319 #endif
320 
321 static void
process_chunk(void * job,void * gdata,int thread_index)322 process_chunk(void *job, void *gdata, int thread_index)
323 {
324    struct u_trace_chunk *chunk = job;
325    struct u_trace_context *utctx = chunk->utctx;
326 
327    /* For first chunk of batch, accumulated times will be zerod: */
328    if (utctx->out && !utctx->last_time_ns) {
329       fprintf(utctx->out, "+----- NS -----+ +-- Δ --+  +----- MSG -----\n");
330    }
331 
332    for (unsigned idx = 0; idx < chunk->num_traces; idx++) {
333       const struct u_trace_event *evt = &chunk->traces[idx];
334 
335       if (!evt->tp)
336          continue;
337 
338       uint64_t ns = utctx->read_timestamp(utctx, chunk->timestamps, idx, chunk->flush_data);
339       int32_t delta;
340 
341       if (!utctx->first_time_ns)
342          utctx->first_time_ns = ns;
343 
344       if (ns != U_TRACE_NO_TIMESTAMP) {
345          delta = utctx->last_time_ns ? ns - utctx->last_time_ns : 0;
346          utctx->last_time_ns = ns;
347       } else {
348          /* we skipped recording the timestamp, so it should be
349           * the same as last msg:
350           */
351          ns = utctx->last_time_ns;
352          delta = 0;
353       }
354 
355       if (utctx->out) {
356          if (evt->tp->print) {
357             fprintf(utctx->out, "%016"PRIu64" %+9d: %s: ", ns, delta, evt->tp->name);
358             evt->tp->print(utctx->out, evt->payload);
359          } else {
360             fprintf(utctx->out, "%016"PRIu64" %+9d: %s\n", ns, delta, evt->tp->name);
361          }
362       }
363 #ifdef HAVE_PERFETTO
364       if (evt->tp->perfetto) {
365          evt->tp->perfetto(utctx->pctx, ns, chunk->flush_data, evt->payload);
366       }
367 #endif
368    }
369 
370    if (chunk->last) {
371       if (utctx->out) {
372          uint64_t elapsed = utctx->last_time_ns - utctx->first_time_ns;
373          fprintf(utctx->out, "ELAPSED: %"PRIu64" ns\n", elapsed);
374       }
375 
376       utctx->last_time_ns = 0;
377       utctx->first_time_ns = 0;
378    }
379 
380    if (chunk->free_flush_data && utctx->delete_flush_data) {
381       utctx->delete_flush_data(utctx, chunk->flush_data);
382    }
383 
384    if (utctx->out && chunk->eof) {
385       fprintf(utctx->out, "END OF FRAME %u\n", utctx->frame_nr++);
386    }
387 }
388 
389 static void
cleanup_chunk(void * job,void * gdata,int thread_index)390 cleanup_chunk(void *job, void *gdata, int thread_index)
391 {
392    free_chunk(job);
393 }
394 
395 void
u_trace_context_process(struct u_trace_context * utctx,bool eof)396 u_trace_context_process(struct u_trace_context *utctx, bool eof)
397 {
398    struct list_head *chunks = &utctx->flushed_trace_chunks;
399 
400    if (list_is_empty(chunks))
401       return;
402 
403    struct u_trace_chunk *last_chunk = list_last_entry(chunks,
404             struct u_trace_chunk, node);
405    last_chunk->eof = eof;
406 
407    while (!list_is_empty(chunks)) {
408       struct u_trace_chunk *chunk = list_first_entry(chunks,
409             struct u_trace_chunk, node);
410 
411       /* remove from list before enqueuing, because chunk is freed
412        * once it is processed by the queue:
413        */
414       list_delinit(&chunk->node);
415 
416       util_queue_add_job(&utctx->queue, chunk, &chunk->fence,
417             process_chunk, cleanup_chunk,
418             TIMESTAMP_BUF_SIZE);
419    }
420 }
421 
422 
423 void
u_trace_init(struct u_trace * ut,struct u_trace_context * utctx)424 u_trace_init(struct u_trace *ut, struct u_trace_context *utctx)
425 {
426    ut->utctx = utctx;
427    list_inithead(&ut->trace_chunks);
428    ut->enabled = u_trace_context_tracing(utctx);
429 }
430 
431 void
u_trace_fini(struct u_trace * ut)432 u_trace_fini(struct u_trace *ut)
433 {
434    /* Normally the list of trace-chunks would be empty, if they
435     * have been flushed to the trace-context.
436     */
437    free_chunks(&ut->trace_chunks);
438 }
439 
440 bool
u_trace_has_points(struct u_trace * ut)441 u_trace_has_points(struct u_trace *ut)
442 {
443    return !list_is_empty(&ut->trace_chunks);
444 }
445 
446 struct u_trace_iterator
u_trace_begin_iterator(struct u_trace * ut)447 u_trace_begin_iterator(struct u_trace *ut)
448 {
449    if (!ut->enabled)
450       return (struct u_trace_iterator) {NULL, NULL, 0};
451 
452    struct u_trace_chunk *first_chunk =
453       list_first_entry(&ut->trace_chunks, struct u_trace_chunk, node);
454 
455    return (struct u_trace_iterator) { ut, first_chunk, 0};
456 }
457 
458 struct u_trace_iterator
u_trace_end_iterator(struct u_trace * ut)459 u_trace_end_iterator(struct u_trace *ut)
460 {
461    if (!ut->enabled)
462       return (struct u_trace_iterator) {NULL, NULL, 0};
463 
464    struct u_trace_chunk *last_chunk =
465       list_last_entry(&ut->trace_chunks, struct u_trace_chunk, node);
466 
467    return (struct u_trace_iterator) { ut, last_chunk, last_chunk->num_traces};
468 }
469 
470 bool
u_trace_iterator_equal(struct u_trace_iterator a,struct u_trace_iterator b)471 u_trace_iterator_equal(struct u_trace_iterator a,
472                        struct u_trace_iterator b)
473 {
474    return a.ut == b.ut &&
475           a.chunk == b.chunk &&
476           a.event_idx == b.event_idx;
477 }
478 
479 void
u_trace_clone_append(struct u_trace_iterator begin_it,struct u_trace_iterator end_it,struct u_trace * into,void * cmdstream,u_trace_copy_ts_buffer copy_ts_buffer)480 u_trace_clone_append(struct u_trace_iterator begin_it,
481                      struct u_trace_iterator end_it,
482                      struct u_trace *into,
483                      void *cmdstream,
484                      u_trace_copy_ts_buffer copy_ts_buffer)
485 {
486    struct u_trace_chunk *from_chunk = begin_it.chunk;
487    uint32_t from_idx = begin_it.event_idx;
488 
489    while (from_chunk != end_it.chunk || from_idx != end_it.event_idx) {
490       struct u_trace_chunk *to_chunk = get_chunk(into, 0 /* payload_size */);
491 
492       unsigned to_copy = MIN2(TRACES_PER_CHUNK - to_chunk->num_traces,
493                               from_chunk->num_traces - from_idx);
494       if (from_chunk == end_it.chunk)
495          to_copy = MIN2(to_copy, end_it.event_idx - from_idx);
496 
497       copy_ts_buffer(begin_it.ut->utctx, cmdstream,
498                      from_chunk->timestamps, from_idx,
499                      to_chunk->timestamps, to_chunk->num_traces,
500                      to_copy);
501 
502       memcpy(&to_chunk->traces[to_chunk->num_traces],
503              &from_chunk->traces[from_idx],
504              to_copy * sizeof(struct u_trace_event));
505 
506       /* Take a refcount on payloads from from_chunk if needed. */
507       if (begin_it.ut != into) {
508          struct u_trace_payload_buf **in_payload;
509          u_vector_foreach(in_payload, &from_chunk->payloads) {
510             struct u_trace_payload_buf **out_payload =
511                u_vector_add(&to_chunk->payloads);
512 
513             *out_payload = u_trace_payload_buf_ref(*in_payload);
514          }
515       }
516 
517       to_chunk->num_traces += to_copy;
518       from_idx += to_copy;
519 
520       assert(from_idx <= from_chunk->num_traces);
521       if (from_idx == from_chunk->num_traces) {
522          if (from_chunk == end_it.chunk)
523             break;
524 
525          from_idx = 0;
526          from_chunk = LIST_ENTRY(struct u_trace_chunk, from_chunk->node.next, node);
527       }
528    }
529 }
530 
531 void
u_trace_disable_event_range(struct u_trace_iterator begin_it,struct u_trace_iterator end_it)532 u_trace_disable_event_range(struct u_trace_iterator begin_it,
533                             struct u_trace_iterator end_it)
534 {
535    struct u_trace_chunk *current_chunk = begin_it.chunk;
536    uint32_t start_idx = begin_it.event_idx;
537 
538    while(current_chunk != end_it.chunk) {
539       memset(&current_chunk->traces[start_idx], 0,
540              (current_chunk->num_traces - start_idx) * sizeof(struct u_trace_event));
541       start_idx = 0;
542       current_chunk = LIST_ENTRY(struct u_trace_chunk, current_chunk->node.next, node);
543    }
544 
545    memset(&current_chunk->traces[start_idx], 0,
546           (end_it.event_idx - start_idx) * sizeof(struct u_trace_event));
547 }
548 
549 /**
550  * Append a trace event, returning pointer to buffer of tp->payload_sz
551  * to be filled in with trace payload.  Called by generated tracepoint
552  * functions.
553  */
554 void *
u_trace_append(struct u_trace * ut,void * cs,const struct u_tracepoint * tp)555 u_trace_append(struct u_trace *ut, void *cs, const struct u_tracepoint *tp)
556 {
557    struct u_trace_chunk *chunk = get_chunk(ut, tp->payload_sz);
558 
559    assert(tp->payload_sz == ALIGN_NPOT(tp->payload_sz, 8));
560 
561    /* sub-allocate storage for trace payload: */
562    void *payload = NULL;
563    if (tp->payload_sz > 0) {
564       payload = chunk->payload->next;
565       chunk->payload->next += tp->payload_sz;
566    }
567 
568    /* record a timestamp for the trace: */
569    ut->utctx->record_timestamp(ut, cs, chunk->timestamps, chunk->num_traces);
570 
571    chunk->traces[chunk->num_traces] = (struct u_trace_event) {
572          .tp = tp,
573          .payload = payload,
574    };
575 
576    chunk->num_traces++;
577 
578    return payload;
579 }
580 
581 void
u_trace_flush(struct u_trace * ut,void * flush_data,bool free_data)582 u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data)
583 {
584    list_for_each_entry(struct u_trace_chunk, chunk, &ut->trace_chunks, node) {
585       chunk->flush_data = flush_data;
586       chunk->free_flush_data = false;
587    }
588 
589    if (free_data && !list_is_empty(&ut->trace_chunks)) {
590       struct u_trace_chunk *last_chunk =
591          list_last_entry(&ut->trace_chunks, struct u_trace_chunk, node);
592       last_chunk->free_flush_data = true;
593    }
594 
595    /* transfer batch's log chunks to context: */
596    list_splicetail(&ut->trace_chunks, &ut->utctx->flushed_trace_chunks);
597    list_inithead(&ut->trace_chunks);
598 }
599