1 #include <inttypes.h>
2 #include "profiler.h"
3
4 #include "darray.h"
5 #include "dstr.h"
6 #include "platform.h"
7 #include "threading.h"
8
9 #include <math.h>
10
11 #include <zlib.h>
12
13 //#define TRACK_OVERHEAD
14
/* Point-in-time copy of all aggregated profiler data; one entry per
 * registered root (see profile_snapshot_create). */
struct profiler_snapshot {
	DARRAY(profiler_snapshot_entry_t) roots;
};
18
/* Aggregated timing data for one call site, exposed to snapshot
 * consumers. All time values are microseconds (see diff_ns_to_usec). */
struct profiler_snapshot_entry {
	const char *name; /* interned pointer; compared by identity */
	profiler_time_entries_t times; /* (time_delta, count) histogram of call durations */
	uint64_t min_time;
	uint64_t max_time;
	uint64_t overall_count; /* total number of recorded calls */
	profiler_time_entries_t times_between_calls; /* histogram of call-to-call gaps */
	uint64_t expected_time_between_calls; /* 0 unless set via profile_register_root */
	uint64_t min_time_between_calls;
	uint64_t max_time_between_calls;
	uint64_t overall_between_calls_count;
	DARRAY(profiler_snapshot_entry_t) children;
};
32
typedef struct profiler_time_entry profiler_time_entry;

/* One profile_start/profile_end pair on a thread's call stack; calls
 * form a tree via parent/children. Root calls are heap-allocated,
 * nested calls live by value in the parent's children array. */
typedef struct profile_call profile_call;
struct profile_call {
	const char *name; /* interned pointer; compared by identity */
#ifdef TRACK_OVERHEAD
	uint64_t overhead_start; /* sampled before start_time in profile_start */
#endif
	uint64_t start_time; /* os_gettime_ns() at profile_start */
	uint64_t end_time;   /* os_gettime_ns() at profile_end */
#ifdef TRACK_OVERHEAD
	uint64_t overhead_end; /* sampled after end_time in profile_end */
#endif
	uint64_t expected_time_between_calls;
	DARRAY(profile_call) children;
	profile_call *parent; /* NULL for root calls */
};
50
/* Open-addressing hash slot. probes == 0 marks an empty slot; otherwise
 * it records the probe distance, used for Robin-Hood-style displacement
 * in add_hashmap_entry. */
typedef struct profile_times_table_entry profile_times_table_entry;
struct profile_times_table_entry {
	size_t probes;
	profiler_time_entry entry;
};
56
/* usec -> call-count hash map. On growth the previous array is parked
 * in old_entries and migrated incrementally (migrate_old_entries) to
 * bound per-insert latency. */
typedef struct profile_times_table profile_times_table;
struct profile_times_table {
	size_t size;            /* slot count of entries */
	size_t occupied;        /* used slots in entries */
	size_t max_probe_count; /* longest probe sequence seen */
	profile_times_table_entry *entries;

	size_t old_start_index; /* next old_entries index to migrate */
	size_t old_occupied;    /* old entries still pending migration */
	profile_times_table_entry *old_entries; /* NULL when nothing pending */
};
68
/* Aggregated data for one call site in the live profiler tree; the
 * mutable counterpart of profiler_snapshot_entry. */
typedef struct profile_entry profile_entry;
struct profile_entry {
	const char *name; /* interned pointer; compared by identity */
	profile_times_table times; /* call-duration histogram (usec) */
#ifdef TRACK_OVERHEAD
	profile_times_table overhead; /* profiler's own bookkeeping cost */
#endif
	uint64_t expected_time_between_calls; /* usec; 0 if unregistered */
	profile_times_table times_between_calls;
	DARRAY(profile_entry) children;
};
80
/* Top-level profiled site. mutex serializes merging of finished call
 * trees into entry; prev_call keeps the previously completed tree so
 * the gap between consecutive calls can be measured. */
typedef struct profile_root_entry profile_root_entry;
struct profile_root_entry {
	pthread_mutex_t *mutex;
	const char *name;
	profile_entry *entry;
	profile_call *prev_call;
};
88
/* Convert the nanosecond interval [prev, next] to microseconds,
 * rounded to the nearest microsecond. */
static inline uint64_t diff_ns_to_usec(uint64_t prev, uint64_t next)
{
	uint64_t delta_ns = next - prev;
	return (delta_ns + 500) / 1000;
}
93
/* Record a new probe-sequence length if it exceeds the current max. */
static inline void update_max_probes(profile_times_table *map, size_t val)
{
	if (map->max_probe_count < val)
		map->max_probe_count = val;
}
99
100 static void migrate_old_entries(profile_times_table *map, bool limit_items);
101 static void grow_hashmap(profile_times_table *map, uint64_t usec,
102 uint64_t count);
103
/* Insert or accumulate a (usec, count) sample in the hash map.
 * Linear probing with Robin-Hood displacement: an occupant closer to
 * its home slot than the incoming item is evicted and re-inserted.
 * Grows the map when load exceeds 70%. */
static void add_hashmap_entry(profile_times_table *map, uint64_t usec,
			      uint64_t count)
{
	size_t probes = 1;

	size_t start = usec % map->size;
	for (;; probes += 1) {
		size_t idx = (start + probes) % map->size;
		profile_times_table_entry *entry = &map->entries[idx];
		if (!entry->probes) {
			/* empty slot: claim it */
			entry->probes = probes;
			entry->entry.time_delta = usec;
			entry->entry.count = count;
			map->occupied += 1;
			update_max_probes(map, probes);
			return;
		}

		if (entry->entry.time_delta == usec) {
			/* same bucket already present: accumulate */
			entry->entry.count += count;
			return;
		}

		/* occupant is at least as displaced as we are: keep probing */
		if (entry->probes >= probes)
			continue;

		if (map->occupied / (double)map->size > 0.7) {
			grow_hashmap(map, usec, count);
			return;
		}

		/* evict the "richer" occupant and continue inserting it */
		size_t old_probes = entry->probes;
		uint64_t old_count = entry->entry.count;
		uint64_t old_usec = entry->entry.time_delta;

		entry->probes = probes;
		entry->entry.count = count;
		entry->entry.time_delta = usec;

		update_max_probes(map, probes);

		probes = old_probes;
		count = old_count;
		usec = old_usec;

		start = usec % map->size;
	}
}
152
/* Initialize map with `size` zeroed slots and no pending migration. */
static void init_hashmap(profile_times_table *map, size_t size)
{
	*map = (profile_times_table){
		.size = size,
		.entries = bzalloc(sizeof(profile_times_table_entry) * size),
	};
}
163
/* Move samples from the retired (pre-grow) array into the current one.
 * With limit_items set, at most 8 slots are scanned per call so the
 * cost amortizes across inserts; otherwise everything is migrated.
 * The old array is freed once old_occupied reaches zero. */
static void migrate_old_entries(profile_times_table *map, bool limit_items)
{
	if (!map->old_entries)
		return;

	if (!map->old_occupied) {
		bfree(map->old_entries);
		map->old_entries = NULL;
		return;
	}

	for (size_t i = 0; !limit_items || i < 8; i++, map->old_start_index++) {
		if (!map->old_occupied)
			return;

		/* old_start_index persists across calls so each slot is
		 * visited only once */
		profile_times_table_entry *entry =
			&map->old_entries[map->old_start_index];
		if (!entry->probes)
			continue;

		add_hashmap_entry(map, entry->entry.time_delta,
				  entry->entry.count);
		map->old_occupied -= 1;
	}
}
189
/* Double the table (minimum 16 slots), parking the previous array for
 * incremental migration, then insert the pending (usec, count). */
static void grow_hashmap(profile_times_table *map, uint64_t usec,
			 uint64_t count)
{
	/* finish any migration left over from the previous grow */
	migrate_old_entries(map, false);

	size_t prev_size = map->size;
	size_t prev_occupied = map->occupied;
	profile_times_table_entry *prev_entries = map->entries;

	size_t new_size = prev_size * 2;
	if (new_size < 16)
		new_size = 16;

	init_hashmap(map, new_size);

	map->old_occupied = prev_occupied;
	map->old_entries = prev_entries;

	add_hashmap_entry(map, usec, count);
}
206
/* Initialize an aggregation entry with minimal (1-slot) hash maps;
 * returns `entry` for call chaining. */
static profile_entry *init_entry(profile_entry *entry, const char *name)
{
	entry->name = name;
	entry->expected_time_between_calls = 0;

	init_hashmap(&entry->times, 1);
#ifdef TRACK_OVERHEAD
	init_hashmap(&entry->overhead, 1);
#endif
	init_hashmap(&entry->times_between_calls, 1);

	return entry;
}
218
/* Find the child of `parent` with the given interned name, creating
 * and initializing it when absent. Names compare by pointer identity. */
static profile_entry *get_child(profile_entry *parent, const char *name)
{
	profile_entry *children = parent->children.array;
	const size_t num = parent->children.num;

	for (size_t i = 0; i < num; i++) {
		if (children[i].name == name)
			return &children[i];
	}

	return init_entry(da_push_back_new(parent->children), name);
}
230
/* Fold a completed call tree into the aggregated profile_entry tree.
 * prev_call (may be NULL) is the previously completed root call; its
 * start time yields the call-to-call gap for registered roots. */
static void merge_call(profile_entry *entry, profile_call *call,
		       profile_call *prev_call)
{
	/* merge children first, depth-first */
	const size_t num = call->children.num;
	for (size_t i = 0; i < num; i++) {
		profile_call *child = &call->children.array[i];
		merge_call(get_child(entry, child->name), child, NULL);
	}

	/* gap between consecutive calls, only for registered roots */
	if (entry->expected_time_between_calls != 0 && prev_call) {
		migrate_old_entries(&entry->times_between_calls, true);
		uint64_t usec = diff_ns_to_usec(prev_call->start_time,
						call->start_time);
		add_hashmap_entry(&entry->times_between_calls, usec, 1);
	}

	migrate_old_entries(&entry->times, true);
	uint64_t usec = diff_ns_to_usec(call->start_time, call->end_time);
	add_hashmap_entry(&entry->times, usec, 1);

#ifdef TRACK_OVERHEAD
	/* profiler bookkeeping time surrounding the measured span */
	migrate_old_entries(&entry->overhead, true);
	usec = diff_ns_to_usec(call->overhead_start, call->start_time);
	usec += diff_ns_to_usec(call->end_time, call->overhead_end);
	add_hashmap_entry(&entry->overhead, usec, 1);
#endif
}
258
/* Global profiler state; root_mutex guards `enabled` and root_entries. */
static bool enabled = false;
static pthread_mutex_t root_mutex = PTHREAD_MUTEX_INITIALIZER;
static DARRAY(profile_root_entry) root_entries;

/* Per-thread stack top and a cached copy of `enabled` so hot paths can
 * skip root_mutex (re-synced via profile_reenable_thread). */
static THREAD_LOCAL profile_call *thread_context = NULL;
static THREAD_LOCAL bool thread_enabled = true;
265
/* Enable profiling globally; taking root_mutex orders the change with
 * concurrent lock_root callers. */
void profiler_start(void)
{
	pthread_mutex_lock(&root_mutex);
	enabled = true;
	pthread_mutex_unlock(&root_mutex);
}
272
/* Disable profiling globally; threads notice lazily via lock_root. */
void profiler_stop(void)
{
	pthread_mutex_lock(&root_mutex);
	enabled = false;
	pthread_mutex_unlock(&root_mutex);
}
279
/* Re-sync this thread's cached enabled flag with the global one, for
 * threads that disabled themselves while the profiler was stopped. */
void profile_reenable_thread(void)
{
	if (thread_enabled)
		return;

	pthread_mutex_lock(&root_mutex);
	thread_enabled = enabled;
	pthread_mutex_unlock(&root_mutex);
}
289
/* Acquire root_mutex if profiling is enabled. Returns true with the
 * mutex HELD (caller must unlock); returns false with the mutex
 * released, also disabling this thread's fast-path flag. */
static bool lock_root(void)
{
	pthread_mutex_lock(&root_mutex);
	if (!enabled) {
		pthread_mutex_unlock(&root_mutex);
		thread_enabled = false;
		return false;
	}

	return true;
}
301
get_root_entry(const char * name)302 static profile_root_entry *get_root_entry(const char *name)
303 {
304 profile_root_entry *r_entry = NULL;
305
306 for (size_t i = 0; i < root_entries.num; i++) {
307 if (root_entries.array[i].name == name) {
308 r_entry = &root_entries.array[i];
309 break;
310 }
311 }
312
313 if (!r_entry) {
314 r_entry = da_push_back_new(root_entries);
315 r_entry->mutex = bmalloc(sizeof(pthread_mutex_t));
316 pthread_mutex_init(r_entry->mutex, NULL);
317
318 r_entry->name = name;
319 r_entry->entry = bzalloc(sizeof(profile_entry));
320 init_entry(r_entry->entry, name);
321 }
322
323 return r_entry;
324 }
325
profile_register_root(const char * name,uint64_t expected_time_between_calls)326 void profile_register_root(const char *name,
327 uint64_t expected_time_between_calls)
328 {
329 if (!lock_root())
330 return;
331
332 get_root_entry(name)->entry->expected_time_between_calls =
333 (expected_time_between_calls + 500) / 1000;
334 pthread_mutex_unlock(&root_mutex);
335 }
336
337 static void free_call_context(profile_call *context);
338
/* Hand a finished root call tree to the aggregator: swap it in as the
 * root's prev_call, merge it under the per-root mutex, then free the
 * tree it replaced. root_mutex (taken by lock_root) is dropped as soon
 * as the per-root mutex is held, so different roots merge in parallel.
 * If the profiler is disabled the tree is simply discarded. */
static void merge_context(profile_call *context)
{
	pthread_mutex_t *mutex = NULL;
	profile_entry *entry = NULL;
	profile_call *prev_call = NULL;

	if (!lock_root()) {
		free_call_context(context);
		return;
	}

	profile_root_entry *r_entry = get_root_entry(context->name);

	mutex = r_entry->mutex;
	entry = r_entry->entry;
	prev_call = r_entry->prev_call;

	/* keep this tree around for the next between-calls measurement */
	r_entry->prev_call = context;

	pthread_mutex_lock(mutex);
	pthread_mutex_unlock(&root_mutex);

	merge_call(entry, context, prev_call);

	pthread_mutex_unlock(mutex);

	free_call_context(prev_call);
}
367
/* Begin timing the named scope on the calling thread. `name` must be a
 * stable (interned) pointer: names compare by identity, not strcmp.
 * Root calls are heap-allocated; nested calls are stored by value in
 * the parent's children array. */
void profile_start(const char *name)
{
	if (!thread_enabled)
		return;

	profile_call new_call = {
		.name = name,
#ifdef TRACK_OVERHEAD
		.overhead_start = os_gettime_ns(),
#endif
		.parent = thread_context,
	};

	profile_call *call = NULL;

	if (new_call.parent) {
		/* copied into the parent's darray; keep the darray's copy */
		size_t idx = da_push_back(new_call.parent->children, &new_call);
		call = &new_call.parent->children.array[idx];
	} else {
		call = bmalloc(sizeof(profile_call));
		memcpy(call, &new_call, sizeof(profile_call));
	}

	thread_context = call;
	/* sampled last so the setup above is excluded from the timing */
	call->start_time = os_gettime_ns();
}
394
/* Finish timing the innermost scope on this thread. `name` must be the
 * interned pointer passed to the matching profile_start. On mismatch,
 * the recovery below assumes profile_end calls were missed and closes
 * intermediate scopes until the named one is reached. */
void profile_end(const char *name)
{
	/* sample first so bookkeeping below is excluded from the timing */
	uint64_t end = os_gettime_ns();
	if (!thread_enabled)
		return;

	profile_call *call = thread_context;
	if (!call) {
		blog(LOG_ERROR, "Called profile end with no active profile");
		return;
	}

	if (!call->name)
		call->name = name;

	if (call->name != name) {
		blog(LOG_ERROR,
		     "Called profile end with mismatching name: "
		     "start(\"%s\"[%p]) <-> end(\"%s\"[%p])",
		     call->name, call->name, name, name);

		/* search up the stack for `name`; if absent, this end
		 * call is spurious and is ignored */
		profile_call *parent = call->parent;
		while (parent && parent->parent && parent->name != name)
			parent = parent->parent;

		if (!parent || parent->name != name)
			return;

		/* close every scope between here and the one actually
		 * being ended */
		while (call->name != name) {
			profile_end(call->name);
			call = call->parent;
		}
	}

	thread_context = call->parent;

	call->end_time = end;
#ifdef TRACK_OVERHEAD
	call->overhead_end = os_gettime_ns();
#endif

	if (call->parent)
		return;

	/* completed root call: hand the whole tree to the aggregator */
	merge_context(call);
}
441
profiler_time_entry_compare(const void * first,const void * second)442 static int profiler_time_entry_compare(const void *first, const void *second)
443 {
444 int64_t diff = ((profiler_time_entry *)second)->time_delta -
445 ((profiler_time_entry *)first)->time_delta;
446 return diff < 0 ? -1 : (diff > 0 ? 1 : 0);
447 }
448
/* Finish any pending migration, then flatten the hash map into
 * entry_buffer (unsorted). Returns the total call count; the optional
 * min_/max_ outputs receive the smallest/largest time_delta seen
 * (min stays ~(uint64_t)0 when the map is empty). */
static uint64_t copy_map_to_array(profile_times_table *map,
				  profiler_time_entries_t *entry_buffer,
				  uint64_t *min_, uint64_t *max_)
{
	migrate_old_entries(map, false);

	/* reuse the buffer's existing capacity */
	da_reserve((*entry_buffer), map->occupied);
	da_resize((*entry_buffer), 0);

	uint64_t min__ = ~(uint64_t)0;
	uint64_t max__ = 0;

	uint64_t calls = 0;
	for (size_t i = 0; i < map->size; i++) {
		if (!map->entries[i].probes)
			continue;

		profiler_time_entry *entry = &map->entries[i].entry;

		da_push_back((*entry_buffer), entry);

		calls += entry->count;
		min__ = (min__ < entry->time_delta) ? min__ : entry->time_delta;
		max__ = (max__ > entry->time_delta) ? max__ : entry->time_delta;
	}

	if (min_)
		*min_ = min__;
	if (max_)
		*max_ = max__;

	return calls;
}
482
/* Printer callback used by profile_print_func to walk a snapshot tree;
 * `active` is a bitmask of ancestor levels that still have siblings. */
typedef void (*profile_entry_print_func)(profiler_snapshot_entry_t *entry,
					 struct dstr *indent_buffer,
					 struct dstr *output_buffer,
					 unsigned indent, uint64_t active,
					 uint64_t parent_calls);

/* UTF-8 box-drawing fragments for the tree indent */
#define VPIPE_RIGHT " \xe2\x94\xa3"
#define VPIPE " \xe2\x94\x83"
#define DOWN_RIGHT " \xe2\x94\x97"
493
make_indent_string(struct dstr * indent_buffer,unsigned indent,uint64_t active)494 static void make_indent_string(struct dstr *indent_buffer, unsigned indent,
495 uint64_t active)
496 {
497 indent_buffer->len = 0;
498
499 if (!indent) {
500 dstr_cat_ch(indent_buffer, 0);
501 return;
502 }
503
504 for (size_t i = 0; i < indent; i++) {
505 const char *fragment = "";
506 bool last = i + 1 == indent;
507 if (active & ((uint64_t)1 << i))
508 fragment = last ? VPIPE_RIGHT : VPIPE;
509 else
510 fragment = last ? DOWN_RIGHT : " ";
511
512 dstr_cat(indent_buffer, fragment);
513 }
514 }
515
/* Compute the 99th percentile and median of a (time_delta, count)
 * histogram sorted by DESCENDING time_delta (profiler_time_entry_compare
 * order), plus the percentage of calls below expected_time_between_calls
 * (0% when no expected time is set). */
static void gather_stats(uint64_t expected_time_between_calls,
			 profiler_time_entries_t *entries, uint64_t calls,
			 uint64_t *percentile99, uint64_t *median,
			 double *percent_within_bounds)
{
	*percentile99 = 0;
	*median = 0;
	*percent_within_bounds = 0.;

	if (!entries->num)
		return;

	uint64_t accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		uint64_t old_accu = accu;
		accu += entries->array[i].count;

		/* entries are sorted descending, so the bucket that
		 * crosses 1% of the calls is the 99th percentile */
		if (old_accu < calls * 0.01 && accu >= calls * 0.01)
			*percentile99 = entries->array[i].time_delta;

		/* independent `if`, not `else if`: a single large bucket
		 * can cross both the 1% and the 50% threshold at once,
		 * and the median must still be recorded */
		if (old_accu < calls * 0.5 && accu >= calls * 0.5) {
			*median = entries->array[i].time_delta;
			break;
		}
	}

	if (!expected_time_between_calls)
		return;

	/* sum the calls at or above the expected time (descending order
	 * allows early exit); the remainder is within bounds */
	accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		if (entries->array[i].time_delta <
		    expected_time_between_calls)
			break;

		accu += entries->array[i].count;
	}
	*percent_within_bounds = (1. - (double)accu / calls) * 100;
}
561
562 #define G_MS "g\xC2\xA0ms"
563
/* Log one snapshot entry's call-duration statistics and recurse into
 * its children, drawing a tree via make_indent_string. `active` is the
 * bitmask of ancestor levels that still have siblings below. */
static void profile_print_entry(profiler_snapshot_entry_t *entry,
				struct dstr *indent_buffer,
				struct dstr *output_buffer, unsigned indent,
				uint64_t active, uint64_t parent_calls)
{
	uint64_t calls = entry->overall_count;
	uint64_t min_ = entry->min_time;
	uint64_t max_ = entry->max_time;
	uint64_t percentile99 = 0;
	uint64_t median = 0;
	double percent_within_bounds = 0.;
	gather_stats(entry->expected_time_between_calls, &entry->times, calls,
		     &percentile99, &median, &percent_within_bounds);

	make_indent_string(indent_buffer, indent, active);

	if (min_ == max_) {
		/* constant duration: short form */
		dstr_printf(output_buffer, "%s%s: %" G_MS, indent_buffer->array,
			    entry->name, min_ / 1000.);
	} else {
		dstr_printf(output_buffer,
			    "%s%s: min=%" G_MS ", median=%" G_MS ", "
			    "max=%" G_MS ", 99th percentile=%" G_MS,
			    indent_buffer->array, entry->name, min_ / 1000.,
			    median / 1000., max_ / 1000., percentile99 / 1000.);

		if (entry->expected_time_between_calls) {
			double expected_ms =
				entry->expected_time_between_calls / 1000.;
			dstr_catf(output_buffer, ", %g%% below %" G_MS,
				  percent_within_bounds, expected_ms);
		}
	}

	if (parent_calls && calls != parent_calls) {
		double calls_per_parent = (double)calls / parent_calls;
		/* only mention the ratio when it differs from 1.0 by
		 * more than one decimal */
		if (lround(calls_per_parent * 10) != 10)
			dstr_catf(output_buffer, ", %g calls per parent call",
				  calls_per_parent);
	}

	blog(LOG_INFO, "%s", output_buffer->array);

	active |= (uint64_t)1 << indent;
	for (size_t i = 0; i < entry->children.num; i++) {
		/* last child: clear this level's bit so deeper levels stop
		 * drawing its pipe. The mask must be built in 64 bits to
		 * match `active`; plain `1 << indent` is int-width (UB for
		 * indent >= 31, wrong mask from 32 on). */
		if ((i + 1) == entry->children.num)
			active &= ((uint64_t)1 << indent) - 1;
		profile_print_entry(&entry->children.array[i], indent_buffer,
				    output_buffer, indent + 1, active, calls);
	}
}
615
/* Compute median call gap and the percentage of gaps below, within,
 * and above [lower_bound, upper_bound]. `entries` must be sorted by
 * DESCENDING time_delta; min_/max_ are the precomputed extremes used
 * for early exits. Outputs: *percent within bounds, *lower below,
 * *higher above (each 0..100). */
static void gather_stats_between(profiler_time_entries_t *entries,
				 uint64_t calls, uint64_t lower_bound,
				 uint64_t upper_bound, uint64_t min_,
				 uint64_t max_, uint64_t *median,
				 double *percent, double *lower, double *higher)
{
	*median = 0;
	*percent = 0.;
	*lower = 0.;
	*higher = 0.;

	if (!entries->num)
		return;

	/* median: first bucket whose running total reaches 50% of calls */
	uint64_t accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		accu += entries->array[i].count;
		if (accu < calls * 0.5)
			continue;

		*median = entries->array[i].time_delta;
		break;
	}

	bool found_upper_bound = max_ <= upper_bound;
	bool found_lower_bound = false;

	/* everything above the window */
	if (min_ >= upper_bound) {
		*higher = 100.;
		return;
	}

	/* everything inside the window */
	if (found_upper_bound && min_ >= lower_bound) {
		*percent = 100.;
		return;
	}

	/* walk the descending deltas, flushing the running count into
	 * the appropriate bucket each time a boundary is crossed */
	accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		uint64_t delta = entries->array[i].time_delta;

		if (!found_upper_bound && delta <= upper_bound) {
			*higher = (double)accu / calls * 100;
			accu = 0;
			found_upper_bound = true;
		}

		if (!found_lower_bound && delta < lower_bound) {
			*percent = (double)accu / calls * 100;
			accu = 0;
			found_lower_bound = true;
		}

		accu += entries->array[i].count;
	}

	/* flush the remainder into whichever bucket the walk ended in */
	if (!found_upper_bound) {
		*higher = 100.;

	} else if (!found_lower_bound) {
		*percent = (double)accu / calls * 100;

	} else {
		*lower = (double)accu / calls * 100;
	}
}
682
/* Log time-between-calls statistics for entries registered with an
 * expected interval (±2% window) and recurse into children. */
static void profile_print_entry_expected(profiler_snapshot_entry_t *entry,
					 struct dstr *indent_buffer,
					 struct dstr *output_buffer,
					 unsigned indent, uint64_t active,
					 uint64_t parent_calls)
{
	UNUSED_PARAMETER(parent_calls);

	if (!entry->expected_time_between_calls)
		return;

	uint64_t expected_time = entry->expected_time_between_calls;

	uint64_t min_ = entry->min_time_between_calls;
	uint64_t max_ = entry->max_time_between_calls;
	uint64_t median = 0;
	double percent = 0.;
	double lower = 0.;
	double higher = 0.;
	/* window is expected ±2% (upper bound rounded up) */
	gather_stats_between(&entry->times_between_calls,
			     entry->overall_between_calls_count,
			     (uint64_t)(expected_time * 0.98),
			     (uint64_t)(expected_time * 1.02 + 0.5), min_, max_,
			     &median, &percent, &lower, &higher);

	make_indent_string(indent_buffer, indent, active);

	blog(LOG_INFO,
	     "%s%s: min=%" G_MS ", median=%" G_MS ", max=%" G_MS ", %g%% "
	     "within ±2%% of %" G_MS " (%g%% lower, %g%% higher)",
	     indent_buffer->array, entry->name, min_ / 1000., median / 1000.,
	     max_ / 1000., percent, expected_time / 1000., lower, higher);

	active |= (uint64_t)1 << indent;
	for (size_t i = 0; i < entry->children.num; i++) {
		/* last child: clear this level's bit with a 64-bit mask;
		 * plain `1 << indent` is int-width (UB for indent >= 31) */
		if ((i + 1) == entry->children.num)
			active &= ((uint64_t)1 << indent) - 1;
		profile_print_entry_expected(&entry->children.array[i],
					     indent_buffer, output_buffer,
					     indent + 1, active, 0);
	}
}
725
/* Shared driver for the profiler printers: logs `intro`, runs `print`
 * over every root of `snap`, then logs a closing rule. When snap is
 * NULL a temporary snapshot is created and freed here. */
void profile_print_func(const char *intro, profile_entry_print_func print,
			profiler_snapshot_t *snap)
{
	struct dstr indent_buffer = {0};
	struct dstr output_buffer = {0};
	profiler_snapshot_t *local_snap = snap;

	if (!local_snap)
		local_snap = profile_snapshot_create();

	blog(LOG_INFO, "%s", intro);
	for (size_t i = 0; i < local_snap->roots.num; i++) {
		print(&local_snap->roots.array[i], &indent_buffer,
		      &output_buffer, 0, 0, 0);
	}
	blog(LOG_INFO, "=================================================");

	if (!snap)
		profile_snapshot_free(local_snap);

	dstr_free(&output_buffer);
	dstr_free(&indent_buffer);
}
749
/* Log call-duration statistics for every root (temporary snapshot is
 * created when snap is NULL). */
void profiler_print(profiler_snapshot_t *snap)
{
	profile_print_func("== Profiler Results =============================",
			   profile_print_entry, snap);
}
755
/* Log time-between-calls statistics for roots registered with an
 * expected interval (temporary snapshot when snap is NULL). */
void profiler_print_time_between_calls(profiler_snapshot_t *snap)
{
	profile_print_func("== Profiler Time Between Calls ==================",
			   profile_print_entry_expected, snap);
}
761
/* Recursively release the children arrays of a call tree; does not
 * free `call` itself. NULL-safe. */
static void free_call_children(profile_call *call)
{
	if (!call)
		return;

	for (size_t i = 0; i < call->children.num; i++)
		free_call_children(&call->children.array[i]);

	da_free(call->children);
}
773
/* Free a heap-allocated root call tree. NULL is passed here by
 * merge_context on a root's first call; free_call_children guards it,
 * and bfree is presumably a no-op on NULL — TODO confirm. */
static void free_call_context(profile_call *context)
{
	free_call_children(context);
	bfree(context);
}
779
/* Release both the current and the pending-migration entry arrays. */
static void free_hashmap(profile_times_table *map)
{
	bfree(map->entries);
	bfree(map->old_entries);

	map->entries = NULL;
	map->old_entries = NULL;
	map->size = 0;
}
788
/* Recursively release an aggregation entry's hash maps and children;
 * does not free `entry` itself. */
static void free_profile_entry(profile_entry *entry)
{
	const size_t num = entry->children.num;
	for (size_t i = 0; i < num; i++)
		free_profile_entry(&entry->children.array[i]);

	free_hashmap(&entry->times);
#ifdef TRACK_OVERHEAD
	free_hashmap(&entry->overhead);
#endif
	free_hashmap(&entry->times_between_calls);

	da_free(entry->children);
}
801
/* Tear down all profiler state. The root list is swapped out under
 * root_mutex; then each root's mutex is taken and released once to
 * wait out any merge still in flight before destroying it. */
void profiler_free(void)
{
	DARRAY(profile_root_entry) old_root_entries = {0};

	pthread_mutex_lock(&root_mutex);
	enabled = false;
	da_move(old_root_entries, root_entries);
	pthread_mutex_unlock(&root_mutex);

	for (size_t i = 0; i < old_root_entries.num; i++) {
		profile_root_entry *entry = &old_root_entries.array[i];

		/* wait for a concurrent merge_context to finish */
		pthread_mutex_lock(entry->mutex);
		pthread_mutex_unlock(entry->mutex);

		pthread_mutex_destroy(entry->mutex);
		bfree(entry->mutex);
		entry->mutex = NULL;

		free_call_context(entry->prev_call);

		free_profile_entry(entry->entry);
		bfree(entry->entry);
	}

	da_free(old_root_entries);

	pthread_mutex_destroy(&root_mutex);
}
831
832 /* ------------------------------------------------------------------------- */
833 /* Profiler name storage */
834
/* Owns formatted name strings handed out by profile_store_name so the
 * profiler's interned-pointer name comparisons stay valid. */
struct profiler_name_store {
	pthread_mutex_t mutex; /* guards names */
	DARRAY(char *) names;  /* heap strings, freed on store free */
};
839
profiler_name_store_create(void)840 profiler_name_store_t *profiler_name_store_create(void)
841 {
842 profiler_name_store_t *store = bzalloc(sizeof(profiler_name_store_t));
843
844 if (pthread_mutex_init(&store->mutex, NULL))
845 goto error;
846
847 return store;
848
849 error:
850 bfree(store);
851 return NULL;
852 }
853
/* Destroy a name store and every string it owns. NULL-safe.
 * Pointers previously returned by profile_store_name become invalid. */
void profiler_name_store_free(profiler_name_store_t *store)
{
	if (!store)
		return;

	const size_t num = store->names.num;
	for (size_t i = 0; i < num; i++)
		bfree(store->names.array[i]);
	da_free(store->names);

	pthread_mutex_destroy(&store->mutex);

	bfree(store);
}
868
/* Create a printf-formatted name owned by `store` and return a stable
 * pointer to it, suitable for use as an interned profiler name. */
const char *profile_store_name(profiler_name_store_t *store, const char *format,
			       ...)
{
	struct dstr str = {0};
	va_list args;

	va_start(args, format);
	dstr_vprintf(&str, format, args);
	va_end(args);

	pthread_mutex_lock(&store->mutex);
	/* the store takes ownership of the formatted buffer */
	size_t idx = da_push_back(store->names, &str.array);
	const char *result = store->names.array[idx];
	pthread_mutex_unlock(&store->mutex);

	return result;
}
889
890 /* ------------------------------------------------------------------------- */
891 /* Profiler data access */
892
/* Deep-copy an aggregated profile_entry subtree into a snapshot entry,
 * flattening each hash map into an array (sorted later by
 * sort_snapshot_entry). Caller must hold the owning root's mutex. */
static void add_entry_to_snapshot(profile_entry *entry,
				  profiler_snapshot_entry_t *s_entry)
{
	s_entry->name = entry->name;

	s_entry->overall_count =
		copy_map_to_array(&entry->times, &s_entry->times,
				  &s_entry->min_time, &s_entry->max_time);

	/* assignment intended: between-calls data is only copied for
	 * roots registered with an expected interval */
	if ((s_entry->expected_time_between_calls =
		     entry->expected_time_between_calls))
		s_entry->overall_between_calls_count =
			copy_map_to_array(&entry->times_between_calls,
					  &s_entry->times_between_calls,
					  &s_entry->min_time_between_calls,
					  &s_entry->max_time_between_calls);

	da_reserve(s_entry->children, entry->children.num);
	for (size_t i = 0; i < entry->children.num; i++)
		add_entry_to_snapshot(&entry->children.array[i],
				      da_push_back_new(s_entry->children));
}
915
/* Sort all time-entry arrays of a snapshot subtree into descending
 * time_delta order, as required by gather_stats*. */
static void sort_snapshot_entry(profiler_snapshot_entry_t *entry)
{
	qsort(entry->times.array, entry->times.num, sizeof(profiler_time_entry),
	      profiler_time_entry_compare);

	if (entry->expected_time_between_calls) {
		qsort(entry->times_between_calls.array,
		      entry->times_between_calls.num,
		      sizeof(profiler_time_entry),
		      profiler_time_entry_compare);
	}

	const size_t num = entry->children.num;
	for (size_t i = 0; i < num; i++)
		sort_snapshot_entry(&entry->children.array[i]);
}
929
profile_snapshot_create(void)930 profiler_snapshot_t *profile_snapshot_create(void)
931 {
932 profiler_snapshot_t *snap = bzalloc(sizeof(profiler_snapshot_t));
933
934 pthread_mutex_lock(&root_mutex);
935 da_reserve(snap->roots, root_entries.num);
936 for (size_t i = 0; i < root_entries.num; i++) {
937 pthread_mutex_lock(root_entries.array[i].mutex);
938 add_entry_to_snapshot(root_entries.array[i].entry,
939 da_push_back_new(snap->roots));
940 pthread_mutex_unlock(root_entries.array[i].mutex);
941 }
942 pthread_mutex_unlock(&root_mutex);
943
944 for (size_t i = 0; i < snap->roots.num; i++)
945 sort_snapshot_entry(&snap->roots.array[i]);
946
947 return snap;
948 }
949
/* Recursively release a snapshot entry's arrays; does not free the
 * entry itself. */
static void free_snapshot_entry(profiler_snapshot_entry_t *entry)
{
	const size_t num = entry->children.num;
	for (size_t i = 0; i < num; i++)
		free_snapshot_entry(&entry->children.array[i]);

	da_free(entry->children);
	da_free(entry->times_between_calls);
	da_free(entry->times);
}
959
/* Release a snapshot and everything it owns. NULL-safe. */
void profile_snapshot_free(profiler_snapshot_t *snap)
{
	if (!snap)
		return;

	const size_t num = snap->roots.num;
	for (size_t i = 0; i < num; i++)
		free_snapshot_entry(&snap->roots.array[i]);

	da_free(snap->roots);
	bfree(snap);
}
971
972 typedef void (*dump_csv_func)(void *data, struct dstr *buffer);
entry_dump_csv(struct dstr * buffer,const profiler_snapshot_entry_t * parent,const profiler_snapshot_entry_t * entry,dump_csv_func func,void * data)973 static void entry_dump_csv(struct dstr *buffer,
974 const profiler_snapshot_entry_t *parent,
975 const profiler_snapshot_entry_t *entry,
976 dump_csv_func func, void *data)
977 {
978 const char *parent_name = parent ? parent->name : NULL;
979
980 for (size_t i = 0; i < entry->times.num; i++) {
981 dstr_printf(buffer,
982 "%p,%p,%p,%p,%s,0,"
983 "%" PRIu64 ",%" PRIu64 "\n",
984 entry, parent, entry->name, parent_name,
985 entry->name, entry->times.array[i].time_delta,
986 entry->times.array[i].count);
987 func(data, buffer);
988 }
989
990 for (size_t i = 0; i < entry->times_between_calls.num; i++) {
991 dstr_printf(buffer,
992 "%p,%p,%p,%p,%s,"
993 "%" PRIu64 ",%" PRIu64 ",%" PRIu64 "\n",
994 entry, parent, entry->name, parent_name,
995 entry->name, entry->expected_time_between_calls,
996 entry->times_between_calls.array[i].time_delta,
997 entry->times_between_calls.array[i].count);
998 func(data, buffer);
999 }
1000
1001 for (size_t i = 0; i < entry->children.num; i++)
1002 entry_dump_csv(buffer, entry, &entry->children.array[i], func,
1003 data);
1004 }
1005
/* Stream the whole snapshot as CSV (header first) through `func`. */
static void profiler_snapshot_dump(const profiler_snapshot_t *snap,
				   dump_csv_func func, void *data)
{
	struct dstr buffer = {0};

	dstr_init_copy(&buffer, "id,parent_id,name_id,parent_name_id,name,"
			       "time_between_calls,time_delta_µs,count\n");
	func(data, &buffer);

	const size_t num = snap->roots.num;
	for (size_t i = 0; i < num; i++)
		entry_dump_csv(&buffer, NULL, &snap->roots.array[i], func,
			       data);

	dstr_free(&buffer);
}
1021
dump_csv_fwrite(void * data,struct dstr * buffer)1022 static void dump_csv_fwrite(void *data, struct dstr *buffer)
1023 {
1024 fwrite(buffer->array, 1, buffer->len, data);
1025 }
1026
profiler_snapshot_dump_csv(const profiler_snapshot_t * snap,const char * filename)1027 bool profiler_snapshot_dump_csv(const profiler_snapshot_t *snap,
1028 const char *filename)
1029 {
1030 FILE *f = os_fopen(filename, "wb+");
1031 if (!f)
1032 return false;
1033
1034 profiler_snapshot_dump(snap, dump_csv_fwrite, f);
1035
1036 fclose(f);
1037 return true;
1038 }
1039
dump_csv_gzwrite(void * data,struct dstr * buffer)1040 static void dump_csv_gzwrite(void *data, struct dstr *buffer)
1041 {
1042 gzwrite(data, buffer->array, (unsigned)buffer->len);
1043 }
1044
/* Dump a snapshot as gzip-compressed CSV. On Windows the UTF-8 path is
 * converted to UTF-16 and opened with gzopen_w so non-ASCII filenames
 * work. Returns false if the file cannot be opened. */
bool profiler_snapshot_dump_csv_gz(const profiler_snapshot_t *snap,
				   const char *filename)
{
	gzFile gz;
#ifdef _WIN32
	wchar_t *filename_w = NULL;

	os_utf8_to_wcs_ptr(filename, 0, &filename_w);
	if (!filename_w)
		return false;

	gz = gzopen_w(filename_w, "wb");
	bfree(filename_w);
#else
	gz = gzopen(filename, "wb");
#endif
	if (!gz)
		return false;

	profiler_snapshot_dump(snap, dump_csv_gzwrite, gz);

#ifdef _WIN32
	gzclose_w(gz);
#else
	gzclose(gz);
#endif
	return true;
}
1073
/* Number of root entries in the snapshot (0 for NULL). */
size_t profiler_snapshot_num_roots(profiler_snapshot_t *snap)
{
	if (!snap)
		return 0;
	return snap->roots.num;
}
1078
/* Invoke `func` for each root until it returns false. NULL-safe. */
void profiler_snapshot_enumerate_roots(profiler_snapshot_t *snap,
				       profiler_entry_enum_func func,
				       void *context)
{
	if (!snap)
		return;

	const size_t num = snap->roots.num;
	for (size_t i = 0; i < num; i++) {
		if (!func(context, &snap->roots.array[i]))
			return;
	}
}
1090
/* Filter the snapshot's roots by name: `func` may set *remove to drop
 * the current root and returns false to stop enumerating early. */
void profiler_snapshot_filter_roots(profiler_snapshot_t *snap,
				    profiler_name_filter_func func, void *data)
{
	size_t i = 0;
	while (i < snap->roots.num) {
		bool remove = false;
		bool keep_going =
			func(data, snap->roots.array[i].name, &remove);

		if (remove) {
			free_snapshot_entry(&snap->roots.array[i]);
			da_erase(snap->roots, i);
		} else {
			i += 1;
		}

		if (!keep_going)
			break;
	}
}
1110
/* Number of child entries (0 for NULL). */
size_t profiler_snapshot_num_children(profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;
	return entry->children.num;
}
1115
/* Invoke `func` for each child until it returns false. NULL-safe. */
void profiler_snapshot_enumerate_children(profiler_snapshot_entry_t *entry,
					  profiler_entry_enum_func func,
					  void *context)
{
	if (!entry)
		return;

	const size_t num = entry->children.num;
	for (size_t i = 0; i < num; i++) {
		if (!func(context, &entry->children.array[i]))
			return;
	}
}
1127
/* Interned name of the entry (NULL for NULL). */
const char *profiler_snapshot_entry_name(profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return NULL;
	return entry->name;
}
1132
1133 profiler_time_entries_t *
profiler_snapshot_entry_times(profiler_snapshot_entry_t * entry)1134 profiler_snapshot_entry_times(profiler_snapshot_entry_t *entry)
1135 {
1136 return entry ? &entry->times : NULL;
1137 }
1138
/* Total recorded calls (0 for NULL). */
uint64_t profiler_snapshot_entry_overall_count(profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;
	return entry->overall_count;
}
1143
/* Minimum call duration in microseconds (0 for NULL). */
uint64_t profiler_snapshot_entry_min_time(profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;
	return entry->min_time;
}
1148
/* Maximum call duration in microseconds (0 for NULL). */
uint64_t profiler_snapshot_entry_max_time(profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;
	return entry->max_time;
}
1153
1154 profiler_time_entries_t *
profiler_snapshot_entry_times_between_calls(profiler_snapshot_entry_t * entry)1155 profiler_snapshot_entry_times_between_calls(profiler_snapshot_entry_t *entry)
1156 {
1157 return entry ? &entry->times_between_calls : NULL;
1158 }
1159
/* Expected call interval in microseconds; 0 if unregistered or NULL. */
uint64_t profiler_snapshot_entry_expected_time_between_calls(
	profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;
	return entry->expected_time_between_calls;
}
1165
1166 uint64_t
profiler_snapshot_entry_min_time_between_calls(profiler_snapshot_entry_t * entry)1167 profiler_snapshot_entry_min_time_between_calls(profiler_snapshot_entry_t *entry)
1168 {
1169 return entry ? entry->min_time_between_calls : 0;
1170 }
1171
1172 uint64_t
profiler_snapshot_entry_max_time_between_calls(profiler_snapshot_entry_t * entry)1173 profiler_snapshot_entry_max_time_between_calls(profiler_snapshot_entry_t *entry)
1174 {
1175 return entry ? entry->max_time_between_calls : 0;
1176 }
1177
/* Total recorded call gaps (0 for NULL). */
uint64_t profiler_snapshot_entry_overall_between_calls_count(
	profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;
	return entry->overall_between_calls_count;
}
1183