1 #include <inttypes.h>
2 #include "profiler.h"
3 
4 #include "darray.h"
5 #include "dstr.h"
6 #include "platform.h"
7 #include "threading.h"
8 
9 #include <math.h>
10 
11 #include <zlib.h>
12 
13 //#define TRACK_OVERHEAD
14 
/* A snapshot is simply the set of copied root entries. */
struct profiler_snapshot {
	DARRAY(profiler_snapshot_entry_t) roots;
};
18 
/* Read-only copy of one profiled scope: flattened timing histograms plus
 * min/max/total aggregates (time values in microseconds). */
struct profiler_snapshot_entry {
	const char *name;
	profiler_time_entries_t times; /* (time_delta, count) pairs */
	uint64_t min_time;
	uint64_t max_time;
	uint64_t overall_count; /* total number of calls */
	profiler_time_entries_t times_between_calls;
	uint64_t expected_time_between_calls; /* 0 unless registered as root */
	uint64_t min_time_between_calls;
	uint64_t max_time_between_calls;
	uint64_t overall_between_calls_count;
	DARRAY(profiler_snapshot_entry_t) children;
};
32 
typedef struct profiler_time_entry profiler_time_entry;

typedef struct profile_call profile_call;
/* One live (or just finished) invocation of a profiled scope; forms a tree
 * via parent/children.  Timestamps are raw os_gettime_ns() values. */
struct profile_call {
	const char *name;
#ifdef TRACK_OVERHEAD
	uint64_t overhead_start; /* taken before the measured region starts */
#endif
	uint64_t start_time;
	uint64_t end_time;
#ifdef TRACK_OVERHEAD
	uint64_t overhead_end; /* taken after the measured region ends */
#endif
	uint64_t expected_time_between_calls;
	DARRAY(profile_call) children;
	profile_call *parent; /* NULL for root calls */
};
50 
typedef struct profile_times_table_entry profile_times_table_entry;
/* One hash-table slot; probes == 0 marks an empty slot. */
struct profile_times_table_entry {
	size_t probes; /* probe distance from the entry's home bucket (>= 1) */
	profiler_time_entry entry;
};
56 
typedef struct profile_times_table profile_times_table;
/* Open-addressing (Robin Hood) hash map from time delta (µs) to call count.
 * After a resize, the previous array is kept in the old_* fields and
 * migrated a few entries at a time to bound the per-insert cost. */
struct profile_times_table {
	size_t size;
	size_t occupied;
	size_t max_probe_count;
	profile_times_table_entry *entries;

	size_t old_start_index; /* next slot of old_entries to migrate */
	size_t old_occupied;    /* entries still waiting in old_entries */
	profile_times_table_entry *old_entries;
};
68 
typedef struct profile_entry profile_entry;
/* Persistent aggregate for one profiled scope, accumulated across calls. */
struct profile_entry {
	const char *name;
	profile_times_table times; /* scope-duration histogram (µs) */
#ifdef TRACK_OVERHEAD
	profile_times_table overhead;
#endif
	uint64_t expected_time_between_calls; /* µs; 0 when not registered */
	profile_times_table times_between_calls;
	DARRAY(profile_entry) children;
};
80 
typedef struct profile_root_entry profile_root_entry;
/* Bookkeeping for a top-level scope; mutex serializes merges per root. */
struct profile_root_entry {
	pthread_mutex_t *mutex;
	const char *name;
	profile_entry *entry;
	profile_call *prev_call; /* last finished call; feeds between-calls stats */
};
88 
/* Convert the difference of two nanosecond timestamps into microseconds,
 * rounded to the nearest microsecond. */
static inline uint64_t diff_ns_to_usec(uint64_t prev, uint64_t next)
{
	uint64_t delta_ns = next - prev;
	return (delta_ns + 500) / 1000;
}
93 
/* Record the longest probe sequence seen so far for this table. */
static inline void update_max_probes(profile_times_table *map, size_t val)
{
	if (val > map->max_probe_count)
		map->max_probe_count = val;
}
99 
100 static void migrate_old_entries(profile_times_table *map, bool limit_items);
101 static void grow_hashmap(profile_times_table *map, uint64_t usec,
102 			 uint64_t count);
103 
/* Insert (usec -> count) into the open-addressing table, merging counts when
 * the usec key already exists.  Uses Robin Hood hashing: an incoming entry
 * that has probed farther than a resident entry displaces it, and the
 * displaced entry continues being inserted in the same loop.  When the load
 * factor exceeds 0.7 the table is grown and the insert restarts there. */
static void add_hashmap_entry(profile_times_table *map, uint64_t usec,
			      uint64_t count)
{
	size_t probes = 1;

	size_t start = usec % map->size;
	for (;; probes += 1) {
		size_t idx = (start + probes) % map->size;
		profile_times_table_entry *entry = &map->entries[idx];
		/* probes == 0 marks a free slot (tables are zero-allocated). */
		if (!entry->probes) {
			entry->probes = probes;
			entry->entry.time_delta = usec;
			entry->entry.count = count;
			map->occupied += 1;
			update_max_probes(map, probes);
			return;
		}

		/* Same key: just accumulate the call count. */
		if (entry->entry.time_delta == usec) {
			entry->entry.count += count;
			return;
		}

		/* Resident entry is at least as far from home as we are;
		 * keep probing. */
		if (entry->probes >= probes)
			continue;

		if (map->occupied / (double)map->size > 0.7) {
			grow_hashmap(map, usec, count);
			return;
		}

		/* Robin Hood displacement: take the slot from the "richer"
		 * resident entry, then continue inserting the evicted entry
		 * from its own home bucket. */
		size_t old_probes = entry->probes;
		uint64_t old_count = entry->entry.count;
		uint64_t old_usec = entry->entry.time_delta;

		entry->probes = probes;
		entry->entry.count = count;
		entry->entry.time_delta = usec;

		update_max_probes(map, probes);

		probes = old_probes;
		count = old_count;
		usec = old_usec;

		start = usec % map->size;
	}
}
152 
/* Reset *map to an empty table with `size` slots.  The slot array is
 * zero-allocated, so entry.probes == 0 marks a free slot throughout. */
static void init_hashmap(profile_times_table *map, size_t size)
{
	map->entries = bzalloc(sizeof(profile_times_table_entry) * size);
	map->size = size;
	map->occupied = 0;
	map->max_probe_count = 0;

	map->old_entries = NULL;
	map->old_start_index = 0;
	map->old_occupied = 0;
}
163 
/* Incrementally move entries from the previous (pre-grow) array into the
 * current table.  With limit_items set, at most 8 slots are scanned per
 * call to amortize the migration cost; otherwise it runs until the old
 * array is drained.  The drained array itself is freed on a later call. */
static void migrate_old_entries(profile_times_table *map, bool limit_items)
{
	if (!map->old_entries)
		return;

	/* Old array fully drained: release it. */
	if (!map->old_occupied) {
		bfree(map->old_entries);
		map->old_entries = NULL;
		return;
	}

	for (size_t i = 0; !limit_items || i < 8; i++, map->old_start_index++) {
		if (!map->old_occupied)
			return;

		profile_times_table_entry *entry =
			&map->old_entries[map->old_start_index];
		/* Empty slots don't count against old_occupied; skip them. */
		if (!entry->probes)
			continue;

		add_hashmap_entry(map, entry->entry.time_delta,
				  entry->entry.count);
		map->old_occupied -= 1;
	}
}
189 
/* Replace the backing array with one twice as large (minimum 16 slots),
 * park the previous array for incremental migration, then insert the
 * pending (usec, count) pair into the new table. */
static void grow_hashmap(profile_times_table *map, uint64_t usec,
			 uint64_t count)
{
	/* Finish any still-pending migration before parking a new array. */
	migrate_old_entries(map, false);

	size_t prev_size = map->size;
	size_t prev_occupied = map->occupied;
	profile_times_table_entry *prev_entries = map->entries;

	size_t new_size = prev_size * 2;
	if (new_size < 16)
		new_size = 16;
	init_hashmap(map, new_size);

	map->old_occupied = prev_occupied;
	map->old_entries = prev_entries;

	add_hashmap_entry(map, usec, count);
}
206 
/* Initialize a freshly-added profile entry and return it for chaining.
 * Tables start at a single slot and grow on demand. */
static profile_entry *init_entry(profile_entry *entry, const char *name)
{
	entry->name = name;
	entry->expected_time_between_calls = 0;

	init_hashmap(&entry->times, 1);
#ifdef TRACK_OVERHEAD
	init_hashmap(&entry->overhead, 1);
#endif
	init_hashmap(&entry->times_between_calls, 1);

	return entry;
}
218 
/* Find the child of `parent` with the given name, creating it on demand.
 * Names are compared by pointer identity, which presumes interned/literal
 * strings throughout the profiler. */
static profile_entry *get_child(profile_entry *parent, const char *name)
{
	profile_entry *children = parent->children.array;
	const size_t count = parent->children.num;

	for (size_t i = 0; i < count; i++) {
		if (children[i].name == name)
			return &children[i];
	}

	return init_entry(da_push_back_new(parent->children), name);
}
230 
/* Fold a finished call tree into the persistent profile-entry tree: the
 * children are merged recursively, then this call's duration — and, for
 * root calls with a registered expected interval, the time since the
 * previous root call — is added to the entry's histograms. */
static void merge_call(profile_entry *entry, profile_call *call,
		       profile_call *prev_call)
{
	const size_t num = call->children.num;
	for (size_t i = 0; i < num; i++) {
		profile_call *child = &call->children.array[i];
		/* prev_call tracking only applies at the root level. */
		merge_call(get_child(entry, child->name), child, NULL);
	}

	if (entry->expected_time_between_calls != 0 && prev_call) {
		migrate_old_entries(&entry->times_between_calls, true);
		uint64_t usec = diff_ns_to_usec(prev_call->start_time,
						call->start_time);
		add_hashmap_entry(&entry->times_between_calls, usec, 1);
	}

	migrate_old_entries(&entry->times, true);
	uint64_t usec = diff_ns_to_usec(call->start_time, call->end_time);
	add_hashmap_entry(&entry->times, usec, 1);

#ifdef TRACK_OVERHEAD
	/* Overhead = profiler bookkeeping time surrounding the region. */
	migrate_old_entries(&entry->overhead, true);
	usec = diff_ns_to_usec(call->overhead_start, call->start_time);
	usec += diff_ns_to_usec(call->end_time, call->overhead_end);
	add_hashmap_entry(&entry->overhead, usec, 1);
#endif
}
258 
/* Global profiler state; guarded by root_mutex. */
static bool enabled = false;
static pthread_mutex_t root_mutex = PTHREAD_MUTEX_INITIALIZER;
static DARRAY(profile_root_entry) root_entries;

/* Per-thread call stack and enable flag.  A thread auto-disables itself when
 * profiling is off (see lock_root) and opts back in via
 * profile_reenable_thread(). */
static THREAD_LOCAL profile_call *thread_context = NULL;
static THREAD_LOCAL bool thread_enabled = true;
265 
/* Enable profiling globally; threads pick up the flag via lock_root(). */
void profiler_start(void)
{
	pthread_mutex_lock(&root_mutex);
	enabled = true;
	pthread_mutex_unlock(&root_mutex);
}
272 
/* Disable profiling globally; contexts finished afterwards are discarded
 * (see merge_context's lock_root failure path). */
void profiler_stop(void)
{
	pthread_mutex_lock(&root_mutex);
	enabled = false;
	pthread_mutex_unlock(&root_mutex);
}
279 
/* Re-sync this thread's enable flag with the global one after the thread
 * auto-disabled itself in lock_root(). */
void profile_reenable_thread(void)
{
	if (thread_enabled)
		return;

	pthread_mutex_lock(&root_mutex);
	thread_enabled = enabled;
	pthread_mutex_unlock(&root_mutex);
}
289 
lock_root(void)290 static bool lock_root(void)
291 {
292 	pthread_mutex_lock(&root_mutex);
293 	if (!enabled) {
294 		pthread_mutex_unlock(&root_mutex);
295 		thread_enabled = false;
296 		return false;
297 	}
298 
299 	return true;
300 }
301 
get_root_entry(const char * name)302 static profile_root_entry *get_root_entry(const char *name)
303 {
304 	profile_root_entry *r_entry = NULL;
305 
306 	for (size_t i = 0; i < root_entries.num; i++) {
307 		if (root_entries.array[i].name == name) {
308 			r_entry = &root_entries.array[i];
309 			break;
310 		}
311 	}
312 
313 	if (!r_entry) {
314 		r_entry = da_push_back_new(root_entries);
315 		r_entry->mutex = bmalloc(sizeof(pthread_mutex_t));
316 		pthread_mutex_init(r_entry->mutex, NULL);
317 
318 		r_entry->name = name;
319 		r_entry->entry = bzalloc(sizeof(profile_entry));
320 		init_entry(r_entry->entry, name);
321 	}
322 
323 	return r_entry;
324 }
325 
profile_register_root(const char * name,uint64_t expected_time_between_calls)326 void profile_register_root(const char *name,
327 			   uint64_t expected_time_between_calls)
328 {
329 	if (!lock_root())
330 		return;
331 
332 	get_root_entry(name)->entry->expected_time_between_calls =
333 		(expected_time_between_calls + 500) / 1000;
334 	pthread_mutex_unlock(&root_mutex);
335 }
336 
337 static void free_call_context(profile_call *context);
338 
/* Hand a completed root call context to its profile_root_entry.  The new
 * context is merged immediately (the previous call supplies only the
 * time-between-calls reference) and is kept as prev_call; the previous
 * context is freed.  The per-entry mutex is acquired BEFORE root_mutex is
 * released so profiler_free can use a lock/unlock pair on it as a barrier
 * against in-flight merges. */
static void merge_context(profile_call *context)
{
	pthread_mutex_t *mutex = NULL;
	profile_entry *entry = NULL;
	profile_call *prev_call = NULL;

	if (!lock_root()) {
		/* Profiler disabled: discard the collected data. */
		free_call_context(context);
		return;
	}

	profile_root_entry *r_entry = get_root_entry(context->name);

	mutex = r_entry->mutex;
	entry = r_entry->entry;
	prev_call = r_entry->prev_call;

	r_entry->prev_call = context;

	pthread_mutex_lock(mutex);
	pthread_mutex_unlock(&root_mutex);

	merge_call(entry, context, prev_call);

	pthread_mutex_unlock(mutex);

	free_call_context(prev_call);
}
367 
/* Begin timing a (possibly nested) profile scope on this thread.  Root
 * calls get a heap-allocated context; nested calls live inside the parent's
 * children darray.  The start timestamp is taken last so setup cost stays
 * outside the measured region. */
void profile_start(const char *name)
{
	if (!thread_enabled)
		return;

	profile_call new_call = {
		.name = name,
#ifdef TRACK_OVERHEAD
		.overhead_start = os_gettime_ns(),
#endif
		.parent = thread_context,
	};

	profile_call *call = NULL;

	if (new_call.parent) {
		/* Nested: store in the parent's children array.  NOTE(review):
		 * this pointer is into a darray that can reallocate on later
		 * sibling pushes; it appears only finished calls remain when
		 * that happens — confirm. */
		size_t idx = da_push_back(new_call.parent->children, &new_call);
		call = &new_call.parent->children.array[idx];
	} else {
		/* Root: heap-allocated, freed after merging. */
		call = bmalloc(sizeof(profile_call));
		memcpy(call, &new_call, sizeof(profile_call));
	}

	thread_context = call;
	call->start_time = os_gettime_ns();
}
394 
/* End the innermost profile scope.  The end timestamp is read first so the
 * bookkeeping below isn't attributed to the measured region.  A name
 * mismatch (missed profile_end) is logged; when `name` matches an ancestor
 * (pointer compare), the intervening unterminated scopes are closed
 * recursively.  When a root scope ends, the whole call tree is merged. */
void profile_end(const char *name)
{
	uint64_t end = os_gettime_ns();
	if (!thread_enabled)
		return;

	profile_call *call = thread_context;
	if (!call) {
		blog(LOG_ERROR, "Called profile end with no active profile");
		return;
	}

	/* NOTE(review): a NULL name seems to mark a context that adopts the
	 * name of the first profile_end it sees — confirm with callers. */
	if (!call->name)
		call->name = name;

	if (call->name != name) {
		blog(LOG_ERROR,
		     "Called profile end with mismatching name: "
		     "start(\"%s\"[%p]) <-> end(\"%s\"[%p])",
		     call->name, call->name, name, name);

		/* Search the ancestors for a scope with this name. */
		profile_call *parent = call->parent;
		while (parent && parent->parent && parent->name != name)
			parent = parent->parent;

		/* Not an ancestor: drop this stray end call. */
		if (!parent || parent->name != name)
			return;

		/* Close every unterminated inner scope up to `name`. */
		while (call->name != name) {
			profile_end(call->name);
			call = call->parent;
		}
	}

	thread_context = call->parent;

	call->end_time = end;
#ifdef TRACK_OVERHEAD
	call->overhead_end = os_gettime_ns();
#endif

	/* Nested scopes are merged later as part of their root call. */
	if (call->parent)
		return;

	merge_context(call);
}
441 
profiler_time_entry_compare(const void * first,const void * second)442 static int profiler_time_entry_compare(const void *first, const void *second)
443 {
444 	int64_t diff = ((profiler_time_entry *)second)->time_delta -
445 		       ((profiler_time_entry *)first)->time_delta;
446 	return diff < 0 ? -1 : (diff > 0 ? 1 : 0);
447 }
448 
/* Flatten `map` into entry_buffer (the buffer is reused: reserved, then
 * cleared).  Optionally reports the smallest and largest time_delta seen.
 * Returns the total number of calls (sum of all entry counts). */
static uint64_t copy_map_to_array(profile_times_table *map,
				  profiler_time_entries_t *entry_buffer,
				  uint64_t *min_, uint64_t *max_)
{
	migrate_old_entries(map, false);

	da_reserve((*entry_buffer), map->occupied);
	da_resize((*entry_buffer), 0);

	uint64_t smallest = ~(uint64_t)0;
	uint64_t largest = 0;
	uint64_t calls = 0;

	for (size_t i = 0; i < map->size; i++) {
		profile_times_table_entry *slot = &map->entries[i];
		if (!slot->probes)
			continue;

		profiler_time_entry *entry = &slot->entry;
		da_push_back((*entry_buffer), entry);

		calls += entry->count;
		if (entry->time_delta < smallest)
			smallest = entry->time_delta;
		if (entry->time_delta > largest)
			largest = entry->time_delta;
	}

	if (min_)
		*min_ = smallest;
	if (max_)
		*max_ = largest;

	return calls;
}
482 
483 typedef void (*profile_entry_print_func)(profiler_snapshot_entry_t *entry,
484 					 struct dstr *indent_buffer,
485 					 struct dstr *output_buffer,
486 					 unsigned indent, uint64_t active,
487 					 uint64_t parent_calls);
488 
489 /* UTF-8 characters */
490 #define VPIPE_RIGHT " \xe2\x94\xa3"
491 #define VPIPE " \xe2\x94\x83"
492 #define DOWN_RIGHT " \xe2\x94\x97"
493 
/* Build the tree-drawing prefix for an entry `indent` levels deep.  Bit i
 * of `active` says whether the ancestor at depth i still has siblings below
 * (draw a pipe) or was the last child (draw blank / a corner at the leaf). */
static void make_indent_string(struct dstr *indent_buffer, unsigned indent,
			       uint64_t active)
{
	/* The buffer is reused across calls; discard previous contents. */
	indent_buffer->len = 0;

	if (!indent) {
		/* Keep ->array non-NULL so callers can always print it. */
		dstr_cat_ch(indent_buffer, 0);
		return;
	}

	for (size_t i = 0; i < indent; i++) {
		const char *fragment = "";
		bool last = i + 1 == indent;
		if (active & ((uint64_t)1 << i))
			fragment = last ? VPIPE_RIGHT : VPIPE;
		else
			fragment = last ? DOWN_RIGHT : "  ";

		dstr_cat(indent_buffer, fragment);
	}
}
515 
/* Compute the 99th percentile and median of `entries` — which must be
 * sorted by DESCENDING time_delta (see profiler_time_entry_compare) — given
 * the total call count, plus, when an expected interval is set, the
 * percentage of calls that stayed below that bound.
 * (Removed: stale commented-out debug logging that referenced variables
 * not present in this function.) */
static void gather_stats(uint64_t expected_time_between_calls,
			 profiler_time_entries_t *entries, uint64_t calls,
			 uint64_t *percentile99, uint64_t *median,
			 double *percent_within_bounds)
{
	if (!entries->num) {
		*percentile99 = 0;
		*median = 0;
		*percent_within_bounds = 0.;
		return;
	}

	/* Walk from the largest delta down.  The entry that pushes the
	 * cumulative count past 1% of calls is the 99th percentile; past
	 * 50% is the median, after which we can stop. */
	uint64_t accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		uint64_t old_accu = accu;
		accu += entries->array[i].count;

		if (old_accu < calls * 0.01 && accu >= calls * 0.01)
			*percentile99 = entries->array[i].time_delta;
		else if (old_accu < calls * 0.5 && accu >= calls * 0.5) {
			*median = entries->array[i].time_delta;
			break;
		}
	}

	*percent_within_bounds = 0.;
	if (!expected_time_between_calls)
		return;

	/* Count calls exceeding the expected time; entries are descending,
	 * so stop at the first one within bounds. */
	accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		profiler_time_entry *entry = &entries->array[i];
		if (entry->time_delta < expected_time_between_calls)
			break;

		accu += entry->count;
	}
	*percent_within_bounds = (1. - (double)accu / calls) * 100;
}
561 
562 #define G_MS "g\xC2\xA0ms"
563 
/* Print one snapshot entry — and, recursively, its children — as a tree
 * line with min/median/max/99th-percentile timings in milliseconds. */
static void profile_print_entry(profiler_snapshot_entry_t *entry,
				struct dstr *indent_buffer,
				struct dstr *output_buffer, unsigned indent,
				uint64_t active, uint64_t parent_calls)
{
	uint64_t calls = entry->overall_count;
	uint64_t min_ = entry->min_time;
	uint64_t max_ = entry->max_time;
	uint64_t percentile99 = 0;
	uint64_t median = 0;
	double percent_within_bounds = 0.;
	gather_stats(entry->expected_time_between_calls, &entry->times, calls,
		     &percentile99, &median, &percent_within_bounds);

	make_indent_string(indent_buffer, indent, active);

	if (min_ == max_) {
		/* All samples identical: print the single value. */
		dstr_printf(output_buffer, "%s%s: %" G_MS, indent_buffer->array,
			    entry->name, min_ / 1000.);
	} else {
		dstr_printf(output_buffer,
			    "%s%s: min=%" G_MS ", median=%" G_MS ", "
			    "max=%" G_MS ", 99th percentile=%" G_MS,
			    indent_buffer->array, entry->name, min_ / 1000.,
			    median / 1000., max_ / 1000., percentile99 / 1000.);

		if (entry->expected_time_between_calls) {
			double expected_ms =
				entry->expected_time_between_calls / 1000.;
			dstr_catf(output_buffer, ", %g%% below %" G_MS,
				  percent_within_bounds, expected_ms);
		}
	}

	/* Only mention the call ratio when it differs notably from 1. */
	if (parent_calls && calls != parent_calls) {
		double calls_per_parent = (double)calls / parent_calls;
		if (lround(calls_per_parent * 10) != 10)
			dstr_catf(output_buffer, ", %g calls per parent call",
				  calls_per_parent);
	}

	blog(LOG_INFO, "%s", output_buffer->array);

	active |= (uint64_t)1 << indent;
	for (size_t i = 0; i < entry->children.num; i++) {
		/* For the last child, clear this level's bit so the indent
		 * shows a corner instead of a pipe.  The mask must be built
		 * in 64 bits: the previous `1 << indent` was undefined for
		 * indent >= 31 and truncated the mask for deep trees. */
		if ((i + 1) == entry->children.num)
			active &= ((uint64_t)1 << indent) - 1;
		profile_print_entry(&entry->children.array[i], indent_buffer,
				    output_buffer, indent + 1, active, calls);
	}
}
615 
/* For time-between-calls data (sorted by DESCENDING time_delta), compute
 * the median plus the percentage of calls below, within, and above the
 * [lower_bound, upper_bound] window around the expected interval. */
static void gather_stats_between(profiler_time_entries_t *entries,
				 uint64_t calls, uint64_t lower_bound,
				 uint64_t upper_bound, uint64_t min_,
				 uint64_t max_, uint64_t *median,
				 double *percent, double *lower, double *higher)
{
	*median = 0;
	*percent = 0.;
	*lower = 0.;
	*higher = 0.;

	if (!entries->num)
		return;

	/* Median: first entry (largest first) whose cumulative count
	 * reaches half of all calls. */
	uint64_t accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		accu += entries->array[i].count;
		if (accu < calls * 0.5)
			continue;

		*median = entries->array[i].time_delta;
		break;
	}

	bool found_upper_bound = max_ <= upper_bound;
	bool found_lower_bound = false;

	/* Fast paths: everything above, or everything within, the window. */
	if (min_ >= upper_bound) {
		*higher = 100.;
		return;
	}

	if (found_upper_bound && min_ >= lower_bound) {
		*percent = 100.;
		return;
	}

	/* Single descending pass; accu is reset whenever a bound is
	 * crossed, so it accumulates one bucket at a time. */
	accu = 0;
	for (size_t i = 0; i < entries->num; i++) {
		uint64_t delta = entries->array[i].time_delta;

		if (!found_upper_bound && delta <= upper_bound) {
			*higher = (double)accu / calls * 100;
			accu = 0;
			found_upper_bound = true;
		}

		if (!found_lower_bound && delta < lower_bound) {
			*percent = (double)accu / calls * 100;
			accu = 0;
			found_lower_bound = true;
		}

		accu += entries->array[i].count;
	}

	/* The leftover accumulation belongs to whichever bucket the walk
	 * ended in. */
	if (!found_upper_bound) {
		*higher = 100.;

	} else if (!found_lower_bound) {
		*percent = (double)accu / calls * 100;

	} else {
		*lower = (double)accu / calls * 100;
	}
}
682 
/* Print one snapshot entry's time-between-calls stats (recursing into
 * children).  Entries without a registered expected interval are skipped. */
static void profile_print_entry_expected(profiler_snapshot_entry_t *entry,
					 struct dstr *indent_buffer,
					 struct dstr *output_buffer,
					 unsigned indent, uint64_t active,
					 uint64_t parent_calls)
{
	UNUSED_PARAMETER(parent_calls);

	if (!entry->expected_time_between_calls)
		return;

	uint64_t expected_time = entry->expected_time_between_calls;

	uint64_t min_ = entry->min_time_between_calls;
	uint64_t max_ = entry->max_time_between_calls;
	uint64_t median = 0;
	double percent = 0.;
	double lower = 0.;
	double higher = 0.;
	/* The "within bounds" window is ±2% of the expected interval. */
	gather_stats_between(&entry->times_between_calls,
			     entry->overall_between_calls_count,
			     (uint64_t)(expected_time * 0.98),
			     (uint64_t)(expected_time * 1.02 + 0.5), min_, max_,
			     &median, &percent, &lower, &higher);

	make_indent_string(indent_buffer, indent, active);

	blog(LOG_INFO,
	     "%s%s: min=%" G_MS ", median=%" G_MS ", max=%" G_MS ", %g%% "
	     "within ±2%% of %" G_MS " (%g%% lower, %g%% higher)",
	     indent_buffer->array, entry->name, min_ / 1000., median / 1000.,
	     max_ / 1000., percent, expected_time / 1000., lower, higher);

	active |= (uint64_t)1 << indent;
	for (size_t i = 0; i < entry->children.num; i++) {
		/* Build the last-child mask in 64 bits: `1 << indent` was
		 * undefined for indent >= 31 and truncated the mask. */
		if ((i + 1) == entry->children.num)
			active &= ((uint64_t)1 << indent) - 1;
		profile_print_entry_expected(&entry->children.array[i],
					     indent_buffer, output_buffer,
					     indent + 1, active, 0);
	}
}
725 
/* Run `print` over every root of `snap`, bracketed by intro/closing log
 * lines.  When snap is NULL a temporary snapshot is taken and freed. */
void profile_print_func(const char *intro, profile_entry_print_func print,
			profiler_snapshot_t *snap)
{
	struct dstr indent_buffer = {0};
	struct dstr output_buffer = {0};

	profiler_snapshot_t *local_snap = NULL;
	if (!snap)
		snap = local_snap = profile_snapshot_create();

	blog(LOG_INFO, "%s", intro);
	for (size_t i = 0; i < snap->roots.num; i++)
		print(&snap->roots.array[i], &indent_buffer, &output_buffer, 0,
		      0, 0);
	blog(LOG_INFO, "=================================================");

	if (local_snap)
		profile_snapshot_free(local_snap);

	dstr_free(&output_buffer);
	dstr_free(&indent_buffer);
}
749 
/* Log a human-readable tree of per-scope timing statistics.  Passing NULL
 * takes a fresh snapshot internally. */
void profiler_print(profiler_snapshot_t *snap)
{
	profile_print_func("== Profiler Results =============================",
			   profile_print_entry, snap);
}
755 
/* Log time-between-calls statistics for roots registered with an expected
 * interval.  Passing NULL takes a fresh snapshot internally. */
void profiler_print_time_between_calls(profiler_snapshot_t *snap)
{
	profile_print_func("== Profiler Time Between Calls ==================",
			   profile_print_entry_expected, snap);
}
761 
/* Recursively release the children arrays of a call tree (NULL-safe). */
static void free_call_children(profile_call *call)
{
	if (!call)
		return;

	for (size_t i = 0; i < call->children.num; i++)
		free_call_children(&call->children.array[i]);

	da_free(call->children);
}
773 
/* Free a heap-allocated root call context.  context may be NULL (e.g. the
 * very first merge has no prev_call); free_call_children checks for NULL,
 * and bfree is presumably NULL-safe like free() — the callers rely on it. */
static void free_call_context(profile_call *context)
{
	free_call_children(context);
	bfree(context);
}
779 
/* Release both the current and the parked (pre-grow) slot arrays. */
static void free_hashmap(profile_times_table *map)
{
	bfree(map->entries);
	bfree(map->old_entries);

	map->entries = NULL;
	map->old_entries = NULL;
	map->size = 0;
}
788 
/* Recursively free an entry's subtree, its hash tables, and its children
 * array.  The entry struct itself is owned by the caller. */
static void free_profile_entry(profile_entry *entry)
{
	for (size_t i = 0; i < entry->children.num; i++)
		free_profile_entry(&entry->children.array[i]);

	da_free(entry->children);

	free_hashmap(&entry->times);
#ifdef TRACK_OVERHEAD
	free_hashmap(&entry->overhead);
#endif
	free_hashmap(&entry->times_between_calls);
}
801 
/* Tear down all global profiler state.  The root entries are detached under
 * root_mutex first so no new merge can find them; then each per-entry mutex
 * is locked and unlocked once as a barrier to wait out merges already in
 * flight (merge_context holds that mutex while merging). */
void profiler_free(void)
{
	DARRAY(profile_root_entry) old_root_entries = {0};

	pthread_mutex_lock(&root_mutex);
	enabled = false;
	da_move(old_root_entries, root_entries);
	pthread_mutex_unlock(&root_mutex);

	for (size_t i = 0; i < old_root_entries.num; i++) {
		profile_root_entry *entry = &old_root_entries.array[i];

		/* Barrier: wait for any in-flight merge on this root. */
		pthread_mutex_lock(entry->mutex);
		pthread_mutex_unlock(entry->mutex);

		pthread_mutex_destroy(entry->mutex);
		bfree(entry->mutex);
		entry->mutex = NULL;

		free_call_context(entry->prev_call);

		free_profile_entry(entry->entry);
		bfree(entry->entry);
	}

	da_free(old_root_entries);

	pthread_mutex_destroy(&root_mutex);
}
831 
832 /* ------------------------------------------------------------------------- */
833 /* Profiler name storage */
834 
/* Owns heap copies of dynamically-built profiler names.  The profiler
 * compares names by pointer, so stored names must remain alive and at a
 * stable address for the profiler's lifetime. */
struct profiler_name_store {
	pthread_mutex_t mutex;
	DARRAY(char *) names;
};
839 
profiler_name_store_create(void)840 profiler_name_store_t *profiler_name_store_create(void)
841 {
842 	profiler_name_store_t *store = bzalloc(sizeof(profiler_name_store_t));
843 
844 	if (pthread_mutex_init(&store->mutex, NULL))
845 		goto error;
846 
847 	return store;
848 
849 error:
850 	bfree(store);
851 	return NULL;
852 }
853 
/* Free a name store and every name it owns (NULL-safe). */
void profiler_name_store_free(profiler_name_store_t *store)
{
	if (!store)
		return;

	for (size_t i = 0; i < store->names.num; i++)
		bfree(store->names.array[i]);
	da_free(store->names);

	pthread_mutex_destroy(&store->mutex);

	bfree(store);
}
868 
/* printf-style formatter that stores the resulting string in `store` and
 * returns it.  Ownership of the formatted buffer transfers to the store;
 * it is released in profiler_name_store_free. */
const char *profile_store_name(profiler_name_store_t *store,
			       const char *format, ...)
{
	struct dstr str = {0};

	va_list args;
	va_start(args, format);
	dstr_vprintf(&str, format, args);
	va_end(args);

	pthread_mutex_lock(&store->mutex);
	size_t idx = da_push_back(store->names, &str.array);
	const char *result = store->names.array[idx];
	pthread_mutex_unlock(&store->mutex);

	return result;
}
889 
890 /* ------------------------------------------------------------------------- */
891 /* Profiler data access */
892 
/* Deep-copy one profile_entry (and its subtree) into snapshot form,
 * flattening the hash maps into arrays and recording min/max/total. */
static void add_entry_to_snapshot(profile_entry *entry,
				  profiler_snapshot_entry_t *s_entry)
{
	s_entry->name = entry->name;

	s_entry->overall_count =
		copy_map_to_array(&entry->times, &s_entry->times,
				  &s_entry->min_time, &s_entry->max_time);

	/* Between-calls data only exists for entries registered with an
	 * expected interval (assignment-in-condition is intentional). */
	if ((s_entry->expected_time_between_calls =
		     entry->expected_time_between_calls))
		s_entry->overall_between_calls_count =
			copy_map_to_array(&entry->times_between_calls,
					  &s_entry->times_between_calls,
					  &s_entry->min_time_between_calls,
					  &s_entry->max_time_between_calls);

	da_reserve(s_entry->children, entry->children.num);
	for (size_t i = 0; i < entry->children.num; i++)
		add_entry_to_snapshot(&entry->children.array[i],
				      da_push_back_new(s_entry->children));
}
915 
/* Recursively sort a snapshot entry's time arrays into descending
 * time_delta order, as required by the stats/printing helpers. */
static void sort_snapshot_entry(profiler_snapshot_entry_t *entry)
{
	qsort(entry->times.array, entry->times.num,
	      sizeof(profiler_time_entry), profiler_time_entry_compare);

	if (entry->expected_time_between_calls) {
		qsort(entry->times_between_calls.array,
		      entry->times_between_calls.num,
		      sizeof(profiler_time_entry),
		      profiler_time_entry_compare);
	}

	for (size_t i = 0; i < entry->children.num; i++)
		sort_snapshot_entry(&entry->children.array[i]);
}
929 
/* Take a consistent copy of all profiler data.  Each root is copied while
 * holding its own mutex (with root_mutex held throughout); sorting happens
 * afterwards, outside any lock. */
profiler_snapshot_t *profile_snapshot_create(void)
{
	profiler_snapshot_t *snap = bzalloc(sizeof(profiler_snapshot_t));

	pthread_mutex_lock(&root_mutex);
	da_reserve(snap->roots, root_entries.num);
	for (size_t i = 0; i < root_entries.num; i++) {
		pthread_mutex_lock(root_entries.array[i].mutex);
		add_entry_to_snapshot(root_entries.array[i].entry,
				      da_push_back_new(snap->roots));
		pthread_mutex_unlock(root_entries.array[i].mutex);
	}
	pthread_mutex_unlock(&root_mutex);

	/* Sort descending for gather_stats/profile_print_entry. */
	for (size_t i = 0; i < snap->roots.num; i++)
		sort_snapshot_entry(&snap->roots.array[i]);

	return snap;
}
949 
/* Recursively free a snapshot entry's arrays (the struct itself is owned
 * by its parent's children darray or by the snapshot). */
static void free_snapshot_entry(profiler_snapshot_entry_t *entry)
{
	for (size_t i = 0; i < entry->children.num; i++)
		free_snapshot_entry(&entry->children.array[i]);

	da_free(entry->times);
	da_free(entry->times_between_calls);
	da_free(entry->children);
}
959 
/* Free a snapshot created by profile_snapshot_create (NULL-safe). */
void profile_snapshot_free(profiler_snapshot_t *snap)
{
	if (!snap)
		return;

	for (size_t i = 0; i < snap->roots.num; i++)
		free_snapshot_entry(&snap->roots.array[i]);
	da_free(snap->roots);

	bfree(snap);
}
971 
972 typedef void (*dump_csv_func)(void *data, struct dstr *buffer);
/* Emit one CSV row per time entry (and per between-calls entry) for this
 * snapshot entry, then recurse into its children.  Entry/name pointers are
 * printed as opaque ids; they are cast to void* because passing any other
 * pointer type to %p is undefined behavior per C11 fprintf. */
static void entry_dump_csv(struct dstr *buffer,
			   const profiler_snapshot_entry_t *parent,
			   const profiler_snapshot_entry_t *entry,
			   dump_csv_func func, void *data)
{
	const char *parent_name = parent ? parent->name : NULL;

	for (size_t i = 0; i < entry->times.num; i++) {
		dstr_printf(buffer,
			    "%p,%p,%p,%p,%s,0,"
			    "%" PRIu64 ",%" PRIu64 "\n",
			    (void *)entry, (void *)parent,
			    (void *)entry->name, (void *)parent_name,
			    entry->name, entry->times.array[i].time_delta,
			    entry->times.array[i].count);
		func(data, buffer);
	}

	for (size_t i = 0; i < entry->times_between_calls.num; i++) {
		dstr_printf(buffer,
			    "%p,%p,%p,%p,%s,"
			    "%" PRIu64 ",%" PRIu64 ",%" PRIu64 "\n",
			    (void *)entry, (void *)parent,
			    (void *)entry->name, (void *)parent_name,
			    entry->name, entry->expected_time_between_calls,
			    entry->times_between_calls.array[i].time_delta,
			    entry->times_between_calls.array[i].count);
		func(data, buffer);
	}

	for (size_t i = 0; i < entry->children.num; i++)
		entry_dump_csv(buffer, entry, &entry->children.array[i], func,
			       data);
}
1005 
/* Stream the whole snapshot as CSV through `func` (header row first). */
static void profiler_snapshot_dump(const profiler_snapshot_t *snap,
				   dump_csv_func func, void *data)
{
	struct dstr buffer = {0};

	dstr_init_copy(&buffer, "id,parent_id,name_id,parent_name_id,name,"
				"time_between_calls,time_delta_µs,count\n");
	func(data, &buffer);

	for (size_t i = 0; i < snap->roots.num; i++) {
		const profiler_snapshot_entry_t *root = &snap->roots.array[i];
		entry_dump_csv(&buffer, NULL, root, func, data);
	}

	dstr_free(&buffer);
}
1021 
/* dump_csv_func sink writing to a FILE* (`data` is the FILE*). */
static void dump_csv_fwrite(void *data, struct dstr *buffer)
{
	fwrite(buffer->array, 1, buffer->len, data);
}
1026 
profiler_snapshot_dump_csv(const profiler_snapshot_t * snap,const char * filename)1027 bool profiler_snapshot_dump_csv(const profiler_snapshot_t *snap,
1028 				const char *filename)
1029 {
1030 	FILE *f = os_fopen(filename, "wb+");
1031 	if (!f)
1032 		return false;
1033 
1034 	profiler_snapshot_dump(snap, dump_csv_fwrite, f);
1035 
1036 	fclose(f);
1037 	return true;
1038 }
1039 
/* dump_csv_func implementation: writes the buffer to a gzFile passed
 * through the opaque data pointer. */
static void dump_csv_gzwrite(void *data, struct dstr *buffer)
{
	gzFile gz = data;
	gzwrite(gz, buffer->array, (unsigned)buffer->len);
}
1044 
/* Dumps the snapshot as a gzip-compressed CSV file.
 * On Windows the UTF-8 filename is converted to wide characters first
 * (gzopen takes the local 8-bit codepage there).
 * Returns true only if the file opened AND the gzip stream was
 * finalized successfully; gzclose flushes and writes the gzip trailer,
 * so a non-Z_OK result means the file on disk is truncated/corrupt. */
bool profiler_snapshot_dump_csv_gz(const profiler_snapshot_t *snap,
				   const char *filename)
{
	gzFile gz;
#ifdef _WIN32
	wchar_t *filename_w = NULL;

	os_utf8_to_wcs_ptr(filename, 0, &filename_w);
	if (!filename_w)
		return false;

	gz = gzopen_w(filename_w, "wb");
	bfree(filename_w);
#else
	gz = gzopen(filename, "wb");
#endif
	if (!gz)
		return false;

	profiler_snapshot_dump(snap, dump_csv_gzwrite, gz);

#ifdef _WIN32
	return gzclose_w(gz) == Z_OK;
#else
	return gzclose(gz) == Z_OK;
#endif
}
1073 
/* Number of root entries in the snapshot; 0 for a NULL snapshot. */
size_t profiler_snapshot_num_roots(profiler_snapshot_t *snap)
{
	if (!snap)
		return 0;

	return snap->roots.num;
}
1078 
/* Calls func(context, root) for each root entry until func returns
 * false or the roots are exhausted. NULL snapshot is a no-op. */
void profiler_snapshot_enumerate_roots(profiler_snapshot_t *snap,
				       profiler_entry_enum_func func,
				       void *context)
{
	if (!snap)
		return;

	const size_t num = snap->roots.num;
	for (size_t i = 0; i < num; i++) {
		bool keep_going = func(context, &snap->roots.array[i]);
		if (!keep_going)
			break;
	}
}
1090 
/* Iterates the root entries, letting func decide per root whether to
 * remove it (via *remove) and whether to keep iterating (return value).
 * Removed roots are freed and erased in place; the index only advances
 * past entries that were kept. */
void profiler_snapshot_filter_roots(profiler_snapshot_t *snap,
				    profiler_name_filter_func func, void *data)
{
	/* Tolerate NULL like the other snapshot accessors
	 * (profiler_snapshot_enumerate_roots etc.) instead of crashing. */
	if (!snap)
		return;

	for (size_t i = 0; i < snap->roots.num;) {
		bool remove = false;
		bool res = func(data, snap->roots.array[i].name, &remove);

		if (remove) {
			free_snapshot_entry(&snap->roots.array[i]);
			da_erase(snap->roots, i);
		}

		if (!res)
			break;

		if (!remove)
			i += 1;
	}
}
1110 
/* Number of child entries of a snapshot entry; 0 for NULL. */
size_t profiler_snapshot_num_children(profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;

	return entry->children.num;
}
1115 
/* Calls func(context, child) for each child of entry until func returns
 * false or the children are exhausted. NULL entry is a no-op. */
void profiler_snapshot_enumerate_children(profiler_snapshot_entry_t *entry,
					  profiler_entry_enum_func func,
					  void *context)
{
	if (!entry)
		return;

	const size_t num = entry->children.num;
	for (size_t i = 0; i < num; i++) {
		bool keep_going = func(context, &entry->children.array[i]);
		if (!keep_going)
			break;
	}
}
1127 
/* Name of a snapshot entry; NULL for a NULL entry. */
const char *profiler_snapshot_entry_name(profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return NULL;

	return entry->name;
}
1132 
1133 profiler_time_entries_t *
profiler_snapshot_entry_times(profiler_snapshot_entry_t * entry)1134 profiler_snapshot_entry_times(profiler_snapshot_entry_t *entry)
1135 {
1136 	return entry ? &entry->times : NULL;
1137 }
1138 
/* Total number of recorded calls for an entry; 0 for NULL. */
uint64_t profiler_snapshot_entry_overall_count(profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;

	return entry->overall_count;
}
1143 
/* Minimum recorded call time for an entry; 0 for NULL. */
uint64_t profiler_snapshot_entry_min_time(profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;

	return entry->min_time;
}
1148 
/* Maximum recorded call time for an entry; 0 for NULL. */
uint64_t profiler_snapshot_entry_max_time(profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;

	return entry->max_time;
}
1153 
1154 profiler_time_entries_t *
profiler_snapshot_entry_times_between_calls(profiler_snapshot_entry_t * entry)1155 profiler_snapshot_entry_times_between_calls(profiler_snapshot_entry_t *entry)
1156 {
1157 	return entry ? &entry->times_between_calls : NULL;
1158 }
1159 
/* Expected interval between calls for an entry; 0 for NULL. */
uint64_t profiler_snapshot_entry_expected_time_between_calls(
	profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;

	return entry->expected_time_between_calls;
}
1165 
1166 uint64_t
profiler_snapshot_entry_min_time_between_calls(profiler_snapshot_entry_t * entry)1167 profiler_snapshot_entry_min_time_between_calls(profiler_snapshot_entry_t *entry)
1168 {
1169 	return entry ? entry->min_time_between_calls : 0;
1170 }
1171 
1172 uint64_t
profiler_snapshot_entry_max_time_between_calls(profiler_snapshot_entry_t * entry)1173 profiler_snapshot_entry_max_time_between_calls(profiler_snapshot_entry_t *entry)
1174 {
1175 	return entry ? entry->max_time_between_calls : 0;
1176 }
1177 
/* Total number of recorded between-calls intervals for an entry;
 * 0 for NULL. */
uint64_t profiler_snapshot_entry_overall_between_calls_count(
	profiler_snapshot_entry_t *entry)
{
	if (!entry)
		return 0;

	return entry->overall_between_calls_count;
}
1183