1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "w32_leakcheck.h"
9 
10 #if defined(GIT_WIN32_LEAKCHECK)
11 
12 #include "Windows.h"
13 #include "Dbghelp.h"
14 #include "win32/posix.h"
15 #include "hash.h"
16 #include "runtime.h"
17 
18 /* Stack frames (for stack tracing, below) */
19 
/* Set by git_win32_leakcheck_stack_init() and cleared again by
 * git_win32_leakcheck_stack_cleanup(); capture and format refuse to
 * run while this is false.
 */
static bool   g_win32_stack_initialized = false;
/* Process handle passed to the Sym*() symbol routines; reset to
 * INVALID_HANDLE_VALUE on cleanup.
 */
static HANDLE g_win32_stack_process = INVALID_HANDLE_VALUE;
/* Optional "aux" data provider callbacks, installed with
 * git_win32_leakcheck_stack_set_aux_cb().  NULL means "not registered".
 */
static git_win32_leakcheck_stack_aux_cb_alloc  g_aux_cb_alloc  = NULL;
static git_win32_leakcheck_stack_aux_cb_lookup g_aux_cb_lookup = NULL;
24 
/**
 * Install the optional "aux" data provider callbacks.
 *
 * @param cb_alloc  invoked on every stack capture to obtain an aux id;
 *                  NULL leaves captures without aux data.
 * @param cb_lookup invoked while formatting a stack that carries an aux
 *                  id so the provider can append its own text; NULL
 *                  disables that step.
 * @return always 0.
 */
int git_win32_leakcheck_stack_set_aux_cb(
	git_win32_leakcheck_stack_aux_cb_alloc cb_alloc,
	git_win32_leakcheck_stack_aux_cb_lookup cb_lookup)
{
	/* The two slots are independent of each other. */
	g_aux_cb_lookup = cb_lookup;
	g_aux_cb_alloc = cb_alloc;

	return 0;
}
34 
35 /**
36  * Load symbol table data.  This should be done in the primary
37  * thread at startup (under a lock if there are other threads
38  * active).
39  */
git_win32_leakcheck_stack_init(void)40 void git_win32_leakcheck_stack_init(void)
41 {
42 	if (!g_win32_stack_initialized) {
43 		g_win32_stack_process = GetCurrentProcess();
44 		SymSetOptions(SYMOPT_LOAD_LINES);
45 		SymInitialize(g_win32_stack_process, NULL, TRUE);
46 		g_win32_stack_initialized = true;
47 	}
48 }
49 
50 /**
51  * Cleanup symbol table data.  This should be done in the
52  * primary thead at shutdown (under a lock if there are other
53  * threads active).
54  */
git_win32_leakcheck_stack_cleanup(void)55 void git_win32_leakcheck_stack_cleanup(void)
56 {
57 	if (g_win32_stack_initialized) {
58 		SymCleanup(g_win32_stack_process);
59 		g_win32_stack_process = INVALID_HANDLE_VALUE;
60 		g_win32_stack_initialized = false;
61 	}
62 }
63 
/**
 * Capture the raw stack frames of the calling thread into `pdata`.
 *
 * @param pdata receives the frames; it is zero-filled first.
 * @param skip  number of innermost frames to leave out; one more is
 *              added before the capture call (presumably to hide this
 *              function's own frame).
 * @return 0 on success, GIT_ERROR (with an error message set) when the
 *         symbol handler has not been initialized.
 */
int git_win32_leakcheck_stack_capture(git_win32_leakcheck_stack_raw_data *pdata, int skip)
{
	if (g_win32_stack_initialized == false) {
		git_error_set(GIT_ERROR_INVALID, "git_win32_stack not initialized.");
		return GIT_ERROR;
	}

	/* Zero the whole record (padding included) before filling it in. */
	memset(pdata, 0, sizeof(git_win32_leakcheck_stack_raw_data));

	pdata->nr_frames = RtlCaptureStackBackTrace(
		skip + 1,
		GIT_WIN32_LEAKCHECK_STACK_MAX_FRAMES,
		pdata->frames,
		NULL);

	/* When an "aux" data provider is registered, let it record
	 * whatever it needs and hand back an "aux_id" that we can
	 * present to it again at report time.
	 */
	if (g_aux_cb_alloc != NULL)
		g_aux_cb_alloc(&pdata->aux_id);

	return 0;
}
84 
/**
 * Order two captured stacks by a bytewise comparison of the complete
 * raw-data records.
 *
 * @return the memcmp() result: negative, zero or positive.
 */
int git_win32_leakcheck_stack_compare(
	git_win32_leakcheck_stack_raw_data *d1,
	git_win32_leakcheck_stack_raw_data *d2)
{
	const size_t record_size = sizeof(git_win32_leakcheck_stack_raw_data);

	return memcmp(d1, d2, record_size);
}
91 
git_win32_leakcheck_stack_format(char * pbuf,size_t buf_len,const git_win32_leakcheck_stack_raw_data * pdata,const char * prefix,const char * suffix)92 int git_win32_leakcheck_stack_format(
93 	char *pbuf, size_t buf_len,
94 	const git_win32_leakcheck_stack_raw_data *pdata,
95 	const char *prefix, const char *suffix)
96 {
97 #define MY_MAX_FILENAME 255
98 
99 	/* SYMBOL_INFO has char FileName[1] at the end.  The docs say to
100 	 * to malloc it with extra space for your desired max filename.
101 	 */
102 	struct {
103 		SYMBOL_INFO symbol;
104 		char extra[MY_MAX_FILENAME + 1];
105 	} s;
106 
107 	IMAGEHLP_LINE64 line;
108 	size_t buf_used = 0;
109 	unsigned int k;
110 	char detail[MY_MAX_FILENAME * 2]; /* filename plus space for function name and formatting */
111 	size_t detail_len;
112 
113 	if (!g_win32_stack_initialized) {
114 		git_error_set(GIT_ERROR_INVALID, "git_win32_stack not initialized.");
115 		return GIT_ERROR;
116 	}
117 
118 	if (!prefix)
119 		prefix = "\t";
120 	if (!suffix)
121 		suffix = "\n";
122 
123 	memset(pbuf, 0, buf_len);
124 
125 	memset(&s, 0, sizeof(s));
126 	s.symbol.MaxNameLen = MY_MAX_FILENAME;
127 	s.symbol.SizeOfStruct = sizeof(SYMBOL_INFO);
128 
129 	memset(&line, 0, sizeof(line));
130 	line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
131 
132 	for (k=0; k < pdata->nr_frames; k++) {
133 		DWORD64 frame_k = (DWORD64)pdata->frames[k];
134 		DWORD dwUnused;
135 
136 		if (SymFromAddr(g_win32_stack_process, frame_k, 0, &s.symbol) &&
137 			SymGetLineFromAddr64(g_win32_stack_process, frame_k, &dwUnused, &line)) {
138 			const char *pslash;
139 			const char *pfile;
140 
141 			pslash = strrchr(line.FileName, '\\');
142 			pfile = ((pslash) ? (pslash+1) : line.FileName);
143 			p_snprintf(detail, sizeof(detail), "%s%s:%d> %s%s",
144 					   prefix, pfile, line.LineNumber, s.symbol.Name, suffix);
145 		} else {
146 			/* This happens when we cross into another module.
147 			 * For example, in CLAR tests, this is typically
148 			 * the CRT startup code.  Just print an unknown
149 			 * frame and continue.
150 			 */
151 			p_snprintf(detail, sizeof(detail), "%s??%s", prefix, suffix);
152 		}
153 		detail_len = strlen(detail);
154 
155 		if (buf_len < (buf_used + detail_len + 1)) {
156 			/* we don't have room for this frame in the buffer, so just stop. */
157 			break;
158 		}
159 
160 		memcpy(&pbuf[buf_used], detail, detail_len);
161 		buf_used += detail_len;
162 	}
163 
164 	/* "aux_id" 0 is reserved to mean no aux data. This is needed to handle
165 	 * allocs that occur before the aux callbacks were registered.
166 	 */
167 	if (pdata->aux_id > 0) {
168 		p_snprintf(detail, sizeof(detail), "%saux_id: %d%s",
169 				   prefix, pdata->aux_id, suffix);
170 		detail_len = strlen(detail);
171 		if ((buf_used + detail_len + 1) < buf_len) {
172 			memcpy(&pbuf[buf_used], detail, detail_len);
173 			buf_used += detail_len;
174 		}
175 
176 		/* If an "aux" data provider is still registered, ask it to append its detailed
177 		 * data to the end of ours using the "aux_id" it gave us when this de-duped
178 		 * item was created.
179 		 */
180 		if (g_aux_cb_lookup)
181 			(g_aux_cb_lookup)(pdata->aux_id, &pbuf[buf_used], (buf_len - buf_used - 1));
182 	}
183 
184 	return GIT_OK;
185 }
186 
git_win32_leakcheck_stack(char * pbuf,size_t buf_len,int skip,const char * prefix,const char * suffix)187 int git_win32_leakcheck_stack(
188 	char * pbuf, size_t buf_len,
189 	int skip,
190 	const char *prefix, const char *suffix)
191 {
192 	git_win32_leakcheck_stack_raw_data data;
193 	int error;
194 
195 	if ((error = git_win32_leakcheck_stack_capture(&data, skip)) < 0)
196 		return error;
197 	if ((error = git_win32_leakcheck_stack_format(pbuf, buf_len, &data, prefix, suffix)) < 0)
198 		return error;
199 	return 0;
200 }
201 
/* Stack tracing */
203 
/* Number of characters a UID string may hold, not counting the trailing NUL. */
#define STACKTRACE_UID_LEN (15)

/**
 * The stacktrace of an allocation can be distilled
 * to a unique id based upon the stackframe pointers
 * and ignoring any size arguments. We will use these
 * UIDs as the (char const*) __FILE__ argument we
 * give to the CRT malloc routines.
 *
 * The string itself is written by insert_unique() in the
 * form "##" followed by the row number in hex.
 */
typedef struct {
	char uid[STACKTRACE_UID_LEN + 1]; /* NUL-terminated UID text */
} git_win32_leakcheck_stacktrace_uid;
216 
/**
 * All mallocs with the same stacktrace will be de-duped
 * and aggregated into this row.
 *
 * Rows are never freed; the "transient" counter is reset
 * before each leak dump, the other counters only grow.
 */
typedef struct {
	git_win32_leakcheck_stacktrace_uid uid; /* must be first */
	git_win32_leakcheck_stack_raw_data raw_data; /* the captured stack; also the sort key of the index */
	unsigned int count_allocs; /* times this alloc signature seen since init */
	unsigned int count_allocs_at_last_checkpoint; /* times since last mark */
	unsigned int transient_count_leaks; /* sum of leaks */
} git_win32_leakcheck_stacktrace_row;
228 
/* Held while the row/index tables and the leak counters are read or
 * modified (see git_win32_leakcheck_stacktrace() and
 * git_win32_leakcheck_stacktrace_dump()).
 */
static CRITICAL_SECTION g_crtdbg_stacktrace_cs;
230 
231 /**
232  * CRTDBG memory leak tracking takes a "char const * const file_name"
233  * and stores the pointer in the heap data (instead of allocing a copy
234  * for itself).  Normally, this is not a problem, since we usually pass
235  * in __FILE__.  But I'm going to lie to it and pass in the address of
236  * the UID in place of the file_name.  Also, I do not want to alloc the
237  * stacktrace data (because we are called from inside our alloc routines).
238  * Therefore, I'm creating a very large static pool array to store row
239  * data. This also eliminates the temptation to realloc it (and move the
240  * UID pointers).
241  *
242  * And to efficiently look for duplicates we need an index on the rows
243  * so we can bsearch it.  Again, without mallocing.
244  *
245  * If we observe more than MY_ROW_LIMIT unique malloc signatures, we
246  * fall through and use the traditional __FILE__ processing and don't
247  * try to de-dup them.  If your testing hits this limit, just increase
248  * it and try again.
249  */
250 
/* Maximum number of distinct stack signatures that can be tracked. */
#define MY_ROW_LIMIT (2 * 1024 * 1024)
/* Row storage, filled in order of first appearance; a row's position
 * here is the number encoded in its UID.
 */
static git_win32_leakcheck_stacktrace_row  g_cs_rows[MY_ROW_LIMIT];
/* Pointers into g_cs_rows, kept sorted by raw stack data for bsearch. */
static git_win32_leakcheck_stacktrace_row *g_cs_index[MY_ROW_LIMIT];

/* Capacity of the two tables above; no rows are added once g_cs_ins reaches it. */
static unsigned int g_cs_end = MY_ROW_LIMIT;
static unsigned int g_cs_ins = 0; /* insertion point == unique allocs seen */
static unsigned int g_count_total_allocs = 0; /* number of allocs seen */
static unsigned int g_transient_count_total_leaks = 0; /* number of total leaks */
static unsigned int g_transient_count_dedup_leaks = 0; /* number of unique leaks */
static bool g_limit_reached = false; /* had allocs after we filled row table */

static unsigned int g_checkpoint_id = 0; /* to better label leak checkpoints */
static bool g_transient_leaks_since_mark = false; /* payload for hook */
264 
265 /**
266  * Compare function for bsearch on g_cs_index table.
267  */
row_cmp(const void * v1,const void * v2)268 static int row_cmp(const void *v1, const void *v2)
269 {
270 	git_win32_leakcheck_stack_raw_data *d1 = (git_win32_leakcheck_stack_raw_data*)v1;
271 	git_win32_leakcheck_stacktrace_row *r2 = (git_win32_leakcheck_stacktrace_row *)v2;
272 
273 	return (git_win32_leakcheck_stack_compare(d1, &r2->raw_data));
274 }
275 
276 /**
277  * Unique insert the new data into the row and index tables.
278  * We have to sort by the stackframe data itself, not the uid.
279  */
insert_unique(const git_win32_leakcheck_stack_raw_data * pdata)280 static git_win32_leakcheck_stacktrace_row * insert_unique(
281 	const git_win32_leakcheck_stack_raw_data *pdata)
282 {
283 	size_t pos;
284 	if (git__bsearch(g_cs_index, g_cs_ins, pdata, row_cmp, &pos) < 0) {
285 		/* Append new unique item to row table. */
286 		memcpy(&g_cs_rows[g_cs_ins].raw_data, pdata, sizeof(*pdata));
287 		sprintf(g_cs_rows[g_cs_ins].uid.uid, "##%08lx", g_cs_ins);
288 
289 		/* Insert pointer to it into the proper place in the index table. */
290 		if (pos < g_cs_ins)
291 			memmove(&g_cs_index[pos+1], &g_cs_index[pos], (g_cs_ins - pos)*sizeof(g_cs_index[0]));
292 		g_cs_index[pos] = &g_cs_rows[g_cs_ins];
293 
294 		g_cs_ins++;
295 	}
296 
297 	g_cs_index[pos]->count_allocs++;
298 
299 	return g_cs_index[pos];
300 }
301 
302 /**
303  * Hook function to receive leak data from the CRT. (This includes
304  * both "<file_name>:(<line_number>)" data, but also each of the
305  * various headers and fields.
306  *
307  * Scan this for the special "##<pos>" UID forms that we substituted
308  * for the "<file_name>".  Map <pos> back to the row data and
309  * increment its leak count.
310  *
311  * See https://msdn.microsoft.com/en-us/library/74kabxyx.aspx
312  *
313  * We suppress the actual crtdbg output.
314  */
report_hook(int nRptType,char * szMsg,int * retVal)315 static int __cdecl report_hook(int nRptType, char *szMsg, int *retVal)
316 {
317 	static int hook_result = TRUE; /* FALSE to get stock dump; TRUE to suppress. */
318 	unsigned int pos;
319 
320 	*retVal = 0; /* do not invoke debugger */
321 
322 	if ((szMsg[0] != '#') || (szMsg[1] != '#'))
323 		return hook_result;
324 
325 	if (sscanf(&szMsg[2], "%08lx", &pos) < 1)
326 		return hook_result;
327 	if (pos >= g_cs_ins)
328 		return hook_result;
329 
330 	if (g_transient_leaks_since_mark) {
331 		if (g_cs_rows[pos].count_allocs == g_cs_rows[pos].count_allocs_at_last_checkpoint)
332 			return hook_result;
333 	}
334 
335 	g_cs_rows[pos].transient_count_leaks++;
336 
337 	if (g_cs_rows[pos].transient_count_leaks == 1)
338 		g_transient_count_dedup_leaks++;
339 
340 	g_transient_count_total_leaks++;
341 
342 	return hook_result;
343 }
344 
/**
 * Emit an already-formatted message both on stderr and on the
 * debugger output channel.
 *
 * Callers format into a buffer themselves and hand the finished
 * string to us; we want to avoid fprintf() because it allocs.
 */
static void my_output(const char *buf)
{
	const size_t len = strlen(buf);

	fwrite(buf, len, 1, stderr);
	OutputDebugString(buf);
}
355 
356 /**
357  * For each row with leaks, dump a stacktrace for it.
358  */
dump_summary(const char * label)359 static void dump_summary(const char *label)
360 {
361 	unsigned int k;
362 	char buf[10 * 1024];
363 
364 	if (g_transient_count_total_leaks == 0)
365 		return;
366 
367 	fflush(stdout);
368 	fflush(stderr);
369 	my_output("\n");
370 
371 	if (g_limit_reached) {
372 		sprintf(buf,
373 				"LEAK SUMMARY: de-dup row table[%d] filled. Increase MY_ROW_LIMIT.\n",
374 				MY_ROW_LIMIT);
375 		my_output(buf);
376 	}
377 
378 	if (!label)
379 		label = "";
380 
381 	if (g_transient_leaks_since_mark) {
382 		sprintf(buf, "LEAK CHECKPOINT %d: leaks %d unique %d: %s\n",
383 				g_checkpoint_id, g_transient_count_total_leaks, g_transient_count_dedup_leaks, label);
384 		my_output(buf);
385 	} else {
386 		sprintf(buf, "LEAK SUMMARY: TOTAL leaks %d de-duped %d: %s\n",
387 				g_transient_count_total_leaks, g_transient_count_dedup_leaks, label);
388 		my_output(buf);
389 	}
390 	my_output("\n");
391 
392 	for (k = 0; k < g_cs_ins; k++) {
393 		if (g_cs_rows[k].transient_count_leaks > 0) {
394 			sprintf(buf, "LEAK: %s leaked %d of %d times:\n",
395 					g_cs_rows[k].uid.uid,
396 					g_cs_rows[k].transient_count_leaks,
397 					g_cs_rows[k].count_allocs);
398 			my_output(buf);
399 
400 			if (git_win32_leakcheck_stack_format(
401 					buf, sizeof(buf), &g_cs_rows[k].raw_data,
402 					NULL, NULL) >= 0) {
403 				my_output(buf);
404 			}
405 
406 			my_output("\n");
407 		}
408 	}
409 
410 	fflush(stderr);
411 }
412 
413 /**
414  * Initialize our memory leak tracking and de-dup data structures.
415  * This should ONLY be called by git_libgit2_init().
416  */
git_win32_leakcheck_stacktrace_init(void)417 void git_win32_leakcheck_stacktrace_init(void)
418 {
419 	InitializeCriticalSection(&g_crtdbg_stacktrace_cs);
420 
421 	EnterCriticalSection(&g_crtdbg_stacktrace_cs);
422 
423 	_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
424 
425 	_CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_DEBUG | _CRTDBG_MODE_FILE);
426 	_CrtSetReportMode(_CRT_ERROR,  _CRTDBG_MODE_DEBUG | _CRTDBG_MODE_FILE);
427 	_CrtSetReportMode(_CRT_WARN,   _CRTDBG_MODE_DEBUG | _CRTDBG_MODE_FILE);
428 
429 	_CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
430 	_CrtSetReportFile(_CRT_ERROR,  _CRTDBG_FILE_STDERR);
431 	_CrtSetReportFile(_CRT_WARN,   _CRTDBG_FILE_STDERR);
432 
433 	LeaveCriticalSection(&g_crtdbg_stacktrace_cs);
434 }
435 
/**
 * Run a CRT leak dump through our report hook and, depending on
 * `opt`, report the leaks found and/or record a new checkpoint.
 *
 * @param opt   bit set of GIT_WIN32_LEAKCHECK_STACKTRACE_* options.
 *              LEAKS_SINCE_MARK and LEAKS_TOTAL are mutually exclusive,
 *              and at least one of SET_MARK, LEAKS_SINCE_MARK or
 *              LEAKS_TOTAL must be given.  QUIET suppresses the
 *              printed summary.
 * @param label text shown in the summary header (may be NULL).
 * @return GIT_ERROR (with an error message set) for an invalid option
 *         combination; otherwise the number of unique leaks for a
 *         LEAKS_* request, or 0 when only a mark was set.
 */
int git_win32_leakcheck_stacktrace_dump(
	git_win32_leakcheck_stacktrace_options opt,
	const char *label)
{
	_CRT_REPORT_HOOK prev_hook;
	unsigned int row;
	int result = 0;

#define IS_BIT_SET(o,b) (((o) & (b)) != 0)

	bool want_mark       = IS_BIT_SET(opt, GIT_WIN32_LEAKCHECK_STACKTRACE_SET_MARK);
	bool want_since_mark = IS_BIT_SET(opt, GIT_WIN32_LEAKCHECK_STACKTRACE_LEAKS_SINCE_MARK);
	bool want_total      = IS_BIT_SET(opt, GIT_WIN32_LEAKCHECK_STACKTRACE_LEAKS_TOTAL);
	bool quiet           = IS_BIT_SET(opt, GIT_WIN32_LEAKCHECK_STACKTRACE_QUIET);
	bool want_report     = (want_since_mark || want_total);

	if (want_since_mark && want_total) {
		git_error_set(GIT_ERROR_INVALID, "cannot combine LEAKS_SINCE_MARK and LEAKS_TOTAL.");
		return GIT_ERROR;
	}
	if (!(want_mark || want_report)) {
		git_error_set(GIT_ERROR_INVALID, "nothing to do.");
		return GIT_ERROR;
	}

	EnterCriticalSection(&g_crtdbg_stacktrace_cs);

	if (want_report) {
		/* Everything named "transient" is a per-dump counter and
		 * starts from zero for each dump; that is what makes
		 * checkpoints possible.
		 */
		g_transient_count_total_leaks = 0;
		g_transient_count_dedup_leaks = 0;
		for (row = 0; row < g_cs_ins; row++)
			g_cs_rows[row].transient_count_leaks = 0;
	}

	/* Tells the hook whether to ignore rows untouched since the mark. */
	g_transient_leaks_since_mark = want_since_mark;

	/* Install our hook only for the duration of the CRT dump. */
	prev_hook = _CrtSetReportHook(report_hook);
	_CrtDumpMemoryLeaks();
	_CrtSetReportHook(prev_hook);

	if (want_report) {
		result = g_transient_count_dedup_leaks;

		if (!quiet)
			dump_summary(label);
	}

	if (want_mark) {
		/* Remember the current allocation counts as the new baseline. */
		for (row = 0; row < g_cs_ins; row++)
			g_cs_rows[row].count_allocs_at_last_checkpoint = g_cs_rows[row].count_allocs;

		g_checkpoint_id++;
	}

	LeaveCriticalSection(&g_crtdbg_stacktrace_cs);

	return result;
}
498 
499 /**
500  * Shutdown our memory leak tracking and dump summary data.
501  * This should ONLY be called by git_libgit2_shutdown().
502  *
503  * We explicitly call _CrtDumpMemoryLeaks() during here so
504  * that we can compute summary data for the leaks. We print
505  * the stacktrace of each unique leak.
506  *
507  * This cleanup does not happen if the app calls exit()
508  * without calling the libgit2 shutdown code.
509  *
510  * This info we print here is independent of any automatic
511  * reporting during exit() caused by _CRTDBG_LEAK_CHECK_DF.
512  * Set it in your app if you also want traditional reporting.
513  */
git_win32_leakcheck_stacktrace_cleanup(void)514 void git_win32_leakcheck_stacktrace_cleanup(void)
515 {
516 	/* At shutdown/cleanup, dump cummulative leak info
517 	 * with everything since startup.  This might generate
518 	 * extra noise if the caller has been doing checkpoint
519 	 * dumps, but it might also eliminate some false
520 	 * positives for resources previously reported during
521 	 * checkpoints.
522 	 */
523 	git_win32_leakcheck_stacktrace_dump(
524 		GIT_WIN32_LEAKCHECK_STACKTRACE_LEAKS_TOTAL,
525 		"CLEANUP");
526 
527 	DeleteCriticalSection(&g_crtdbg_stacktrace_cs);
528 }
529 
git_win32_leakcheck_stacktrace(int skip,const char * file)530 const char *git_win32_leakcheck_stacktrace(int skip, const char *file)
531 {
532 	git_win32_leakcheck_stack_raw_data new_data;
533 	git_win32_leakcheck_stacktrace_row *row;
534 	const char * result = file;
535 
536 	if (git_win32_leakcheck_stack_capture(&new_data, skip+1) < 0)
537 		return result;
538 
539 	EnterCriticalSection(&g_crtdbg_stacktrace_cs);
540 
541 	if (g_cs_ins < g_cs_end) {
542 		row = insert_unique(&new_data);
543 		result = row->uid.uid;
544 	} else {
545 		g_limit_reached = true;
546 	}
547 
548 	g_count_total_allocs++;
549 
550 	LeaveCriticalSection(&g_crtdbg_stacktrace_cs);
551 
552 	return result;
553 }
554 
/**
 * Shutdown callback registered by git_win32_leakcheck_global_init().
 */
static void git_win32_leakcheck_global_shutdown(void)
{
	/* Dump the leak report first: formatting its stack traces
	 * needs the symbol handler that is released afterwards.
	 */
	git_win32_leakcheck_stacktrace_cleanup();

	git_win32_leakcheck_stack_cleanup();
}
560 
git_win32_leakcheck_global_init(void)561 int git_win32_leakcheck_global_init(void)
562 {
563 	git_win32_leakcheck_stacktrace_init();
564 	git_win32_leakcheck_stack_init();
565 
566 	return git_runtime_shutdown_register(git_win32_leakcheck_global_shutdown);
567 }
568 
569 #else
570 
/**
 * Stand-in used when GIT_WIN32_LEAKCHECK is not defined: there is
 * nothing to set up, so this always reports success.
 *
 * @return 0.
 */
int git_win32_leakcheck_global_init(void)
{
	const int success = 0;

	return success;
}
575 
576 #endif
577