1 /*
2  *   This program is free software; you can redistribute it and/or modify
3  *   it under the terms of the GNU General Public License as published by
4  *   the Free Software Foundation; either version 2 of the License, or
5  *   (at your option) any later version.
6  *
7  *   This program is distributed in the hope that it will be useful,
8  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  *   GNU General Public License for more details.
11  *
12  *   You should have received a copy of the GNU General Public License
13  *   along with this program; if not, write to the Free Software
14  *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
15  */
16 
17 /**
18  * @file debug.c
19  * @brief Various functions to aid in debugging
20  *
21  * @copyright 2013  The FreeRADIUS server project
22  * @copyright 2013  Arran Cudbard-Bell <a.cudbardb@freeradius.org>
23  */
24 #include <assert.h>
25 #include <freeradius-devel/libradius.h>
26 #include <sys/stat.h>
27 #include <sys/wait.h>
28 
29 #if defined(HAVE_MALLOPT) && defined(HAVE_MALLOC_H)
30 #  include <malloc.h>
31 #endif
32 
33 /*
34  *	runtime backtrace functions are not POSIX but are included in
35  *	glibc, OSX >= 10.5 and various BSDs
36  */
37 #ifdef HAVE_EXECINFO
38 #  include <execinfo.h>
39 #endif
40 
41 #ifdef HAVE_SYS_PRCTL_H
42 #  include <sys/prctl.h>
43 #endif
44 
45 #ifdef HAVE_SYS_PROCCTL_H
46 #  include <sys/procctl.h>
47 #endif
48 
49 #ifdef HAVE_SYS_PTRACE_H
50 #  include <sys/ptrace.h>
51 #  if !defined(PT_ATTACH) && defined(PTRACE_ATTACH)
52 #    define PT_ATTACH PTRACE_ATTACH
53 #  endif
54 #  if !defined(PT_DETACH) && defined(PTRACE_DETACH)
55 #    define PT_DETACH PTRACE_DETACH
56 #  endif
57 #endif
58 
59 #ifdef HAVE_SYS_RESOURCE_H
60 #  include <sys/resource.h>
61 #endif
62 
/*
 *	Wrappers for the pthread mutex calls.  Without pthread support they
 *	expand to nothing, so call sites need no conditionals of their own.
 */
#ifndef HAVE_PTHREAD_H
#  define PTHREAD_MUTEX_LOCK(_x)
#  define PTHREAD_MUTEX_UNLOCK(_x)
#else
#  define PTHREAD_MUTEX_LOCK pthread_mutex_lock
#  define PTHREAD_MUTEX_UNLOCK pthread_mutex_unlock
#endif
70 
#ifdef HAVE_EXECINFO
/*
 *	Both limits may be overridden at build time.
 */
#  if !defined(MAX_BT_FRAMES)
#    define MAX_BT_FRAMES 128				//!< Most frames captured for a single backtrace
#  endif
#  if !defined(MAX_BT_CBUFF)
#    define MAX_BT_CBUFF  1048576			//!< Should be a power of 2
#  endif

#  ifdef HAVE_PTHREAD_H
/** Held while fr_backtrace_attach() lazily creates a circular buffer */
static pthread_mutex_t fr_debug_init = PTHREAD_MUTEX_INITIALIZER;
#  endif

/** A single captured backtrace, as stored in the circular buffer
 */
typedef struct fr_bt_info {
	void 		*obj;				//!< Address of the allocated memory the trace belongs to.
	void		*frames[MAX_BT_FRAMES];		//!< Frame data filled in by backtrace()
	int		count;				//!< How many entries of frames are in use
} fr_bt_info_t;

/** Marker parented by a monitored object, see fr_backtrace_attach()
 */
struct fr_bt_marker {
	void 		*obj;				//!< The monitored (parent) object.  Used as the needle
							//!< when searching the circular buffer.
	fr_cbuff_t 	*cbuff;				//!< Circular buffer the backtraces are written to
};
#endif
95 
96 static char panic_action[512];				//!< The command to execute when panicking.
97 static fr_fault_cb_t panic_cb = NULL;			//!< Callback to execute whilst panicking, before the
98 							//!< panic_action.
99 
100 static bool dump_core;					//!< Whether we should drop a core on fatal signals.
101 
102 static int fr_fault_log_fd = STDERR_FILENO;		//!< Where to write debug output.
103 
104 fr_debug_state_t fr_debug_state = DEBUG_STATE_UNKNOWN;	//!< Whether we're attached to by a debugger.
105 
106 #ifdef HAVE_SYS_RESOURCE_H
107 static struct rlimit core_limits;
108 #endif
109 
110 static TALLOC_CTX *talloc_null_ctx;
111 static TALLOC_CTX *talloc_autofree_ctx;
112 
113 /*
114  * On BSD systems, ptrace(PT_DETACH) uses a third argument for
115  * resume address, with the magic value (void *)1 to resume where
116  * process stopped. Specifying NULL there leads to a crash because
117  * process resumes at address 0.
118  */
119 #ifdef HAVE_SYS_PTRACE_H
#  if !defined(__linux__)
/* Non-Linux: the third argument to PT_DETACH is the resume address, (void *)1 means "where it stopped" */
#    define _PTRACE(_x, _y) ptrace(_x, _y, NULL, 0)
#    define _PTRACE_DETACH(_x) ptrace(PT_DETACH, _x, (void *)1, 0)
#  else
/* Linux: the trailing arguments are unused for attach/detach */
#    define _PTRACE(_x, _y) ptrace(_x, _y, NULL, NULL)
#    define _PTRACE_DETACH(_x) ptrace(PT_DETACH, _x, NULL, NULL)
#  endif
127 
128 #  ifdef HAVE_CAPABILITY_H
129 #    include <sys/capability.h>
130 #  endif
131 
132 /** Determine if we're running under a debugger by attempting to attach using pattach
133  *
134  * @return 0 if we're not, 1 if we are, -1 if we can't tell because of an error,
135  *	-2 if we can't tell because we don't have the CAP_SYS_PTRACE capability.
136  */
fr_get_debug_state(void)137 static int fr_get_debug_state(void)
138 {
139 	int pid;
140 
141 	int from_child[2] = {-1, -1};
142 
143 #ifdef HAVE_CAPABILITY_H
144 	cap_flag_value_t value;
145 	cap_t current;
146 
147 	/*
148 	 *  If we're running under linux, we first need to check if we have
149 	 *  permission to to ptrace. We do that using the capabilities
150 	 *  functions.
151 	 */
152 	current = cap_get_proc();
153 	if (!current) {
154 		fr_strerror_printf("Failed getting process capabilities: %s", fr_syserror(errno));
155 		return DEBUG_STATE_UNKNOWN;
156 	}
157 
158 	if (cap_get_flag(current, CAP_SYS_PTRACE, CAP_PERMITTED, &value) < 0) {
159 		fr_strerror_printf("Failed getting permitted ptrace capability state: %s",
160 				   fr_syserror(errno));
161 		cap_free(current);
162 		return DEBUG_STATE_UNKNOWN;
163 	}
164 
165 	if ((value == CAP_SET) && (cap_get_flag(current, CAP_SYS_PTRACE, CAP_EFFECTIVE, &value) < 0)) {
166 		fr_strerror_printf("Failed getting effective ptrace capability state: %s",
167 				   fr_syserror(errno));
168 		cap_free(current);
169 		return DEBUG_STATE_UNKNOWN;
170 	}
171 
172 	/*
173 	 *  We don't have permission to ptrace, so this test will always fail.
174 	 */
175 	if (value == CAP_CLEAR) {
176 		fr_strerror_printf("ptrace capability not set.  If debugger detection is required run as root or: "
177 				   "setcap cap_sys_ptrace+ep <path_to_radiusd>");
178 		cap_free(current);
179 		return DEBUG_STATE_UNKNOWN_NO_PTRACE_CAP;
180 	}
181 	cap_free(current);
182 #endif
183 
184 	if (pipe(from_child) < 0) {
185 		fr_strerror_printf("Error opening internal pipe: %s", fr_syserror(errno));
186 		return DEBUG_STATE_UNKNOWN;
187 	}
188 
189 	pid = fork();
190 	if (pid == -1) {
191 		fr_strerror_printf("Error forking: %s", fr_syserror(errno));
192 		return DEBUG_STATE_UNKNOWN;
193 	}
194 
195 	/* Child */
196 	if (pid == 0) {
197 		int8_t ret = DEBUG_STATE_NOT_ATTACHED;
198 		int ppid = getppid();
199 
200 		/* Close parent's side */
201 		close(from_child[0]);
202 
203 		/*
204 		 *	FreeBSD is extremely picky about the order of operations here
205 		 *	we need to attach, wait *then* write whilst the parent is still
206 		 *	suspended, then detach, continuing the process.
207 		 *
208 		 *	If we don't do it in that order the read in the parent triggers
209 		 *	a SIGKILL.
210 		 */
211 		if (_PTRACE(PT_ATTACH, ppid) == 0) {
212 			/* Wait for the parent to stop */
213 			waitpid(ppid, NULL, 0);
214 
215 			/* Tell the parent what happened */
216 			if (write(from_child[1], &ret, sizeof(ret)) < 0) {
217 				fprintf(stderr, "Writing ptrace status to parent failed: %s", fr_syserror(errno));
218 			}
219 
220 			/* Detach */
221 			_PTRACE_DETACH(ppid);
222 			exit(0);
223 		}
224 
225 		ret = DEBUG_STATE_ATTACHED;
226 		/* Tell the parent what happened */
227 		if (write(from_child[1], &ret, sizeof(ret)) < 0) {
228 			fprintf(stderr, "Writing ptrace status to parent failed: %s", fr_syserror(errno));
229 		}
230 
231 		exit(0);
232 	/* Parent */
233 	} else {
234 		int8_t ret = DEBUG_STATE_UNKNOWN;
235 
236 		/*
237 		 *	The child writes errno (reason) if pattach failed else 0.
238 		 *
239 		 *	This read may be interrupted by pattach,
240 		 *	which is why we need the loop.
241 		 */
242 		while ((read(from_child[0], &ret, sizeof(ret)) < 0) && (errno == EINTR));
243 
244 		/* Close the pipes here (if we did it above, it might race with pattach) */
245 		close(from_child[1]);
246 		close(from_child[0]);
247 
248 		/* Collect the status of the child */
249 		waitpid(pid, NULL, 0);
250 
251 		return ret;
252 	}
253 }
254 #elif defined(HAVE_SYS_PROCCTL_H)
fr_get_debug_state(void)255 static int fr_get_debug_state(void)
256 {
257 	int status;
258 
259 	if (procctl(P_PID, getpid(), PROC_TRACE_STATUS, &status) == -1) {
260 		fr_strerror_printf("Cannot get dumpable flag: procctl(PROC_TRACE_STATUS) failed: %s", fr_syserror(errno));
261 		return DEBUG_STATE_UNKNOWN;
262 	}
263 
264 	/*
265 	 *	As FreeBSD docs say about "PROC_TRACE_STATUS":
266 	 *
267 	 *	Returns the current tracing status for the specified process in the
268 	 *	integer variable pointed to by data.  If tracing is disabled, data
269 	 *	is set to -1.  If tracing is enabled, but no debugger is attached by
270 	 *	the ptrace(2) syscall, data is set to 0.  If a debugger is attached,
271 	 *	data is set to the pid of the debugger process.
272 	 */
273 	if (status <= 0) return DEBUG_STATE_NOT_ATTACHED;
274 
275 	return DEBUG_STATE_ATTACHED;
276 }
277 #else
fr_get_debug_state(void)278 static int fr_get_debug_state(void)
279 {
280 	fr_strerror_printf("PTRACE not available");
281 
282 	return DEBUG_STATE_UNKNOWN_NO_PTRACE;
283 }
284 #endif
285 
286 /** Should be run before using setuid or setgid to get useful results
287  *
288  * @note sets the fr_debug_state global.
289  */
fr_store_debug_state(void)290 void fr_store_debug_state(void)
291 {
292 	fr_debug_state = fr_get_debug_state();
293 
294 #ifndef NDEBUG
295 	/*
296 	 *  There are many reasons why this might happen with
297 	 *  a vanilla install, so we don't want to spam users
298 	 *  with messages they won't understand and may not
299 	 *  want to resolve.
300 	 */
301 	if (fr_debug_state < 0) fprintf(stderr, "Getting debug state failed: %s\n", fr_strerror());
302 #endif
303 }
304 
305 /** Return current value of debug_state
306  *
307  * @param state to translate into a humanly readable value.
308  * @return humanly readable version of debug state.
309  */
fr_debug_state_to_msg(fr_debug_state_t state)310 char const *fr_debug_state_to_msg(fr_debug_state_t state)
311 {
312 	switch (state) {
313 	case DEBUG_STATE_UNKNOWN_NO_PTRACE:
314 		return "Debug state unknown (ptrace functionality not available)";
315 
316 	case DEBUG_STATE_UNKNOWN_NO_PTRACE_CAP:
317 		return "Debug state unknown (cap_sys_ptrace capability not set)";
318 
319 	case DEBUG_STATE_UNKNOWN:
320 		return "Debug state unknown";
321 
322 	case DEBUG_STATE_ATTACHED:
323 		return "Found debugger attached";
324 
325 	case DEBUG_STATE_NOT_ATTACHED:
326 		return "Debugger not attached";
327 	}
328 
329 	return "<INVALID>";
330 }
331 
332 /** Break in debugger (if were running under a debugger)
333  *
334  * If the server is running under a debugger this will raise a
335  * SIGTRAP which will pause the running process.
336  *
337  * If the server is not running under debugger then this will do nothing.
338  */
fr_debug_break(bool always)339 void fr_debug_break(bool always)
340 {
341 	if (always) raise(SIGTRAP);
342 
343 	if (fr_debug_state < 0) fr_debug_state = fr_get_debug_state();
344 	if (fr_debug_state == DEBUG_STATE_ATTACHED) {
345 		fprintf(stderr, "Debugger detected, raising SIGTRAP\n");
346 		fflush(stderr);
347 
348 		raise(SIGTRAP);
349 	}
350 }
351 
352 #ifdef HAVE_EXECINFO
353 /** Print backtrace entry for a given object
354  *
355  * @param cbuff to search in.
356  * @param obj pointer to original object
357  */
backtrace_print(fr_cbuff_t * cbuff,void * obj)358 void backtrace_print(fr_cbuff_t *cbuff, void *obj)
359 {
360 	fr_bt_info_t *p;
361 	bool found = false;
362 
363 	while ((p = fr_cbuff_rp_next(cbuff, NULL))) {
364 		if ((p->obj == obj) || !obj) {
365 			found = true;
366 
367 			fprintf(stderr, "Stacktrace for: %p\n", p->obj);
368 			backtrace_symbols_fd(p->frames, p->count, STDERR_FILENO);
369 		}
370 	};
371 
372 	if (!found) {
373 		fprintf(stderr, "No backtrace available for %p", obj);
374 	}
375 }
376 
377 /** Generate a backtrace for an object
378  *
379  * If this is the first entry being inserted
380  */
fr_backtrace_do(fr_bt_marker_t * marker)381 int fr_backtrace_do(fr_bt_marker_t *marker)
382 {
383 	fr_bt_info_t *bt;
384 
385 	if (!fr_assert(marker->obj) || !fr_assert(marker->cbuff)) return -1;
386 
387 	bt = talloc_zero(NULL, fr_bt_info_t);
388 	if (!bt) return -1;
389 
390 	bt->obj = marker->obj;
391 	bt->count = backtrace(bt->frames, MAX_BT_FRAMES);
392 
393 	fr_cbuff_rp_insert(marker->cbuff, bt);
394 
395 	return 0;
396 }
397 
398 /** Inserts a backtrace marker into the provided context
399  *
400  * Allows for maximum laziness and will initialise a circular buffer if one has not already been created.
401  *
402  * Code augmentation should look something like:
403 @verbatim
404 	// Create a static cbuffer pointer, the first call to backtrace_attach will initialise it
405 	static fr_cbuff_t *my_obj_bt;
406 
407 	my_obj_t *alloc_my_obj(TALLOC_CTX *ctx) {
408 		my_obj_t *this;
409 
410 		this = talloc(ctx, my_obj_t);
411 
412 		// Attach backtrace marker to object
413 		backtrace_attach(&my_obj_bt, this);
414 
415 		return this;
416 	}
417 @endverbatim
418  *
419  * Then, later when a double free occurs:
420 @verbatim
421 	(gdb) call backtrace_print(&my_obj_bt, <pointer to double freed memory>)
422 @endverbatim
423  *
424  * which should print a limited backtrace to stderr. Note, this backtrace will not include any argument
425  * values, but should at least show the code path taken.
426  *
427  * @param cbuff this should be a pointer to a static *fr_cbuff.
428  * @param obj we want to generate a backtrace for.
429  */
fr_backtrace_attach(fr_cbuff_t ** cbuff,TALLOC_CTX * obj)430 fr_bt_marker_t *fr_backtrace_attach(fr_cbuff_t **cbuff, TALLOC_CTX *obj)
431 {
432 	fr_bt_marker_t *marker;
433 
434 	if (*cbuff == NULL) {
435 		PTHREAD_MUTEX_LOCK(&fr_debug_init);
436 		/* Check again now we hold the mutex - eww*/
437 		if (*cbuff == NULL) *cbuff = fr_cbuff_alloc(NULL, MAX_BT_CBUFF, true);
438 		PTHREAD_MUTEX_UNLOCK(&fr_debug_init);
439 	}
440 
441 	marker = talloc(obj, fr_bt_marker_t);
442 	if (!marker) {
443 		return NULL;
444 	}
445 
446 	marker->obj = (void *) obj;
447 	marker->cbuff = *cbuff;
448 
449 	fprintf(stderr, "Backtrace attached to %s %p\n", talloc_get_name(obj), obj);
450 	/*
451 	 *	Generate the backtrace for memory allocation
452 	 */
453 	fr_backtrace_do(marker);
454 	talloc_set_destructor(marker, fr_backtrace_do);
455 
456 	return marker;
457 }
458 #else
/** Stand-in for builds without execinfo, only reports that backtraces are unavailable
 *
 * @param cbuff ignored.
 * @param obj ignored.
 */
void backtrace_print(UNUSED fr_cbuff_t *cbuff, UNUSED void *obj)
{
	fputs("Server built without fr_backtrace_* support, requires execinfo.h and possibly -lexecinfo\n", stderr);
}
fr_backtrace_attach(UNUSED fr_cbuff_t ** cbuff,UNUSED TALLOC_CTX * obj)463 fr_bt_marker_t *fr_backtrace_attach(UNUSED fr_cbuff_t **cbuff, UNUSED TALLOC_CTX *obj)
464 {
465 	fprintf(stderr, "Server built without fr_backtrace_* support, requires execinfo.h and possibly -lexecinfo\n");
466 	abort();
467 }
468 #endif /* ifdef HAVE_EXECINFO */
469 
/** Talloc destructor which triggers the fault handler
 *
 * Installed by fr_panic_on_free() on a chunk parented by the monitored context.
 *
 * @param foo the chunk being freed (unused).
 * @return -1, asking talloc to refuse the free.
 */
static int _panic_on_free(UNUSED char *foo)
{
	int const refuse = -1;	/* this should make the free fail */

	fr_fault(SIGABRT);

	return refuse;
}
475 
476 /** Insert memory into the context of another talloc memory chunk which
477  * causes a panic when freed.
478  *
479  * @param ctx TALLOC_CTX to monitor for frees.
480  */
fr_panic_on_free(TALLOC_CTX * ctx)481 void fr_panic_on_free(TALLOC_CTX *ctx)
482 {
483 	char *ptr;
484 
485 	ptr = talloc(ctx, char);
486 	talloc_set_destructor(ptr, _panic_on_free);
487 }
488 
/** Set the dumpable flag, which also controls whether other processes can attach to us
 *
 * @param dumpable whether we should allow core dumping
 * @return 0 on success, -1 on failure, -2 if not supported on this system.
 */
493 #if defined(HAVE_SYS_PRCTL_H) && defined(PR_SET_DUMPABLE)
fr_set_dumpable_flag(bool dumpable)494 static int fr_set_dumpable_flag(bool dumpable)
495 {
496 	if (prctl(PR_SET_DUMPABLE, dumpable ? 1 : 0) < 0) {
497 		fr_strerror_printf("Cannot re-enable core dumps: prctl(PR_SET_DUMPABLE) failed: %s",
498 				   fr_syserror(errno));
499 		return -1;
500 	}
501 
502 	return 0;
503 }
504 #elif defined(HAVE_SYS_PROCCTL_H) && defined(PROC_TRACE_CTL_ENABLE)
fr_set_dumpable_flag(bool dumpable)505 static int fr_set_dumpable_flag(bool dumpable)
506 {
507 	int mode = dumpable ? PROC_TRACE_CTL_ENABLE : PROC_TRACE_CTL_DISABLE;
508 
509 	if (procctl(P_PID, getpid(), PROC_TRACE_CTL, &mode) == -1) {
510 		fr_strerror_printf("Cannot re-enable core dumps: procctl(PROC_TRACE_CTL) failed: %s",
511 				   fr_syserror(errno));
512 		return -1;
513 	}
514 
515 	return 0;
516 }
517 #else
static int fr_set_dumpable_flag(UNUSED bool dumpable)
{
	int const unsupported = -2;	/* distinct from the -1 used for real failures */

	fr_strerror_printf("Changing value of PR_DUMPABLE not supported on this system");

	return unsupported;
}
523 #endif
524 
/** Get the process's dumpable flag
 *
 * @return 1 if the flag is enabled, 0 if it is disabled, -1 on failure,
 *	-2 if not supported on this system.
 */
528 #if defined(HAVE_SYS_PRCTL_H) && defined(PR_GET_DUMPABLE)
fr_get_dumpable_flag(void)529 static int fr_get_dumpable_flag(void)
530 {
531 	int ret;
532 
533 	ret = prctl(PR_GET_DUMPABLE);
534 	if (ret < 0) {
535 		fr_strerror_printf("Cannot get dumpable flag: %s", fr_syserror(errno));
536 		return -1;
537 	}
538 
539 	/*
540 	 *  Linux is crazy and prctl sometimes returns 2 for disabled
541 	 */
542 	if (ret != 1) return 0;
543 	return 1;
544 }
545 #elif defined(HAVE_SYS_PROCCTL_H) && defined(PROC_TRACE_CTL)
fr_get_dumpable_flag(void)546 static int fr_get_dumpable_flag(void)
547 {
548 	int status;
549 
550 	if (procctl(P_PID, getpid(), PROC_TRACE_CTL, &status) == -1) {
551 		fr_strerror_printf("Cannot get dumpable flag: procctl(PROC_TRACE_CTL) failed: %s", fr_syserror(errno));
552 		return -1;
553 	}
554 
555 	/*
556 	 *	There are a few different kinds of disabled, but only
557 	 *	one ENABLE.
558 	 */
559 	if (status != PROC_TRACE_CTL_ENABLE) return 0;
560 
561 	return 1;
562 }
563 #else
static int fr_get_dumpable_flag(void)
{
	int const unsupported = -2;	/* distinct from the -1 used for real failures */

	fr_strerror_printf("Getting value of PR_DUMPABLE not supported on this system");

	return unsupported;
}
569 #endif
570 
571 
/** Record the current limits for core files
 *
 * Do this before anything else so as to ensure the saved limits are properly
 * initialized before fr_set_dumpable() uses them.
 *
 * @return 0 on success (or if resource limits aren't available), -1 if the
 *	limits couldn't be read.
 */
int fr_set_dumpable_init(void)
{
#ifdef HAVE_SYS_RESOURCE_H
	int const rcode = getrlimit(RLIMIT_CORE, &core_limits);

	if (rcode < 0) {
		fr_strerror_printf("Failed to get current core limit:  %s", fr_syserror(errno));
		return -1;
	}
#endif
	return 0;
}
586 
587 /** Enable or disable core dumps
588  *
589  * @param allow_core_dumps whether to enable or disable core dumps.
590  */
fr_set_dumpable(bool allow_core_dumps)591 int fr_set_dumpable(bool allow_core_dumps)
592 {
593 	dump_core = allow_core_dumps;
594 	/*
595 	 *	If configured, turn core dumps off.
596 	 */
597 	if (!allow_core_dumps) {
598 #ifdef HAVE_SYS_RESOURCE_H
599 		struct rlimit no_core;
600 
601 		no_core.rlim_cur = 0;
602 		no_core.rlim_max = core_limits.rlim_max;
603 
604 		if (setrlimit(RLIMIT_CORE, &no_core) < 0) {
605 			fr_strerror_printf("Failed disabling core dumps: %s", fr_syserror(errno));
606 
607 			return -1;
608 		}
609 #endif
610 		return 0;
611 	}
612 
613 	if (fr_set_dumpable_flag(true) < 0) return -1;
614 
615 	/*
616 	 *	Reset the core dump limits to their original value.
617 	 */
618 #ifdef HAVE_SYS_RESOURCE_H
619 	if (setrlimit(RLIMIT_CORE, &core_limits) < 0) {
620 		fr_strerror_printf("Cannot update core dump limit: %s", fr_syserror(errno));
621 
622 		return -1;
623 	}
624 #endif
625 	return 0;
626 }
627 
628 /** Reset dumpable state to previously configured value
629  *
630  * Needed after suid up/down
631  *
632  * @return 0 on success, else -1 on failure.
633  */
fr_reset_dumpable(void)634 int fr_reset_dumpable(void)
635 {
636 	return fr_set_dumpable(dump_core);
637 }
638 
639 /** Check to see if panic_action file is world writeable
640  *
641  * @return 0 if file is OK, else -1.
642  */
fr_fault_check_permissions(void)643 static int fr_fault_check_permissions(void)
644 {
645 	char const *p, *q;
646 	size_t len;
647 	char filename[256];
648 	struct stat statbuf;
649 
650 	/*
651 	 *	Try and guess which part of the command is the binary, and check to see if
652 	 *	it's world writeable, to try and save the admin from their own stupidity.
653 	 *
654 	 *	@fixme we should do this properly and take into account single and double
655 	 *	quotes.
656 	 */
657 	if ((q = strchr(panic_action, ' '))) {
658 		/*
659 		 *	need to use a static buffer, because mallocing memory in a signal handler
660 		 *	is a bad idea and can result in deadlock.
661 		 */
662 		len = snprintf(filename, sizeof(filename), "%.*s", (int)(q - panic_action), panic_action);
663 		if (is_truncated(len, sizeof(filename))) {
664 			fr_strerror_printf("Failed writing panic_action to temporary buffer (truncated)");
665 			return -1;
666 		}
667 		p = filename;
668 	} else {
669 		p = panic_action;
670 	}
671 
672 	if (stat(p, &statbuf) == 0) {
673 #ifdef S_IWOTH
674 		if ((statbuf.st_mode & S_IWOTH) != 0) {
675 			fr_strerror_printf("panic_action file \"%s\" is globally writable", p);
676 			return -1;
677 		}
678 #endif
679 	}
680 
681 	return 0;
682 }
683 
684 /** Prints a simple backtrace (if execinfo is available) and calls panic_action if set.
685  *
686  * @param sig caught
687  */
fr_fault(int sig)688 NEVER_RETURNS void fr_fault(int sig)
689 {
690 	char cmd[sizeof(panic_action) + 20];
691 	char *out = cmd;
692 	size_t left = sizeof(cmd), ret;
693 
694 	char const *p = panic_action;
695 	char const *q;
696 
697 	int code;
698 
699 	/*
700 	 *	If a debugger is attached, we don't want to run the panic action,
701 	 *	as it may interfere with the operation of the debugger.
702 	 *	If something calls us directly we just raise the signal and let
703 	 *	the debugger handle it how it wants.
704 	 */
705 	if (fr_debug_state == DEBUG_STATE_ATTACHED) {
706 		FR_FAULT_LOG("RAISING SIGNAL: %s", strsignal(sig));
707 		raise(sig);
708 		goto finish;
709 	}
710 
711 	/*
712 	 *	Makes the backtraces slightly cleaner
713 	 */
714 	memset(cmd, 0, sizeof(cmd));
715 
716 	FR_FAULT_LOG("CAUGHT SIGNAL: %s", strsignal(sig));
717 
718 	/*
719 	 *	Check for administrator sanity.
720 	 */
721 	if (fr_fault_check_permissions() < 0) {
722 		FR_FAULT_LOG("Refusing to execute panic action: %s", fr_strerror());
723 		goto finish;
724 	}
725 
726 	/*
727 	 *	Run the callback if one was registered
728 	 */
729 	if (panic_cb && (panic_cb(sig) < 0)) goto finish;
730 
731 	/*
732 	 *	Produce a simple backtrace - They're very basic but at least give us an
733 	 *	idea of the area of the code we hit the issue in.
734 	 *
735 	 *	See below in fr_fault_setup() and
736 	 *	https://sourceware.org/bugzilla/show_bug.cgi?id=16159
737 	 *	for why we only print backtraces in debug builds if we're using GLIBC.
738 	 */
739 #if defined(HAVE_EXECINFO) && (!defined(NDEBUG) || !defined(__GNUC__))
740 	if (fr_fault_log_fd >= 0) {
741 		size_t frame_count;
742 		void *stack[MAX_BT_FRAMES];
743 
744 		frame_count = backtrace(stack, MAX_BT_FRAMES);
745 
746 		FR_FAULT_LOG("Backtrace of last %zu frames:", frame_count);
747 
748 		backtrace_symbols_fd(stack, frame_count, fr_fault_log_fd);
749 	}
750 #endif
751 
752 	/* No panic action set... */
753 	if (panic_action[0] == '\0') {
754 		FR_FAULT_LOG("No panic action set");
755 		goto finish;
756 	}
757 
758 	/* Substitute %p for the current PID (useful for attaching a debugger) */
759 	while ((q = strstr(p, "%p"))) {
760 		out += ret = snprintf(out, left, "%.*s%d", (int) (q - p), p, (int) getpid());
761 		if (left <= ret) {
762 		oob:
763 			FR_FAULT_LOG("Panic action too long");
764 			fr_exit_now(1);
765 		}
766 		left -= ret;
767 		p = q + 2;
768 	}
769 	if (strlen(p) >= left) goto oob;
770 	strlcpy(out, p, left);
771 
772 	{
773 		bool disable = false;
774 
775 		FR_FAULT_LOG("Calling: %s", cmd);
776 
777 		/*
778 		 *	Here we temporarily enable the dumpable flag so if GBD or LLDB
779 		 *	is called in the panic_action, they can pattach to the running
780 		 *	process.
781 		 */
782 		if (fr_get_dumpable_flag() == 0) {
783 			if ((fr_set_dumpable_flag(true) < 0) || !fr_get_dumpable_flag()) {
784 				FR_FAULT_LOG("Failed setting dumpable flag, pattach may not work: %s", fr_strerror());
785 			} else {
786 				disable = true;
787 			}
788 			FR_FAULT_LOG("Temporarily setting PR_DUMPABLE to 1");
789 		}
790 
791 		code = system(cmd);
792 
793 		/*
794 		 *	We only want to error out here, if dumpable was originally disabled
795 		 *	and we managed to change the value to enabled, but failed
796 		 *	setting it back to disabled.
797 		 */
798 		if (disable) {
799 			FR_FAULT_LOG("Resetting PR_DUMPABLE to 0");
800 			if (fr_set_dumpable_flag(false) < 0) {
801 				FR_FAULT_LOG("Failed resetting dumpable flag to off: %s", fr_strerror());
802 				FR_FAULT_LOG("Exiting due to insecure process state");
803 				fr_exit_now(1);
804 			}
805 		}
806 
807 		FR_FAULT_LOG("Panic action exited with %i", code);
808 
809 		fr_exit_now(code);
810 	}
811 
812 
813 finish:
814 	/*
815 	 *	(Re-)Raise the signal, so that if we're running under
816 	 *	a debugger, the debugger can break when it receives
817 	 *	the signal.
818 	 */
819 	fr_unset_signal(sig);	/* Make sure we don't get into a loop */
820 
821 	raise(sig);
822 
823 	fr_exit_now(1);		/* Function marked as noreturn */
824 }
825 
826 /** Callback executed on fatal talloc error
827  *
828  * This is the simple version which mostly behaves the same way as the default
829  * one, and will not call panic_action.
830  *
831  * @param reason string provided by talloc.
832  */
833 static void _fr_talloc_fault_simple(char const *reason) CC_HINT(noreturn);
_fr_talloc_fault_simple(char const * reason)834 static void _fr_talloc_fault_simple(char const *reason)
835 {
836 	FR_FAULT_LOG("talloc abort: %s\n", reason);
837 
838 #if defined(HAVE_EXECINFO) && (!defined(NDEBUG) || !defined(__GNUC__))
839 	if (fr_fault_log_fd >= 0) {
840 		size_t frame_count;
841 		void *stack[MAX_BT_FRAMES];
842 
843 		frame_count = backtrace(stack, MAX_BT_FRAMES);
844 		FR_FAULT_LOG("Backtrace of last %zu frames:", frame_count);
845 		backtrace_symbols_fd(stack, frame_count, fr_fault_log_fd);
846 	}
847 #endif
848 	abort();
849 }
850 
851 /** Callback executed on fatal talloc error
852  *
853  * Translates a talloc abort into a fr_fault call.
854  * Mostly to work around issues with some debuggers not being able to
855  * attach after a SIGABRT has been raised.
856  *
857  * @param reason string provided by talloc.
858  */
859 static void _fr_talloc_fault(char const *reason) CC_HINT(noreturn);
_fr_talloc_fault(char const * reason)860 static void _fr_talloc_fault(char const *reason)
861 {
862 	FR_FAULT_LOG("talloc abort: %s", reason);
863 #ifdef SIGABRT
864 	fr_fault(SIGABRT);
865 #endif
866 	fr_exit_now(1);
867 }
868 
/** Adapter which forwards talloc's log output to fr_fault_log
 *
 * @param msg provided by talloc, written out followed by a newline.
 */
static void _fr_talloc_log(char const *msg)
{
	char const *line = msg;

	fr_fault_log("%s\n", line);
}
876 
877 /** Generate a talloc memory report for a context and print to stderr/stdout
878  *
879  * @param ctx to generate a report for, may be NULL in which case the root context is used.
880  */
fr_log_talloc_report(TALLOC_CTX * ctx)881 int fr_log_talloc_report(TALLOC_CTX *ctx)
882 {
883 #define TALLOC_REPORT_MAX_DEPTH 20
884 
885 	FILE *log;
886 	int fd;
887 
888 	fd = dup(fr_fault_log_fd);
889 	if (fd < 0) {
890 		fr_strerror_printf("Couldn't write memory report, failed to dup log fd: %s", fr_syserror(errno));
891 		return -1;
892 	}
893 	log = fdopen(fd, "w");
894 	if (!log) {
895 		close(fd);
896 		fr_strerror_printf("Couldn't write memory report, fdopen failed: %s", fr_syserror(errno));
897 		return -1;
898 	}
899 
900 	if (!ctx) {
901 		fprintf(log, "Current state of talloced memory:\n");
902 		talloc_report_full(talloc_null_ctx, log);
903 	} else {
904 		int i;
905 
906 		fprintf(log, "Talloc chunk lineage:\n");
907 		fprintf(log, "%p (%s)", ctx, talloc_get_name(ctx));
908 
909 		i = 0;
910 		while ((i < TALLOC_REPORT_MAX_DEPTH) && (ctx = talloc_parent(ctx))) {
911 			fprintf(log, " < %p (%s)", ctx, talloc_get_name(ctx));
912 			i++;
913 		}
914 		fprintf(log, "\n");
915 
916 		i = 0;
917 		do {
918 			fprintf(log, "Talloc context level %i:\n", i++);
919 			talloc_report_full(ctx, log);
920 		} while ((ctx = talloc_parent(ctx)) &&
921 			 (i < TALLOC_REPORT_MAX_DEPTH) &&
922 			 (talloc_parent(ctx) != talloc_autofree_ctx) &&	/* Stop before we hit the autofree ctx */
923 			 (talloc_parent(ctx) != talloc_null_ctx));  	/* Stop before we hit NULL ctx */
924 	}
925 
926 	fclose(log);
927 
928 	return 0;
929 }
930 
931 
/** Talloc destructor which turns off talloc's null tracking
 *
 * @param p the chunk being freed (unused).
 * @return 0, allowing the free to go ahead.
 */
static int _fr_disable_null_tracking(UNUSED bool *p)
{
	int const allow_free = 0;

	talloc_disable_null_tracking();

	return allow_free;
}
937 
938 /** Register talloc fault handlers
939  *
940  * Just register the fault handlers we need to make talloc
941  * produce useful debugging output.
942  */
fr_talloc_fault_setup(void)943 void fr_talloc_fault_setup(void)
944 {
945 	talloc_set_log_fn(_fr_talloc_log);
946 	talloc_set_abort_fn(_fr_talloc_fault_simple);
947 }
948 
949 /** Registers signal handlers to execute panic_action on fatal signal
950  *
951  * May be called multiple time to change the panic_action/program.
952  *
953  * @param cmd to execute on fault. If present %p will be substituted
954  *        for the parent PID before the command is executed, and %e
955  *        will be substituted for the currently running program.
956  * @param program Name of program currently executing (argv[0]).
957  * @return 0 on success -1 on failure.
958  */
fr_fault_setup(char const * cmd,char const * program)959 int fr_fault_setup(char const *cmd, char const *program)
960 {
961 	static bool setup = false;
962 
963 	char *out = panic_action;
964 	size_t left = sizeof(panic_action);
965 
966 	char const *p = cmd;
967 	char const *q;
968 
969 	if (cmd) {
970 		size_t ret;
971 
972 		/* Substitute %e for the current program */
973 		while ((q = strstr(p, "%e"))) {
974 			out += ret = snprintf(out, left, "%.*s%s", (int) (q - p), p, program ? program : "");
975 			if (left <= ret) {
976 			oob:
977 				fr_strerror_printf("Panic action too long");
978 				return -1;
979 			}
980 			left -= ret;
981 			p = q + 2;
982 		}
983 		if (strlen(p) >= left) goto oob;
984 		strlcpy(out, p, left);
985 	} else {
986 		*panic_action = '\0';
987 	}
988 
989 	/*
990 	 *	Check for administrator sanity.
991 	 */
992 	if (fr_fault_check_permissions() < 0) return -1;
993 
994 	/* Unsure what the side effects of changing the signal handler mid execution might be */
995 	if (!setup) {
996 		char *env;
997 		fr_debug_state_t debug_state;
998 
999 		/*
1000 		 *  Installing signal handlers interferes with some debugging
1001 		 *  operations.  Give the developer control over whether the
1002 		 *  signal handlers are installed or not.
1003 		 */
1004 		env = getenv("DEBUG");
1005 		if (!env || (strcmp(env, "no") == 0)) {
1006 			debug_state = DEBUG_STATE_NOT_ATTACHED;
1007 		} else if (!strcmp(env, "auto") || !strcmp(env, "yes")) {
1008 			/*
1009 			 *  Figure out if we were started under a debugger
1010 			 */
1011 			if (fr_debug_state < 0) fr_debug_state = fr_get_debug_state();
1012 			debug_state = fr_debug_state;
1013 		} else {
1014 			debug_state = DEBUG_STATE_ATTACHED;
1015 		}
1016 
1017 		talloc_set_log_fn(_fr_talloc_log);
1018 
1019 		/*
1020 		 *  These signals can't be properly dealt with in the debugger
1021 		 *  if we set our own signal handlers.
1022 		 */
1023 		switch (debug_state) {
1024 		default:
1025 #ifndef NDEBUG
1026 			FR_FAULT_LOG("Debugger check failed: %s", fr_strerror());
1027 			FR_FAULT_LOG("Signal processing in debuggers may not work as expected");
1028 #endif
1029 			/* FALL-THROUGH */
1030 
1031 		case DEBUG_STATE_NOT_ATTACHED:
1032 #ifdef SIGABRT
1033 			if (fr_set_signal(SIGABRT, fr_fault) < 0) return -1;
1034 
1035 			/*
1036 			 *  Use this instead of abort so we get a
1037 			 *  full backtrace with broken versions of LLDB
1038 			 */
1039 			talloc_set_abort_fn(_fr_talloc_fault);
1040 #endif
1041 #ifdef SIGILL
1042 			if (fr_set_signal(SIGILL, fr_fault) < 0) return -1;
1043 #endif
1044 #ifdef SIGFPE
1045 			if (fr_set_signal(SIGFPE, fr_fault) < 0) return -1;
1046 #endif
1047 #ifdef SIGSEGV
1048 			if (fr_set_signal(SIGSEGV, fr_fault) < 0) return -1;
1049 #endif
1050 			break;
1051 
1052 		case DEBUG_STATE_ATTACHED:
1053 			break;
1054 		}
1055 
1056 		/*
1057 		 *  Needed for memory reports
1058 		 */
1059 		{
1060 			TALLOC_CTX *tmp;
1061 			bool *marker;
1062 
1063 			tmp = talloc(NULL, bool);
1064 			talloc_null_ctx = talloc_parent(tmp);
1065 			talloc_free(tmp);
1066 
1067 			/*
1068 			 *  Disable null tracking on exit, else valgrind complains
1069 			 */
1070 			talloc_autofree_ctx = talloc_autofree_context();
1071 			marker = talloc(talloc_autofree_ctx, bool);
1072 			talloc_set_destructor(marker, _fr_disable_null_tracking);
1073 		}
1074 
1075 #if defined(HAVE_MALLOPT) && !defined(NDEBUG)
1076 		/*
1077 		 *  If were using glibc malloc > 2.4 this scribbles over
1078 		 *  uninitialised and freed memory, to make memory issues easier
1079 		 *  to track down.
1080 		 */
1081 		if (!getenv("TALLOC_FREE_FILL")) mallopt(M_PERTURB, 0x42);
1082 		mallopt(M_CHECK_ACTION, 3);
1083 #endif
1084 
1085 #if defined(HAVE_EXECINFO) && defined(__GNUC__) && !defined(NDEBUG)
1086 	       /*
1087 		*  We need to pre-load lgcc_s, else we can get into a deadlock
1088 		*  in fr_fault, as backtrace() attempts to dlopen it.
1089 		*
1090 		*  Apparently there's a performance impact of loading lgcc_s,
1091 		*  so only do it if this is a debug build.
1092 		*
1093 		*  See: https://sourceware.org/bugzilla/show_bug.cgi?id=16159
1094 		*/
1095 		{
1096 			void *stack[10];
1097 
1098 			backtrace(stack, 10);
1099 		}
1100 #endif
1101 	}
1102 	setup = true;
1103 
1104 	return 0;
1105 }
1106 
1107 /** Set a callback to be called before fr_fault()
1108  *
1109  * @param func to execute. If callback returns < 0
1110  *	fr_fault will exit before running panic_action code.
1111  */
fr_fault_set_cb(fr_fault_cb_t func)1112 void fr_fault_set_cb(fr_fault_cb_t func)
1113 {
1114 	panic_cb = func;
1115 }
1116 
1117 /** Log output to the fr_fault_log_fd
1118  *
1119  * We used to support a user defined callback, which was set to a radlog
1120  * function. Unfortunately, when logging to syslog, syslog would malloc memory
1121  * which would result in a deadlock if fr_fault was triggered from within
1122  * a malloc call.
1123  *
1124  * Now we just write directly to the FD.
1125  */
fr_fault_log(char const * msg,...)1126 void fr_fault_log(char const *msg, ...)
1127 {
1128 	va_list ap;
1129 
1130 	if (fr_fault_log_fd < 0) return;
1131 
1132 	va_start(ap, msg);
1133 	vdprintf(fr_fault_log_fd, msg, ap);
1134 	va_end(ap);
1135 }
1136 
1137 /** Set a file descriptor to log memory reports to.
1138  *
1139  * @param fd to write output to.
1140  */
fr_fault_set_log_fd(int fd)1141 void fr_fault_set_log_fd(int fd)
1142 {
1143 	fr_fault_log_fd = fd;
1144 }
1145 
/** A soft assertion which triggers the fault handler in debug builds
 *
 * When the condition holds nothing is logged and true is returned.
 * When it fails, the location and expression are logged, and (unless
 * NDEBUG is defined) fr_fault() is called with SIGABRT.
 *
 * @param file the assertion failed in.
 * @param line of the assertion in the file.
 * @param expr that was evaluated.
 * @param cond Result of evaluating the expression.
 * @return the value of cond.
 */
bool fr_assert_cond(char const *file, int line, char const *expr, bool cond)
{
	if (cond) return true;

	/* line is an int, so it is printed with %i and not %u */
	FR_FAULT_LOG("SOFT ASSERT FAILED %s[%i]: %s", file, line, expr);
#if !defined(NDEBUG)
	fr_fault(SIGABRT);
#endif

	return false;
}
1166 
1167 /** Exit possibly printing a message about why we're exiting.
1168  *
1169  * @note Use the fr_exit(status) macro instead of calling this function directly.
1170  *
1171  * @param file where fr_exit() was called.
1172  * @param line where fr_exit() was called.
1173  * @param status we're exiting with.
1174  */
_fr_exit(char const * file,int line,int status)1175 void NEVER_RETURNS _fr_exit(char const *file, int line, int status)
1176 {
1177 #ifndef NDEBUG
1178 	char const *error = fr_strerror();
1179 
1180 	if (error && (status != 0)) {
1181 		FR_FAULT_LOG("EXIT(%i) CALLED %s[%u].  Last error was: %s", status, file, line, error);
1182 	} else {
1183 		FR_FAULT_LOG("EXIT(%i) CALLED %s[%u]", status, file, line);
1184 	}
1185 #endif
1186 	fr_debug_break(false);	/* If running under GDB we'll break here */
1187 
1188 	exit(status);
1189 }
1190 
1191 /** Exit possibly printing a message about why we're exiting.
1192  *
1193  * @note Use the fr_exit_now(status) macro instead of calling this function directly.
1194  *
1195  * @param file where fr_exit_now() was called.
1196  * @param line where fr_exit_now() was called.
1197  * @param status we're exiting with.
1198  */
_fr_exit_now(char const * file,int line,int status)1199 void NEVER_RETURNS _fr_exit_now(char const *file, int line, int status)
1200 {
1201 #ifndef NDEBUG
1202 	char const *error = fr_strerror();
1203 
1204 	if (error && (status != 0)) {
1205 		FR_FAULT_LOG("_EXIT(%i) CALLED %s[%u].  Last error was: %s", status, file, line, error);
1206 	} else {
1207 		FR_FAULT_LOG("_EXIT(%i) CALLED %s[%u]", status, file, line);
1208 	}
1209 #endif
1210 	fr_debug_break(false);	/* If running under GDB we'll break here */
1211 
1212 	_exit(status);
1213 }
1214