1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
15 */
16
17 /**
18 * @file debug.c
19 * @brief Various functions to aid in debugging
20 *
21 * @copyright 2013 The FreeRADIUS server project
22 * @copyright 2013 Arran Cudbard-Bell <a.cudbardb@freeradius.org>
23 */
24 #include <assert.h>
25 #include <freeradius-devel/libradius.h>
26 #include <sys/stat.h>
27 #include <sys/wait.h>
28
29 #if defined(HAVE_MALLOPT) && defined(HAVE_MALLOC_H)
30 # include <malloc.h>
31 #endif
32
33 /*
34 * runtime backtrace functions are not POSIX but are included in
35 * glibc, OSX >= 10.5 and various BSDs
36 */
37 #ifdef HAVE_EXECINFO
38 # include <execinfo.h>
39 #endif
40
41 #ifdef HAVE_SYS_PRCTL_H
42 # include <sys/prctl.h>
43 #endif
44
45 #ifdef HAVE_SYS_PROCCTL_H
46 # include <sys/procctl.h>
47 #endif
48
49 #ifdef HAVE_SYS_PTRACE_H
50 # include <sys/ptrace.h>
51 # if !defined(PT_ATTACH) && defined(PTRACE_ATTACH)
52 # define PT_ATTACH PTRACE_ATTACH
53 # endif
54 # if !defined(PT_DETACH) && defined(PTRACE_DETACH)
55 # define PT_DETACH PTRACE_DETACH
56 # endif
57 #endif
58
59 #ifdef HAVE_SYS_RESOURCE_H
60 # include <sys/resource.h>
61 #endif
62
63 #ifdef HAVE_PTHREAD_H
64 # define PTHREAD_MUTEX_LOCK pthread_mutex_lock
65 # define PTHREAD_MUTEX_UNLOCK pthread_mutex_unlock
66 #else
67 # define PTHREAD_MUTEX_LOCK(_x)
68 # define PTHREAD_MUTEX_UNLOCK(_x)
69 #endif
70
71 #ifdef HAVE_EXECINFO
72 # ifndef MAX_BT_FRAMES
73 # define MAX_BT_FRAMES 128
74 # endif
75 # ifndef MAX_BT_CBUFF
76 # define MAX_BT_CBUFF 1048576 //!< Should be a power of 2
77 # endif
78
79 # ifdef HAVE_PTHREAD_H
80 static pthread_mutex_t fr_debug_init = PTHREAD_MUTEX_INITIALIZER;
81 # endif
82
83 typedef struct fr_bt_info {
84 void *obj; //!< Memory address of the block of allocated memory.
85 void *frames[MAX_BT_FRAMES]; //!< Backtrace frame data
86 int count; //!< Number of frames stored
87 } fr_bt_info_t;
88
89 struct fr_bt_marker {
90 void *obj; //!< Pointer to the parent object, this is our needle
91 //!< when we iterate over the contents of the circular buffer.
92 fr_cbuff_t *cbuff; //!< Where we temporarily store the backtraces
93 };
94 #endif
95
96 static char panic_action[512]; //!< The command to execute when panicking.
97 static fr_fault_cb_t panic_cb = NULL; //!< Callback to execute whilst panicking, before the
98 //!< panic_action.
99
100 static bool dump_core; //!< Whether we should drop a core on fatal signals.
101
102 static int fr_fault_log_fd = STDERR_FILENO; //!< Where to write debug output.
103
104 fr_debug_state_t fr_debug_state = DEBUG_STATE_UNKNOWN; //!< Whether we're attached to by a debugger.
105
106 #ifdef HAVE_SYS_RESOURCE_H
107 static struct rlimit core_limits;
108 #endif
109
110 static TALLOC_CTX *talloc_null_ctx;
111 static TALLOC_CTX *talloc_autofree_ctx;
112
113 /*
114 * On BSD systems, ptrace(PT_DETACH) uses a third argument for
115 * resume address, with the magic value (void *)1 to resume where
116 * process stopped. Specifying NULL there leads to a crash because
117 * process resumes at address 0.
118 */
119 #ifdef HAVE_SYS_PTRACE_H
120 # ifdef __linux__
121 # define _PTRACE(_x, _y) ptrace(_x, _y, NULL, NULL)
122 # define _PTRACE_DETACH(_x) ptrace(PT_DETACH, _x, NULL, NULL)
123 # else
124 # define _PTRACE(_x, _y) ptrace(_x, _y, NULL, 0)
125 # define _PTRACE_DETACH(_x) ptrace(PT_DETACH, _x, (void *)1, 0)
126 # endif
127
128 # ifdef HAVE_CAPABILITY_H
129 # include <sys/capability.h>
130 # endif
131
132 /** Determine if we're running under a debugger by attempting to attach using pattach
133 *
134 * @return 0 if we're not, 1 if we are, -1 if we can't tell because of an error,
135 * -2 if we can't tell because we don't have the CAP_SYS_PTRACE capability.
136 */
fr_get_debug_state(void)137 static int fr_get_debug_state(void)
138 {
139 int pid;
140
141 int from_child[2] = {-1, -1};
142
143 #ifdef HAVE_CAPABILITY_H
144 cap_flag_value_t value;
145 cap_t current;
146
147 /*
148 * If we're running under linux, we first need to check if we have
149 * permission to to ptrace. We do that using the capabilities
150 * functions.
151 */
152 current = cap_get_proc();
153 if (!current) {
154 fr_strerror_printf("Failed getting process capabilities: %s", fr_syserror(errno));
155 return DEBUG_STATE_UNKNOWN;
156 }
157
158 if (cap_get_flag(current, CAP_SYS_PTRACE, CAP_PERMITTED, &value) < 0) {
159 fr_strerror_printf("Failed getting permitted ptrace capability state: %s",
160 fr_syserror(errno));
161 cap_free(current);
162 return DEBUG_STATE_UNKNOWN;
163 }
164
165 if ((value == CAP_SET) && (cap_get_flag(current, CAP_SYS_PTRACE, CAP_EFFECTIVE, &value) < 0)) {
166 fr_strerror_printf("Failed getting effective ptrace capability state: %s",
167 fr_syserror(errno));
168 cap_free(current);
169 return DEBUG_STATE_UNKNOWN;
170 }
171
172 /*
173 * We don't have permission to ptrace, so this test will always fail.
174 */
175 if (value == CAP_CLEAR) {
176 fr_strerror_printf("ptrace capability not set. If debugger detection is required run as root or: "
177 "setcap cap_sys_ptrace+ep <path_to_radiusd>");
178 cap_free(current);
179 return DEBUG_STATE_UNKNOWN_NO_PTRACE_CAP;
180 }
181 cap_free(current);
182 #endif
183
184 if (pipe(from_child) < 0) {
185 fr_strerror_printf("Error opening internal pipe: %s", fr_syserror(errno));
186 return DEBUG_STATE_UNKNOWN;
187 }
188
189 pid = fork();
190 if (pid == -1) {
191 fr_strerror_printf("Error forking: %s", fr_syserror(errno));
192 return DEBUG_STATE_UNKNOWN;
193 }
194
195 /* Child */
196 if (pid == 0) {
197 int8_t ret = DEBUG_STATE_NOT_ATTACHED;
198 int ppid = getppid();
199
200 /* Close parent's side */
201 close(from_child[0]);
202
203 /*
204 * FreeBSD is extremely picky about the order of operations here
205 * we need to attach, wait *then* write whilst the parent is still
206 * suspended, then detach, continuing the process.
207 *
208 * If we don't do it in that order the read in the parent triggers
209 * a SIGKILL.
210 */
211 if (_PTRACE(PT_ATTACH, ppid) == 0) {
212 /* Wait for the parent to stop */
213 waitpid(ppid, NULL, 0);
214
215 /* Tell the parent what happened */
216 if (write(from_child[1], &ret, sizeof(ret)) < 0) {
217 fprintf(stderr, "Writing ptrace status to parent failed: %s", fr_syserror(errno));
218 }
219
220 /* Detach */
221 _PTRACE_DETACH(ppid);
222 exit(0);
223 }
224
225 ret = DEBUG_STATE_ATTACHED;
226 /* Tell the parent what happened */
227 if (write(from_child[1], &ret, sizeof(ret)) < 0) {
228 fprintf(stderr, "Writing ptrace status to parent failed: %s", fr_syserror(errno));
229 }
230
231 exit(0);
232 /* Parent */
233 } else {
234 int8_t ret = DEBUG_STATE_UNKNOWN;
235
236 /*
237 * The child writes errno (reason) if pattach failed else 0.
238 *
239 * This read may be interrupted by pattach,
240 * which is why we need the loop.
241 */
242 while ((read(from_child[0], &ret, sizeof(ret)) < 0) && (errno == EINTR));
243
244 /* Close the pipes here (if we did it above, it might race with pattach) */
245 close(from_child[1]);
246 close(from_child[0]);
247
248 /* Collect the status of the child */
249 waitpid(pid, NULL, 0);
250
251 return ret;
252 }
253 }
254 #elif defined(HAVE_SYS_PROCCTL_H)
fr_get_debug_state(void)255 static int fr_get_debug_state(void)
256 {
257 int status;
258
259 if (procctl(P_PID, getpid(), PROC_TRACE_STATUS, &status) == -1) {
260 fr_strerror_printf("Cannot get dumpable flag: procctl(PROC_TRACE_STATUS) failed: %s", fr_syserror(errno));
261 return DEBUG_STATE_UNKNOWN;
262 }
263
264 /*
265 * As FreeBSD docs say about "PROC_TRACE_STATUS":
266 *
267 * Returns the current tracing status for the specified process in the
268 * integer variable pointed to by data. If tracing is disabled, data
269 * is set to -1. If tracing is enabled, but no debugger is attached by
270 * the ptrace(2) syscall, data is set to 0. If a debugger is attached,
271 * data is set to the pid of the debugger process.
272 */
273 if (status <= 0) return DEBUG_STATE_NOT_ATTACHED;
274
275 return DEBUG_STATE_ATTACHED;
276 }
277 #else
fr_get_debug_state(void)278 static int fr_get_debug_state(void)
279 {
280 fr_strerror_printf("PTRACE not available");
281
282 return DEBUG_STATE_UNKNOWN_NO_PTRACE;
283 }
284 #endif
285
286 /** Should be run before using setuid or setgid to get useful results
287 *
288 * @note sets the fr_debug_state global.
289 */
fr_store_debug_state(void)290 void fr_store_debug_state(void)
291 {
292 fr_debug_state = fr_get_debug_state();
293
294 #ifndef NDEBUG
295 /*
296 * There are many reasons why this might happen with
297 * a vanilla install, so we don't want to spam users
298 * with messages they won't understand and may not
299 * want to resolve.
300 */
301 if (fr_debug_state < 0) fprintf(stderr, "Getting debug state failed: %s\n", fr_strerror());
302 #endif
303 }
304
305 /** Return current value of debug_state
306 *
307 * @param state to translate into a humanly readable value.
308 * @return humanly readable version of debug state.
309 */
fr_debug_state_to_msg(fr_debug_state_t state)310 char const *fr_debug_state_to_msg(fr_debug_state_t state)
311 {
312 switch (state) {
313 case DEBUG_STATE_UNKNOWN_NO_PTRACE:
314 return "Debug state unknown (ptrace functionality not available)";
315
316 case DEBUG_STATE_UNKNOWN_NO_PTRACE_CAP:
317 return "Debug state unknown (cap_sys_ptrace capability not set)";
318
319 case DEBUG_STATE_UNKNOWN:
320 return "Debug state unknown";
321
322 case DEBUG_STATE_ATTACHED:
323 return "Found debugger attached";
324
325 case DEBUG_STATE_NOT_ATTACHED:
326 return "Debugger not attached";
327 }
328
329 return "<INVALID>";
330 }
331
332 /** Break in debugger (if were running under a debugger)
333 *
334 * If the server is running under a debugger this will raise a
335 * SIGTRAP which will pause the running process.
336 *
337 * If the server is not running under debugger then this will do nothing.
338 */
fr_debug_break(bool always)339 void fr_debug_break(bool always)
340 {
341 if (always) raise(SIGTRAP);
342
343 if (fr_debug_state < 0) fr_debug_state = fr_get_debug_state();
344 if (fr_debug_state == DEBUG_STATE_ATTACHED) {
345 fprintf(stderr, "Debugger detected, raising SIGTRAP\n");
346 fflush(stderr);
347
348 raise(SIGTRAP);
349 }
350 }
351
352 #ifdef HAVE_EXECINFO
353 /** Print backtrace entry for a given object
354 *
355 * @param cbuff to search in.
356 * @param obj pointer to original object
357 */
backtrace_print(fr_cbuff_t * cbuff,void * obj)358 void backtrace_print(fr_cbuff_t *cbuff, void *obj)
359 {
360 fr_bt_info_t *p;
361 bool found = false;
362
363 while ((p = fr_cbuff_rp_next(cbuff, NULL))) {
364 if ((p->obj == obj) || !obj) {
365 found = true;
366
367 fprintf(stderr, "Stacktrace for: %p\n", p->obj);
368 backtrace_symbols_fd(p->frames, p->count, STDERR_FILENO);
369 }
370 };
371
372 if (!found) {
373 fprintf(stderr, "No backtrace available for %p", obj);
374 }
375 }
376
377 /** Generate a backtrace for an object
378 *
379 * If this is the first entry being inserted
380 */
fr_backtrace_do(fr_bt_marker_t * marker)381 int fr_backtrace_do(fr_bt_marker_t *marker)
382 {
383 fr_bt_info_t *bt;
384
385 if (!fr_assert(marker->obj) || !fr_assert(marker->cbuff)) return -1;
386
387 bt = talloc_zero(NULL, fr_bt_info_t);
388 if (!bt) return -1;
389
390 bt->obj = marker->obj;
391 bt->count = backtrace(bt->frames, MAX_BT_FRAMES);
392
393 fr_cbuff_rp_insert(marker->cbuff, bt);
394
395 return 0;
396 }
397
398 /** Inserts a backtrace marker into the provided context
399 *
400 * Allows for maximum laziness and will initialise a circular buffer if one has not already been created.
401 *
402 * Code augmentation should look something like:
403 @verbatim
404 // Create a static cbuffer pointer, the first call to backtrace_attach will initialise it
405 static fr_cbuff_t *my_obj_bt;
406
407 my_obj_t *alloc_my_obj(TALLOC_CTX *ctx) {
408 my_obj_t *this;
409
410 this = talloc(ctx, my_obj_t);
411
412 // Attach backtrace marker to object
413 backtrace_attach(&my_obj_bt, this);
414
415 return this;
416 }
417 @endverbatim
418 *
419 * Then, later when a double free occurs:
420 @verbatim
421 (gdb) call backtrace_print(&my_obj_bt, <pointer to double freed memory>)
422 @endverbatim
423 *
424 * which should print a limited backtrace to stderr. Note, this backtrace will not include any argument
425 * values, but should at least show the code path taken.
426 *
427 * @param cbuff this should be a pointer to a static *fr_cbuff.
428 * @param obj we want to generate a backtrace for.
429 */
fr_backtrace_attach(fr_cbuff_t ** cbuff,TALLOC_CTX * obj)430 fr_bt_marker_t *fr_backtrace_attach(fr_cbuff_t **cbuff, TALLOC_CTX *obj)
431 {
432 fr_bt_marker_t *marker;
433
434 if (*cbuff == NULL) {
435 PTHREAD_MUTEX_LOCK(&fr_debug_init);
436 /* Check again now we hold the mutex - eww*/
437 if (*cbuff == NULL) *cbuff = fr_cbuff_alloc(NULL, MAX_BT_CBUFF, true);
438 PTHREAD_MUTEX_UNLOCK(&fr_debug_init);
439 }
440
441 marker = talloc(obj, fr_bt_marker_t);
442 if (!marker) {
443 return NULL;
444 }
445
446 marker->obj = (void *) obj;
447 marker->cbuff = *cbuff;
448
449 fprintf(stderr, "Backtrace attached to %s %p\n", talloc_get_name(obj), obj);
450 /*
451 * Generate the backtrace for memory allocation
452 */
453 fr_backtrace_do(marker);
454 talloc_set_destructor(marker, fr_backtrace_do);
455
456 return marker;
457 }
458 #else
/*
 *	Stub used when execinfo isn't available, only explains why
 *	nothing can be printed.
 */
void backtrace_print(UNUSED fr_cbuff_t *cbuff, UNUSED void *obj)
{
	fprintf(stderr,
		"Server built without fr_backtrace_* support, requires execinfo.h and possibly -lexecinfo\n");
}
fr_backtrace_attach(UNUSED fr_cbuff_t ** cbuff,UNUSED TALLOC_CTX * obj)463 fr_bt_marker_t *fr_backtrace_attach(UNUSED fr_cbuff_t **cbuff, UNUSED TALLOC_CTX *obj)
464 {
465 fprintf(stderr, "Server built without fr_backtrace_* support, requires execinfo.h and possibly -lexecinfo\n");
466 abort();
467 }
468 #endif /* ifdef HAVE_EXECINFO */
469
/*
 *	Destructor installed by fr_panic_on_free(), triggers the fault
 *	handler as if SIGABRT had been received.
 */
static int _panic_on_free(UNUSED char *foo)
{
	fr_fault(SIGABRT);

	/* this should make the free fail */
	return -1;
}
475
476 /** Insert memory into the context of another talloc memory chunk which
477 * causes a panic when freed.
478 *
479 * @param ctx TALLOC_CTX to monitor for frees.
480 */
fr_panic_on_free(TALLOC_CTX * ctx)481 void fr_panic_on_free(TALLOC_CTX *ctx)
482 {
483 char *ptr;
484
485 ptr = talloc(ctx, char);
486 talloc_set_destructor(ptr, _panic_on_free);
487 }
488
489 /** Set the dumpable flag, also controls whether processes can PATTACH
490 *
491 * @param dumpable whether we should allow core dumping
492 */
493 #if defined(HAVE_SYS_PRCTL_H) && defined(PR_SET_DUMPABLE)
fr_set_dumpable_flag(bool dumpable)494 static int fr_set_dumpable_flag(bool dumpable)
495 {
496 if (prctl(PR_SET_DUMPABLE, dumpable ? 1 : 0) < 0) {
497 fr_strerror_printf("Cannot re-enable core dumps: prctl(PR_SET_DUMPABLE) failed: %s",
498 fr_syserror(errno));
499 return -1;
500 }
501
502 return 0;
503 }
504 #elif defined(HAVE_SYS_PROCCTL_H) && defined(PROC_TRACE_CTL_ENABLE)
fr_set_dumpable_flag(bool dumpable)505 static int fr_set_dumpable_flag(bool dumpable)
506 {
507 int mode = dumpable ? PROC_TRACE_CTL_ENABLE : PROC_TRACE_CTL_DISABLE;
508
509 if (procctl(P_PID, getpid(), PROC_TRACE_CTL, &mode) == -1) {
510 fr_strerror_printf("Cannot re-enable core dumps: procctl(PROC_TRACE_CTL) failed: %s",
511 fr_syserror(errno));
512 return -1;
513 }
514
515 return 0;
516 }
517 #else
/*
 *	No supported mechanism on this platform.  Always fails with -2,
 *	distinct from the -1 the real implementations use.
 */
static int fr_set_dumpable_flag(UNUSED bool dumpable)
{
	fr_strerror_printf("Changing value of PR_DUMPABLE not supported on this system");

	return -2;
}
523 #endif
524
/** Get the process's dumpable flag
526 *
527 */
528 #if defined(HAVE_SYS_PRCTL_H) && defined(PR_GET_DUMPABLE)
fr_get_dumpable_flag(void)529 static int fr_get_dumpable_flag(void)
530 {
531 int ret;
532
533 ret = prctl(PR_GET_DUMPABLE);
534 if (ret < 0) {
535 fr_strerror_printf("Cannot get dumpable flag: %s", fr_syserror(errno));
536 return -1;
537 }
538
539 /*
540 * Linux is crazy and prctl sometimes returns 2 for disabled
541 */
542 if (ret != 1) return 0;
543 return 1;
544 }
545 #elif defined(HAVE_SYS_PROCCTL_H) && defined(PROC_TRACE_CTL)
fr_get_dumpable_flag(void)546 static int fr_get_dumpable_flag(void)
547 {
548 int status;
549
550 if (procctl(P_PID, getpid(), PROC_TRACE_CTL, &status) == -1) {
551 fr_strerror_printf("Cannot get dumpable flag: procctl(PROC_TRACE_CTL) failed: %s", fr_syserror(errno));
552 return -1;
553 }
554
555 /*
556 * There are a few different kinds of disabled, but only
557 * one ENABLE.
558 */
559 if (status != PROC_TRACE_CTL_ENABLE) return 0;
560
561 return 1;
562 }
563 #else
/*
 *	No supported mechanism on this platform.  Always fails with -2,
 *	distinct from the -1 the real implementations use.
 */
static int fr_get_dumpable_flag(void)
{
	fr_strerror_printf("Getting value of PR_DUMPABLE not supported on this system");

	return -2;
}
569 #endif
570
571
/** Record the current limits for core files
 *
 * Stores the process's RLIMIT_CORE values so fr_set_dumpable() can restore
 * them later.  Do this before anything else so as to ensure they're properly
 * initialized.
 *
 * @return 0 on success (or if resource limits aren't supported), -1 on failure.
 */
int fr_set_dumpable_init(void)
{
#ifdef HAVE_SYS_RESOURCE_H
	int rcode = getrlimit(RLIMIT_CORE, &core_limits);

	if (rcode < 0) {
		fr_strerror_printf("Failed to get current core limit:  %s", fr_syserror(errno));
		return -1;
	}
#endif

	return 0;
}
586
587 /** Enable or disable core dumps
588 *
589 * @param allow_core_dumps whether to enable or disable core dumps.
590 */
fr_set_dumpable(bool allow_core_dumps)591 int fr_set_dumpable(bool allow_core_dumps)
592 {
593 dump_core = allow_core_dumps;
594 /*
595 * If configured, turn core dumps off.
596 */
597 if (!allow_core_dumps) {
598 #ifdef HAVE_SYS_RESOURCE_H
599 struct rlimit no_core;
600
601 no_core.rlim_cur = 0;
602 no_core.rlim_max = core_limits.rlim_max;
603
604 if (setrlimit(RLIMIT_CORE, &no_core) < 0) {
605 fr_strerror_printf("Failed disabling core dumps: %s", fr_syserror(errno));
606
607 return -1;
608 }
609 #endif
610 return 0;
611 }
612
613 if (fr_set_dumpable_flag(true) < 0) return -1;
614
615 /*
616 * Reset the core dump limits to their original value.
617 */
618 #ifdef HAVE_SYS_RESOURCE_H
619 if (setrlimit(RLIMIT_CORE, &core_limits) < 0) {
620 fr_strerror_printf("Cannot update core dump limit: %s", fr_syserror(errno));
621
622 return -1;
623 }
624 #endif
625 return 0;
626 }
627
628 /** Reset dumpable state to previously configured value
629 *
630 * Needed after suid up/down
631 *
632 * @return 0 on success, else -1 on failure.
633 */
fr_reset_dumpable(void)634 int fr_reset_dumpable(void)
635 {
636 return fr_set_dumpable(dump_core);
637 }
638
639 /** Check to see if panic_action file is world writeable
640 *
641 * @return 0 if file is OK, else -1.
642 */
fr_fault_check_permissions(void)643 static int fr_fault_check_permissions(void)
644 {
645 char const *p, *q;
646 size_t len;
647 char filename[256];
648 struct stat statbuf;
649
650 /*
651 * Try and guess which part of the command is the binary, and check to see if
652 * it's world writeable, to try and save the admin from their own stupidity.
653 *
654 * @fixme we should do this properly and take into account single and double
655 * quotes.
656 */
657 if ((q = strchr(panic_action, ' '))) {
658 /*
659 * need to use a static buffer, because mallocing memory in a signal handler
660 * is a bad idea and can result in deadlock.
661 */
662 len = snprintf(filename, sizeof(filename), "%.*s", (int)(q - panic_action), panic_action);
663 if (is_truncated(len, sizeof(filename))) {
664 fr_strerror_printf("Failed writing panic_action to temporary buffer (truncated)");
665 return -1;
666 }
667 p = filename;
668 } else {
669 p = panic_action;
670 }
671
672 if (stat(p, &statbuf) == 0) {
673 #ifdef S_IWOTH
674 if ((statbuf.st_mode & S_IWOTH) != 0) {
675 fr_strerror_printf("panic_action file \"%s\" is globally writable", p);
676 return -1;
677 }
678 #endif
679 }
680
681 return 0;
682 }
683
684 /** Prints a simple backtrace (if execinfo is available) and calls panic_action if set.
685 *
686 * @param sig caught
687 */
fr_fault(int sig)688 NEVER_RETURNS void fr_fault(int sig)
689 {
690 char cmd[sizeof(panic_action) + 20];
691 char *out = cmd;
692 size_t left = sizeof(cmd), ret;
693
694 char const *p = panic_action;
695 char const *q;
696
697 int code;
698
699 /*
700 * If a debugger is attached, we don't want to run the panic action,
701 * as it may interfere with the operation of the debugger.
702 * If something calls us directly we just raise the signal and let
703 * the debugger handle it how it wants.
704 */
705 if (fr_debug_state == DEBUG_STATE_ATTACHED) {
706 FR_FAULT_LOG("RAISING SIGNAL: %s", strsignal(sig));
707 raise(sig);
708 goto finish;
709 }
710
711 /*
712 * Makes the backtraces slightly cleaner
713 */
714 memset(cmd, 0, sizeof(cmd));
715
716 FR_FAULT_LOG("CAUGHT SIGNAL: %s", strsignal(sig));
717
718 /*
719 * Check for administrator sanity.
720 */
721 if (fr_fault_check_permissions() < 0) {
722 FR_FAULT_LOG("Refusing to execute panic action: %s", fr_strerror());
723 goto finish;
724 }
725
726 /*
727 * Run the callback if one was registered
728 */
729 if (panic_cb && (panic_cb(sig) < 0)) goto finish;
730
731 /*
732 * Produce a simple backtrace - They're very basic but at least give us an
733 * idea of the area of the code we hit the issue in.
734 *
735 * See below in fr_fault_setup() and
736 * https://sourceware.org/bugzilla/show_bug.cgi?id=16159
737 * for why we only print backtraces in debug builds if we're using GLIBC.
738 */
739 #if defined(HAVE_EXECINFO) && (!defined(NDEBUG) || !defined(__GNUC__))
740 if (fr_fault_log_fd >= 0) {
741 size_t frame_count;
742 void *stack[MAX_BT_FRAMES];
743
744 frame_count = backtrace(stack, MAX_BT_FRAMES);
745
746 FR_FAULT_LOG("Backtrace of last %zu frames:", frame_count);
747
748 backtrace_symbols_fd(stack, frame_count, fr_fault_log_fd);
749 }
750 #endif
751
752 /* No panic action set... */
753 if (panic_action[0] == '\0') {
754 FR_FAULT_LOG("No panic action set");
755 goto finish;
756 }
757
758 /* Substitute %p for the current PID (useful for attaching a debugger) */
759 while ((q = strstr(p, "%p"))) {
760 out += ret = snprintf(out, left, "%.*s%d", (int) (q - p), p, (int) getpid());
761 if (left <= ret) {
762 oob:
763 FR_FAULT_LOG("Panic action too long");
764 fr_exit_now(1);
765 }
766 left -= ret;
767 p = q + 2;
768 }
769 if (strlen(p) >= left) goto oob;
770 strlcpy(out, p, left);
771
772 {
773 bool disable = false;
774
775 FR_FAULT_LOG("Calling: %s", cmd);
776
777 /*
778 * Here we temporarily enable the dumpable flag so if GBD or LLDB
779 * is called in the panic_action, they can pattach to the running
780 * process.
781 */
782 if (fr_get_dumpable_flag() == 0) {
783 if ((fr_set_dumpable_flag(true) < 0) || !fr_get_dumpable_flag()) {
784 FR_FAULT_LOG("Failed setting dumpable flag, pattach may not work: %s", fr_strerror());
785 } else {
786 disable = true;
787 }
788 FR_FAULT_LOG("Temporarily setting PR_DUMPABLE to 1");
789 }
790
791 code = system(cmd);
792
793 /*
794 * We only want to error out here, if dumpable was originally disabled
795 * and we managed to change the value to enabled, but failed
796 * setting it back to disabled.
797 */
798 if (disable) {
799 FR_FAULT_LOG("Resetting PR_DUMPABLE to 0");
800 if (fr_set_dumpable_flag(false) < 0) {
801 FR_FAULT_LOG("Failed resetting dumpable flag to off: %s", fr_strerror());
802 FR_FAULT_LOG("Exiting due to insecure process state");
803 fr_exit_now(1);
804 }
805 }
806
807 FR_FAULT_LOG("Panic action exited with %i", code);
808
809 fr_exit_now(code);
810 }
811
812
813 finish:
814 /*
815 * (Re-)Raise the signal, so that if we're running under
816 * a debugger, the debugger can break when it receives
817 * the signal.
818 */
819 fr_unset_signal(sig); /* Make sure we don't get into a loop */
820
821 raise(sig);
822
823 fr_exit_now(1); /* Function marked as noreturn */
824 }
825
826 /** Callback executed on fatal talloc error
827 *
828 * This is the simple version which mostly behaves the same way as the default
829 * one, and will not call panic_action.
830 *
831 * @param reason string provided by talloc.
832 */
833 static void _fr_talloc_fault_simple(char const *reason) CC_HINT(noreturn);
_fr_talloc_fault_simple(char const * reason)834 static void _fr_talloc_fault_simple(char const *reason)
835 {
836 FR_FAULT_LOG("talloc abort: %s\n", reason);
837
838 #if defined(HAVE_EXECINFO) && (!defined(NDEBUG) || !defined(__GNUC__))
839 if (fr_fault_log_fd >= 0) {
840 size_t frame_count;
841 void *stack[MAX_BT_FRAMES];
842
843 frame_count = backtrace(stack, MAX_BT_FRAMES);
844 FR_FAULT_LOG("Backtrace of last %zu frames:", frame_count);
845 backtrace_symbols_fd(stack, frame_count, fr_fault_log_fd);
846 }
847 #endif
848 abort();
849 }
850
851 /** Callback executed on fatal talloc error
852 *
853 * Translates a talloc abort into a fr_fault call.
854 * Mostly to work around issues with some debuggers not being able to
855 * attach after a SIGABRT has been raised.
856 *
857 * @param reason string provided by talloc.
858 */
859 static void _fr_talloc_fault(char const *reason) CC_HINT(noreturn);
_fr_talloc_fault(char const * reason)860 static void _fr_talloc_fault(char const *reason)
861 {
862 FR_FAULT_LOG("talloc abort: %s", reason);
863 #ifdef SIGABRT
864 fr_fault(SIGABRT);
865 #endif
866 fr_exit_now(1);
867 }
868
/** Wrapper to pass talloc log output to our fr_fault_log function
 *
 * @param msg produced by talloc, written out followed by a newline.
 */
static void _fr_talloc_log(char const *msg)
{
	fr_fault_log("%s\n", msg);
}
876
877 /** Generate a talloc memory report for a context and print to stderr/stdout
878 *
879 * @param ctx to generate a report for, may be NULL in which case the root context is used.
880 */
fr_log_talloc_report(TALLOC_CTX * ctx)881 int fr_log_talloc_report(TALLOC_CTX *ctx)
882 {
883 #define TALLOC_REPORT_MAX_DEPTH 20
884
885 FILE *log;
886 int fd;
887
888 fd = dup(fr_fault_log_fd);
889 if (fd < 0) {
890 fr_strerror_printf("Couldn't write memory report, failed to dup log fd: %s", fr_syserror(errno));
891 return -1;
892 }
893 log = fdopen(fd, "w");
894 if (!log) {
895 close(fd);
896 fr_strerror_printf("Couldn't write memory report, fdopen failed: %s", fr_syserror(errno));
897 return -1;
898 }
899
900 if (!ctx) {
901 fprintf(log, "Current state of talloced memory:\n");
902 talloc_report_full(talloc_null_ctx, log);
903 } else {
904 int i;
905
906 fprintf(log, "Talloc chunk lineage:\n");
907 fprintf(log, "%p (%s)", ctx, talloc_get_name(ctx));
908
909 i = 0;
910 while ((i < TALLOC_REPORT_MAX_DEPTH) && (ctx = talloc_parent(ctx))) {
911 fprintf(log, " < %p (%s)", ctx, talloc_get_name(ctx));
912 i++;
913 }
914 fprintf(log, "\n");
915
916 i = 0;
917 do {
918 fprintf(log, "Talloc context level %i:\n", i++);
919 talloc_report_full(ctx, log);
920 } while ((ctx = talloc_parent(ctx)) &&
921 (i < TALLOC_REPORT_MAX_DEPTH) &&
922 (talloc_parent(ctx) != talloc_autofree_ctx) && /* Stop before we hit the autofree ctx */
923 (talloc_parent(ctx) != talloc_null_ctx)); /* Stop before we hit NULL ctx */
924 }
925
926 fclose(log);
927
928 return 0;
929 }
930
931
/*
 *	Destructor for the marker fr_fault_setup() allocates in the autofree
 *	context.  Turns null tracking off, and always allows the free.
 */
static int _fr_disable_null_tracking(UNUSED bool *p)
{
	talloc_disable_null_tracking();

	return 0;
}
937
938 /** Register talloc fault handlers
939 *
940 * Just register the fault handlers we need to make talloc
941 * produce useful debugging output.
942 */
fr_talloc_fault_setup(void)943 void fr_talloc_fault_setup(void)
944 {
945 talloc_set_log_fn(_fr_talloc_log);
946 talloc_set_abort_fn(_fr_talloc_fault_simple);
947 }
948
949 /** Registers signal handlers to execute panic_action on fatal signal
950 *
951 * May be called multiple time to change the panic_action/program.
952 *
953 * @param cmd to execute on fault. If present %p will be substituted
954 * for the parent PID before the command is executed, and %e
955 * will be substituted for the currently running program.
956 * @param program Name of program currently executing (argv[0]).
957 * @return 0 on success -1 on failure.
958 */
fr_fault_setup(char const * cmd,char const * program)959 int fr_fault_setup(char const *cmd, char const *program)
960 {
961 static bool setup = false;
962
963 char *out = panic_action;
964 size_t left = sizeof(panic_action);
965
966 char const *p = cmd;
967 char const *q;
968
969 if (cmd) {
970 size_t ret;
971
972 /* Substitute %e for the current program */
973 while ((q = strstr(p, "%e"))) {
974 out += ret = snprintf(out, left, "%.*s%s", (int) (q - p), p, program ? program : "");
975 if (left <= ret) {
976 oob:
977 fr_strerror_printf("Panic action too long");
978 return -1;
979 }
980 left -= ret;
981 p = q + 2;
982 }
983 if (strlen(p) >= left) goto oob;
984 strlcpy(out, p, left);
985 } else {
986 *panic_action = '\0';
987 }
988
989 /*
990 * Check for administrator sanity.
991 */
992 if (fr_fault_check_permissions() < 0) return -1;
993
994 /* Unsure what the side effects of changing the signal handler mid execution might be */
995 if (!setup) {
996 char *env;
997 fr_debug_state_t debug_state;
998
999 /*
1000 * Installing signal handlers interferes with some debugging
1001 * operations. Give the developer control over whether the
1002 * signal handlers are installed or not.
1003 */
1004 env = getenv("DEBUG");
1005 if (!env || (strcmp(env, "no") == 0)) {
1006 debug_state = DEBUG_STATE_NOT_ATTACHED;
1007 } else if (!strcmp(env, "auto") || !strcmp(env, "yes")) {
1008 /*
1009 * Figure out if we were started under a debugger
1010 */
1011 if (fr_debug_state < 0) fr_debug_state = fr_get_debug_state();
1012 debug_state = fr_debug_state;
1013 } else {
1014 debug_state = DEBUG_STATE_ATTACHED;
1015 }
1016
1017 talloc_set_log_fn(_fr_talloc_log);
1018
1019 /*
1020 * These signals can't be properly dealt with in the debugger
1021 * if we set our own signal handlers.
1022 */
1023 switch (debug_state) {
1024 default:
1025 #ifndef NDEBUG
1026 FR_FAULT_LOG("Debugger check failed: %s", fr_strerror());
1027 FR_FAULT_LOG("Signal processing in debuggers may not work as expected");
1028 #endif
1029 /* FALL-THROUGH */
1030
1031 case DEBUG_STATE_NOT_ATTACHED:
1032 #ifdef SIGABRT
1033 if (fr_set_signal(SIGABRT, fr_fault) < 0) return -1;
1034
1035 /*
1036 * Use this instead of abort so we get a
1037 * full backtrace with broken versions of LLDB
1038 */
1039 talloc_set_abort_fn(_fr_talloc_fault);
1040 #endif
1041 #ifdef SIGILL
1042 if (fr_set_signal(SIGILL, fr_fault) < 0) return -1;
1043 #endif
1044 #ifdef SIGFPE
1045 if (fr_set_signal(SIGFPE, fr_fault) < 0) return -1;
1046 #endif
1047 #ifdef SIGSEGV
1048 if (fr_set_signal(SIGSEGV, fr_fault) < 0) return -1;
1049 #endif
1050 break;
1051
1052 case DEBUG_STATE_ATTACHED:
1053 break;
1054 }
1055
1056 /*
1057 * Needed for memory reports
1058 */
1059 {
1060 TALLOC_CTX *tmp;
1061 bool *marker;
1062
1063 tmp = talloc(NULL, bool);
1064 talloc_null_ctx = talloc_parent(tmp);
1065 talloc_free(tmp);
1066
1067 /*
1068 * Disable null tracking on exit, else valgrind complains
1069 */
1070 talloc_autofree_ctx = talloc_autofree_context();
1071 marker = talloc(talloc_autofree_ctx, bool);
1072 talloc_set_destructor(marker, _fr_disable_null_tracking);
1073 }
1074
1075 #if defined(HAVE_MALLOPT) && !defined(NDEBUG)
1076 /*
1077 * If were using glibc malloc > 2.4 this scribbles over
1078 * uninitialised and freed memory, to make memory issues easier
1079 * to track down.
1080 */
1081 if (!getenv("TALLOC_FREE_FILL")) mallopt(M_PERTURB, 0x42);
1082 mallopt(M_CHECK_ACTION, 3);
1083 #endif
1084
1085 #if defined(HAVE_EXECINFO) && defined(__GNUC__) && !defined(NDEBUG)
1086 /*
1087 * We need to pre-load lgcc_s, else we can get into a deadlock
1088 * in fr_fault, as backtrace() attempts to dlopen it.
1089 *
1090 * Apparently there's a performance impact of loading lgcc_s,
1091 * so only do it if this is a debug build.
1092 *
1093 * See: https://sourceware.org/bugzilla/show_bug.cgi?id=16159
1094 */
1095 {
1096 void *stack[10];
1097
1098 backtrace(stack, 10);
1099 }
1100 #endif
1101 }
1102 setup = true;
1103
1104 return 0;
1105 }
1106
1107 /** Set a callback to be called before fr_fault()
1108 *
1109 * @param func to execute. If callback returns < 0
1110 * fr_fault will exit before running panic_action code.
1111 */
fr_fault_set_cb(fr_fault_cb_t func)1112 void fr_fault_set_cb(fr_fault_cb_t func)
1113 {
1114 panic_cb = func;
1115 }
1116
1117 /** Log output to the fr_fault_log_fd
1118 *
1119 * We used to support a user defined callback, which was set to a radlog
1120 * function. Unfortunately, when logging to syslog, syslog would malloc memory
1121 * which would result in a deadlock if fr_fault was triggered from within
1122 * a malloc call.
1123 *
1124 * Now we just write directly to the FD.
1125 */
fr_fault_log(char const * msg,...)1126 void fr_fault_log(char const *msg, ...)
1127 {
1128 va_list ap;
1129
1130 if (fr_fault_log_fd < 0) return;
1131
1132 va_start(ap, msg);
1133 vdprintf(fr_fault_log_fd, msg, ap);
1134 va_end(ap);
1135 }
1136
1137 /** Set a file descriptor to log memory reports to.
1138 *
1139 * @param fd to write output to.
1140 */
fr_fault_set_log_fd(int fd)1141 void fr_fault_set_log_fd(int fd)
1142 {
1143 fr_fault_log_fd = fd;
1144 }
1145
/** A soft assertion which triggers the fault handler in debug builds
 *
 * When the condition holds this is a no-op that returns true.  When it does
 * not hold, the failure is logged with FR_FAULT_LOG and, unless NDEBUG is
 * defined, fr_fault(SIGABRT) is called.
 *
 * @param file the assertion failed in.
 * @param line of the assertion in the file.
 * @param expr that was evaluated.
 * @param cond Result of evaluating the expression.
 * @return the value of cond.
 */
bool fr_assert_cond(char const *file, int line, char const *expr, bool cond)
{
	if (cond) return true;

	/* line is a (signed) int, so it must be printed with a signed conversion */
	FR_FAULT_LOG("SOFT ASSERT FAILED %s[%i]: %s", file, line, expr);
#if !defined(NDEBUG)
	fr_fault(SIGABRT);
#endif

	return false;
}
1166
1167 /** Exit possibly printing a message about why we're exiting.
1168 *
1169 * @note Use the fr_exit(status) macro instead of calling this function directly.
1170 *
1171 * @param file where fr_exit() was called.
1172 * @param line where fr_exit() was called.
1173 * @param status we're exiting with.
1174 */
_fr_exit(char const * file,int line,int status)1175 void NEVER_RETURNS _fr_exit(char const *file, int line, int status)
1176 {
1177 #ifndef NDEBUG
1178 char const *error = fr_strerror();
1179
1180 if (error && (status != 0)) {
1181 FR_FAULT_LOG("EXIT(%i) CALLED %s[%u]. Last error was: %s", status, file, line, error);
1182 } else {
1183 FR_FAULT_LOG("EXIT(%i) CALLED %s[%u]", status, file, line);
1184 }
1185 #endif
1186 fr_debug_break(false); /* If running under GDB we'll break here */
1187
1188 exit(status);
1189 }
1190
1191 /** Exit possibly printing a message about why we're exiting.
1192 *
1193 * @note Use the fr_exit_now(status) macro instead of calling this function directly.
1194 *
1195 * @param file where fr_exit_now() was called.
1196 * @param line where fr_exit_now() was called.
1197 * @param status we're exiting with.
1198 */
_fr_exit_now(char const * file,int line,int status)1199 void NEVER_RETURNS _fr_exit_now(char const *file, int line, int status)
1200 {
1201 #ifndef NDEBUG
1202 char const *error = fr_strerror();
1203
1204 if (error && (status != 0)) {
1205 FR_FAULT_LOG("_EXIT(%i) CALLED %s[%u]. Last error was: %s", status, file, line, error);
1206 } else {
1207 FR_FAULT_LOG("_EXIT(%i) CALLED %s[%u]", status, file, line);
1208 }
1209 #endif
1210 fr_debug_break(false); /* If running under GDB we'll break here */
1211
1212 _exit(status);
1213 }
1214