1 /**
2 * @file
3 * @brief UNIX-style crash handling functions (also used on Windows).
4 **/
5
6 #include "AppHdr.h"
7
8 #include "crash.h"
9
10 #if defined(UNIX)
11 #include <unistd.h>
12 #include <sys/param.h>
13 #define BACKTRACE_SUPPORTED
14 #endif
15
16 #ifdef USE_UNIX_SIGNALS
17 #include <sys/time.h>
18 #include <csignal>
19 #endif
20
21
22 #ifndef TARGET_OS_WINDOWS
23 # include <cerrno>
24 # include <sys/wait.h>
25 #endif
26
27 #ifdef BACKTRACE_SUPPORTED
28 #if defined(TARGET_CPU_MIPS) || \
29 defined(TARGET_OS_FREEBSD) || \
30 defined(TARGET_OS_NETBSD) || \
31 defined(TARGET_OS_OPENBSD) || \
32 defined(TARGET_COMPILER_CYGWIN) || \
33 defined(__ANDROID__)
34 #undef BACKTRACE_SUPPORTED
35 #endif
36 #endif
37
38 #ifdef BACKTRACE_SUPPORTED
39
40 #include <cxxabi.h>
41
42 #if !defined(TARGET_OS_MACOSX) && \
43 !defined(TARGET_OS_WINDOWS) && \
44 !defined(TARGET_COMPILER_CYGWIN)
45 #include <execinfo.h>
46 #endif
47
48 #ifdef TARGET_OS_MACOSX
49 #include <dlfcn.h>
50
// Signatures of the two backtrace entry points that are looked up at run
// time with dlsym() on this platform.
typedef int (*backtrace_t)(void * *, int);
typedef char **(*backtrace_symbols_t)(void * const *, int);

/**
 * Reinterpret a value of type FROM as a value of type TO by writing it into
 * one member of a union and reading it back through the other.
 *
 * Exists so that the void* handed back by dlsym() can be turned into a
 * function pointer without the compiler complaining about the conversion.
 *
 * @param f  The value to reinterpret.
 * @return   The same storage viewed as a TO.
 */
template <typename TO, typename FROM> TO nasty_cast(FROM f)
{
    union
    {
        FROM source;
        TO   target;
    } pun;

    pun.source = f;
    return pun.target;
}
68 #endif // TARGET_OS_MACOSX
69
70 #endif // BACKTRACE_SUPPORTED
71
72 // Support Yama LSM ptrace restrictions
73 #ifdef TARGET_OS_LINUX
74 # include <sys/prctl.h>
75 # ifndef PR_SET_PTRACER
76 # define PR_SET_PTRACER 0x59616d61
77 # endif
78 # ifndef PR_SET_PTRACER_ANY
79 # define PR_SET_PTRACER_ANY ((unsigned long)-1)
80 # endif
81 #endif
82
83 #include "files.h"
84 #include "initfile.h"
85 #include "options.h"
86 #include "state.h"
87 #include "stringutil.h"
88 #include "syscalls.h"
89 #include "threads.h"
90 #include "tiles-build-specific.h"
91
92 /////////////////////////////////////////////////////////////////////////////
93 // Code for printing out debugging info on a crash.
94 ////////////////////////////////////////////////////////////////////////////
95 #ifdef USE_UNIX_SIGNALS
96 static int _crash_signal = 0;
97 static int _recursion_depth = 0;
98 static mutex_t crash_mutex;
99
100 // Make this non-static so stack traces are easier to follow
101 void crash_signal_handler(int sig_num);
102
crash_signal_handler(int sig_num)103 void crash_signal_handler(int sig_num)
104 {
105 // We rely on mutexes ignoring locks held by the same thread.
106 // On some platforms, this must be explicitly enabled (which we do).
107
108 // This mutex is never unlocked again -- the first thread to crash will
109 // do a dump then terminate the process while everyone else waits here
110 // forever.
111
112 // XXX: This is a bit dangerous: if we catch a signal while any
113 // non-asynch-signal-safe function is executing, and then call
114 // pthread_mutex_lock() (which is also not asynch-signal-safe),
115 // the behaviour is undefined.
116 mutex_lock(crash_mutex);
117
118 if (crawl_state.game_crashed)
119 {
120 if (_recursion_depth > 0)
121 return;
122 _recursion_depth++;
123
124 fprintf(stderr, "Recursive crash.\n");
125
126 string dir = (!Options.morgue_dir.empty() ? Options.morgue_dir :
127 !SysEnv.crawl_dir.empty() ? SysEnv.crawl_dir
128 : "");
129
130 if (!dir.empty() && dir[dir.length() - 1] != FILE_SEPARATOR)
131 dir += FILE_SEPARATOR;
132
133 char name[180];
134
135 snprintf(name, sizeof(name), "%scrash-recursive-%s-%s.txt", dir.c_str(),
136 you.your_name.c_str(), make_file_time(time(nullptr)).c_str());
137
138 FILE* file = fopen_replace(name);
139
140 if (file == nullptr)
141 file = stderr;
142
143 write_stack_trace(file);
144
145 if (file != stderr)
146 fclose(file);
147 return;
148 }
149
150 _crash_signal = sig_num;
151 crawl_state.game_crashed = true;
152
153 // During a crash, we may be in an inconsistent state (duh). Doing a number
154 // of things can cause a lock up, especially calling non-reentrant functions
155 // like malloc() and friends, used by C++ basics like std::string
156 // internally.
157 // There's no reliable way to ensure such things won't happen. A pragmatic
158 // solution is to abort the crash dump.
159 alarm(120);
160
161 // In case the crash dumper is unable to open a file and has to dump
162 // to stderr.
163 #ifndef USE_TILE_LOCAL
164 if (crawl_state.io_inited)
165 console_shutdown();
166 #endif
167
168 #ifdef USE_TILE_WEB
169 tiles.shutdown();
170 #endif
171
172 #ifdef WATCHDOG
173 /* Infinite loop protection.
174
175 Not tickling the watchdog for 60 seconds of user CPU time (not wall
176 time!) means something is terribly wrong. Even worst hogs like
177 pre-0.6 god renouncement or current Time Step in the Abyss don't take
178 more than several seconds.
179
180 DGL only -- local players will notice the game is stuck and be able
181 to kill it.
182
183 It's likely to die horribly -- it's one of signals that is often
184 received while in non-signal safe functions, especially malloc()
185 which _will_ fuck the process up (remember, C++ can't blink without
186 malloc()ing something). In such cases, alarm() above will kill us.
187 That's nasty and random, but at least should give us backtraces most
188 of the time, and avoid dragging down the servers. And even if for
189 some odd reason SIGALRM won't kill us, the worst that can happen is
190 wasting 100% CPU which is precisely what happens right now.
191 */
192 if (sig_num == SIGVTALRM)
193 die_noline("Stuck game with 100%% CPU use\n");
194 #endif
195
196 do_crash_dump();
197
198 // Now crash for real.
199 signal(sig_num, SIG_DFL);
200 raise(sig_num);
201 }
202 #endif
203
/**
 * Initialise the crash mutex and route signals 1 through 64 to
 * crash_signal_handler(), except for the ones listed in the exemption table
 * below. Does nothing unless USE_UNIX_SIGNALS is defined.
 */
void init_crash_handler()
{
#if defined(USE_UNIX_SIGNALS)
    // Signals that are never pointed at the crash handler. Each entry is
    // only present if the platform defines the corresponding macro.
    static const int exempt_signals[] =
    {
        0, // Placeholder so the table is never empty; not in the 1..64 range.
#ifdef SIGALRM
        SIGALRM,
#endif
#ifdef SIGHUP
        SIGHUP,
#endif
#ifdef SIGQUIT
        SIGQUIT,
#endif
#ifdef SIGINT
        SIGINT,
#endif
#ifdef SIGCHLD
        SIGCHLD,
#endif
#ifdef SIGTSTP
        SIGTSTP,
#endif
#ifdef SIGCONT
        SIGCONT,
#endif
#ifdef SIGIO
        SIGIO,
#endif
#ifdef SIGPROF
        SIGPROF,
#endif
#ifdef SIGTTOU
        SIGTTOU,
#endif
#ifdef SIGTTIN
        SIGTTIN,
#endif
#ifdef SIGKILL
        SIGKILL,
#endif
#ifdef SIGSTOP
        SIGSTOP,
#endif
#ifdef SIGWINCH
        SIGWINCH,
#endif
    };

    mutex_init(crash_mutex);

    for (int sig = 1; sig <= 64; ++sig)
    {
        bool exempt = false;
        for (int candidate : exempt_signals)
        {
            if (candidate == sig)
            {
                exempt = true;
                break;
            }
        }

        if (!exempt)
            signal(sig, crash_signal_handler);
    }

#endif // if defined(USE_UNIX_SIGNALS)
}
273
crash_signal_info()274 string crash_signal_info()
275 {
276 #if defined(UNIX)
277 #ifdef TARGET_OS_FREEBSD
278 // FreeBSD's strsignal was not working properly so we just check
279 // to make sure that the signal is in the available list of signals,
280 // and then look it up in the table of signal handler names that
281 // FreeBSD exposes.
282 const char *name = nullptr;
283 if (_crash_signal >= SIGHUP && _crash_signal <= SIGLIBRT)
284 name = sys_signame[_crash_signal];
285 #else
286 const char *name = strsignal(_crash_signal);
287 #endif
288
289 if (name == nullptr)
290 name = "INVALID";
291 return make_stringf("Crash caused by signal #%d: %s", _crash_signal, name);
292 #else
293 return "";
294 #endif
295
296 }
297
298 #if defined(BACKTRACE_SUPPORTED)
write_stack_trace(FILE * file)299 void write_stack_trace(FILE* file)
300 {
301 void* frames[50];
302
303 #if defined(TARGET_OS_MACOSX)
304 backtrace_t backtrace;
305 backtrace_symbols_t backtrace_symbols;
306 backtrace = nasty_cast<backtrace_t, void*>(dlsym(RTLD_DEFAULT, "backtrace"));
307 backtrace_symbols = nasty_cast<backtrace_symbols_t, void*>(dlsym(RTLD_DEFAULT, "backtrace_symbols"));
308 if (!backtrace || !backtrace_symbols)
309 {
310 fprintf(stderr, "Couldn't get a stack trace.\n");
311 fprintf(file, "Couldn't get a stack trace.\n");
312 return;
313 }
314 #endif
315
316 int num_frames = backtrace(frames, ARRAYSZ(frames));
317 char **symbols = backtrace_symbols(frames, num_frames);
318
319 #if !defined(TARGET_OS_MACOSX)
320 if (symbols == nullptr)
321 {
322 fprintf(stderr, "Out of memory.\n");
323 fprintf(file, "Out of memory.\n");
324
325 // backtrace_symbols_fd() can print out the stack trace even if
326 // malloc() can't find any free memory.
327 backtrace_symbols_fd(frames, num_frames, fileno(file));
328 return;
329 }
330 #endif
331
332 fprintf(file, "Obtained %d stack frames.\n", num_frames);
333
334 // Now we prettify the printout to even show demangled C++ function names.
335 string bt = "";
336 for (int i = 0; i < num_frames; i++)
337 {
338 #if defined(TARGET_OS_MACOSX)
339 char *addr = ::strstr(symbols[i], "0x");
340 char *mangled = ::strchr(addr, ' ') + 1;
341 char *offset = ::strchr(addr, '+');
342 char *postmangle = ::strchr(mangled, ' ');
343 if (mangled)
344 *(mangled - 1) = 0;
345 bt += addr;
346 int status;
347 bt += ": ";
348 if (addr && mangled)
349 {
350 if (postmangle)
351 *postmangle = '\0';
352 char *realname = abi::__cxa_demangle(mangled, 0, 0, &status);
353 if (realname)
354 bt += realname;
355 else
356 bt += mangled;
357 bt += " ";
358 bt += offset;
359 free(realname);
360 }
361 #else // TARGET_OS_MACOSX
362 bt += symbols[i];
363 int status;
364 // Extract the identifier from symbols[i]. It's inside of parens.
365 char *firstparen = ::strchr(symbols[i], '(');
366 char *lastparen = ::strchr(symbols[i], '+');
367 if (firstparen != 0 && lastparen != 0 && firstparen < lastparen)
368 {
369 bt += ": ";
370 *lastparen = '\0';
371 char *realname = abi::__cxa_demangle(firstparen + 1, 0, 0, &status);
372 if (realname != nullptr)
373 bt += realname;
374 free(realname);
375 }
376 #endif
377 bt += "\n";
378 }
379
380 fprintf(file, "%s", bt.c_str());
381
382 free(symbols);
383 }
384 #else // BACKTRACE_SUPPORTED
/**
 * Stand-in used on builds without backtrace support: reports, on both
 * stderr and @p file, that no stack trace can be produced.
 *
 * @param file  Open stream that receives the notice.
 */
void write_stack_trace(FILE* file)
{
    static const char notice[] = "Unable to get stack trace on this platform.\n";

    fputs(notice, stderr);
    fputs(notice, file);
}
391 #endif
392
call_gdb(FILE * file)393 void call_gdb(FILE *file)
394 {
395 #ifndef TARGET_OS_WINDOWS
396 if (crawl_state.no_gdb)
397 return (void)fprintf(file, "%s\n", crawl_state.no_gdb);
398
399 fprintf(file, "Trying to run gdb.\n");
400 fflush(file); // so we can use fileno()
401
402 char attach_cmd[20] = {};
403 snprintf(attach_cmd, sizeof(attach_cmd), "attach %d", getpid());
404
405 #ifdef TARGET_OS_LINUX
406 prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
407 #endif
408 switch (int gdb = fork())
409 {
410 case -1:
411 return (void)fprintf(file, "Couldn't fork: %s\n", strerror(errno));
412 case 0:
413 {
414 int fd = fileno(file);
415 dup2(fd, 1);
416 dup2(fd, 2);
417 close(fd);
418
419 const char* argv[] =
420 {
421 "nice",
422 GDB_PATH,
423 "-batch",
424 "-ex", "show version", // Too bad -iex needs gdb >=7.5 (jessie)
425 "-ex", attach_cmd,
426 "-ex", "bt full",
427 0
428 };
429 execv("/usr/bin/nice", (char* const*)argv);
430 printf("Failed to start gdb: %s\n", strerror(errno));
431 fflush(stdout);
432 _exit(0);
433 }
434 return;
435 default:
436 waitpid(gdb, 0, 0);
437 }
438 #else
439 UNUSED(file);
440 #endif
441 }
442
/**
 * Take the crash mutex so that no other thread can enter the crash handler.
 *
 * Meant for the case where one thread calls end() for a handled fatal error
 * rather than going through a crash: from then on nobody else should be
 * allowed to crash. The process is already going down, so leaving other
 * threads blocked on the mutex is acceptable.
 *
 * Does nothing unless USE_UNIX_SIGNALS is defined.
 */
#ifdef USE_UNIX_SIGNALS
void disable_other_crashes()
{
    mutex_lock(crash_mutex);
}
#else
void disable_other_crashes()
{
}
#endif
452
/**
 * (Re)arm the watchdog timer with a 60 second timeout.
 *
 * On UNIX builds this is a one-shot ITIMER_VIRTUAL timer. Other builds use
 * alarm(60) instead.
 */
void watchdog()
{
#ifndef UNIX
    // Real time rather than CPU time.
    // This will break DGL, but it makes no sense on Windows anyway.
    // Mapstat is cool with this.
    alarm(60);
#else
    // Everything zeroed: no repeat interval, and no microseconds part.
    struct itimerval timer = {};
    timer.it_value.tv_sec = 60;
    setitimer(ITIMER_VIRTUAL, &timer, nullptr);
#endif
}
469