1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // This is heavily inspired by the signal handler from google-glog
18 
19 #include <folly/experimental/symbolizer/SignalHandler.h>
20 
21 #include <signal.h>
22 #include <sys/types.h>
23 
24 #include <algorithm>
25 #include <atomic>
26 #include <cerrno>
27 #include <ctime>
28 #include <mutex>
29 #include <vector>
30 
31 #include <glog/logging.h>
32 
33 #include <folly/ScopeGuard.h>
34 #include <folly/experimental/symbolizer/Symbolizer.h>
35 #include <folly/lang/ToAscii.h>
36 #include <folly/portability/SysSyscall.h>
37 #include <folly/portability/Unistd.h>
38 
39 namespace folly {
40 namespace symbolizer {
41 
42 #ifndef _WIN32
43 
// Bitmask of every signal this file treats as fatal: bit N is set when signal
// number N is one of SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGBUS, SIGTERM or
// SIGQUIT.
const unsigned long kAllFatalSignals = 0UL //
    | (1UL << SIGSEGV) //
    | (1UL << SIGILL) //
    | (1UL << SIGFPE) //
    | (1UL << SIGABRT) //
    | (1UL << SIGBUS) //
    | (1UL << SIGTERM) //
    | (1UL << SIGQUIT);
47 
48 #endif
49 
50 namespace {
51 
52 /**
53  * Fatal signal handler registry.
54  */
55 class FatalSignalCallbackRegistry {
56  public:
57   FatalSignalCallbackRegistry();
58 
59   void add(SignalCallback func);
60   void markInstalled();
61   void run();
62 
63  private:
64   std::atomic<bool> installed_;
65   std::mutex mutex_;
66   std::vector<SignalCallback> handlers_;
67 };
68 
FatalSignalCallbackRegistry()69 FatalSignalCallbackRegistry::FatalSignalCallbackRegistry()
70     : installed_(false) {}
71 
add(SignalCallback func)72 void FatalSignalCallbackRegistry::add(SignalCallback func) {
73   std::lock_guard<std::mutex> lock(mutex_);
74   CHECK(!installed_) << "FatalSignalCallbackRegistry::add may not be used "
75                         "after installing the signal handlers.";
76   handlers_.push_back(func);
77 }
78 
markInstalled()79 void FatalSignalCallbackRegistry::markInstalled() {
80   std::lock_guard<std::mutex> lock(mutex_);
81   CHECK(!installed_.exchange(true))
82       << "FatalSignalCallbackRegistry::markInstalled must be called "
83       << "at most once";
84 }
85 
run()86 void FatalSignalCallbackRegistry::run() {
87   if (!installed_) {
88     return;
89   }
90 
91   for (auto& fn : handlers_) {
92     fn();
93   }
94 }
95 
96 std::atomic<FatalSignalCallbackRegistry*> gFatalSignalCallbackRegistry{};
97 
getFatalSignalCallbackRegistry()98 FatalSignalCallbackRegistry* getFatalSignalCallbackRegistry() {
99   // Leak it so we don't have to worry about destruction order
100   static FatalSignalCallbackRegistry* fatalSignalCallbackRegistry =
101       new FatalSignalCallbackRegistry();
102 
103   return fatalSignalCallbackRegistry;
104 }
105 
106 } // namespace
107 
addFatalSignalCallback(SignalCallback cb)108 void addFatalSignalCallback(SignalCallback cb) {
109   getFatalSignalCallbackRegistry()->add(cb);
110 }
111 
installFatalSignalCallbacks()112 void installFatalSignalCallbacks() {
113   getFatalSignalCallbackRegistry()->markInstalled();
114 }
115 
116 #ifndef _WIN32
117 
118 namespace {
119 
// Table of the fatal signals handled by this file. The final entry, whose
// name is null, is a sentinel that terminates iteration over the table.
struct {
  // Signal number.
  int number;
  // Printable signal name; nullptr only in the sentinel entry.
  const char* name;
  // Disposition that was in effect before our handler was installed.
  struct sigaction oldAction;
} kFatalSignals[] = {
    {SIGSEGV, "SIGSEGV", {}},
    {SIGILL, "SIGILL", {}},
    {SIGFPE, "SIGFPE", {}},
    {SIGABRT, "SIGABRT", {}},
    {SIGBUS, "SIGBUS", {}},
    {SIGTERM, "SIGTERM", {}},
    {SIGQUIT, "SIGQUIT", {}},
    // Sentinel.
    {0, nullptr, {}},
};
134 
callPreviousSignalHandler(int signum)135 FOLLY_MAYBE_UNUSED void callPreviousSignalHandler(int signum) {
136   // Restore disposition to old disposition, then kill ourselves with the same
137   // signal. The signal will be blocked until we return from our handler,
138   // then it will invoke the default handler and abort.
139   for (auto p = kFatalSignals; p->name; ++p) {
140     if (p->number == signum) {
141       sigaction(signum, &p->oldAction, nullptr);
142       raise(signum);
143       return;
144     }
145   }
146 
147   // Not one of the signals we know about. Oh well. Reset to default.
148   struct sigaction sa;
149   memset(&sa, 0, sizeof(sa));
150   sa.sa_handler = SIG_DFL;
151   sigaction(signum, &sa, nullptr);
152   raise(signum);
153 }
154 
155 #if FOLLY_USE_SYMBOLIZER
156 
157 // Note: not thread-safe, but that's okay, as we only let one thread
158 // in our signal handler at a time.
159 //
160 // Leak it so we don't have to worry about destruction order
161 //
162 // Initialized by installFatalSignalHandler
163 SafeStackTracePrinter* gStackTracePrinter;
164 
print(StringPiece sp)165 void print(StringPiece sp) {
166   gStackTracePrinter->print(sp);
167 }
168 
flush()169 void flush() {
170   gStackTracePrinter->flush();
171 }
172 
printDec(uint64_t val)173 void printDec(uint64_t val) {
174   char buf[to_ascii_size_max_decimal<uint64_t>];
175   size_t n = to_ascii_decimal(buf, val);
176   gStackTracePrinter->print(StringPiece(buf, n));
177 }
178 
printHex(uint64_t val)179 void printHex(uint64_t val) {
180   char buf[2 + to_ascii_size_max<16, uint64_t>];
181   auto out = buf + 0;
182   *out++ = '0';
183   *out++ = 'x';
184   out += to_ascii_lower<16>(out, buf + sizeof(buf), val);
185   gStackTracePrinter->print(StringPiece(buf, out - buf));
186 }
187 
dumpTimeInfo()188 void dumpTimeInfo() {
189   SCOPE_EXIT { flush(); };
190   time_t now = time(nullptr);
191   print("*** Aborted at ");
192   printDec(now);
193   print(" (Unix time, try 'date -d @");
194   printDec(now);
195   print("') ***\n");
196 }
197 
// Maps a SIGILL si_code to a short description, or nullptr if the code is
// not one of the values listed below.
const char* sigill_reason(int si_code) {
  struct Reason {
    int code;
    const char* text;
  };
  static constexpr Reason kReasons[] = {
      {ILL_ILLOPC, "illegal opcode"},
      {ILL_ILLOPN, "illegal operand"},
      {ILL_ILLADR, "illegal addressing mode"},
      {ILL_ILLTRP, "illegal trap"},
      {ILL_PRVOPC, "privileged opcode"},
      {ILL_PRVREG, "privileged register"},
      {ILL_COPROC, "coprocessor error"},
      {ILL_BADSTK, "internal stack error"},
  };

  for (const Reason& reason : kReasons) {
    if (reason.code == si_code) {
      return reason.text;
    }
  }
  return nullptr;
}
221 
// Maps a SIGFPE si_code to a short description, or nullptr if the code is
// not one of the values listed below.
const char* sigfpe_reason(int si_code) {
  struct Reason {
    int code;
    const char* text;
  };
  static constexpr Reason kReasons[] = {
      {FPE_INTDIV, "integer divide by zero"},
      {FPE_INTOVF, "integer overflow"},
      {FPE_FLTDIV, "floating-point divide by zero"},
      {FPE_FLTOVF, "floating-point overflow"},
      {FPE_FLTUND, "floating-point underflow"},
      {FPE_FLTRES, "floating-point inexact result"},
      {FPE_FLTINV, "floating-point invalid operation"},
      {FPE_FLTSUB, "subscript out of range"},
  };

  for (const Reason& reason : kReasons) {
    if (reason.code == si_code) {
      return reason.text;
    }
  }
  return nullptr;
}
245 
// Maps a SIGSEGV si_code to a short description, or nullptr if the code is
// neither SEGV_MAPERR nor SEGV_ACCERR.
const char* sigsegv_reason(int si_code) {
  if (si_code == SEGV_MAPERR) {
    return "address not mapped to object";
  }
  if (si_code == SEGV_ACCERR) {
    return "invalid permissions for mapped object";
  }
  return nullptr;
}
257 
// Maps a SIGBUS si_code to a short description, or nullptr if the code is
// not handled here.
//
// MCEERR_AR and MCEERR_AO are listed in sigaction(2) but not in the headers,
// so they are not covered.
const char* sigbus_reason(int si_code) {
  if (si_code == BUS_ADRALN) {
    return "invalid address alignment";
  }
  if (si_code == BUS_ADRERR) {
    return "nonexistent physical address";
  }
  if (si_code == BUS_OBJERR) {
    return "object-specific hardware error";
  }
  return nullptr;
}
273 
// Maps a SIGTRAP si_code to a short description, or nullptr if the code is
// not handled here.
//
// TRAP_BRANCH and TRAP_HWBKPT are listed in sigaction(2) but not in the
// headers, so they are not covered.
const char* sigtrap_reason(int si_code) {
  if (si_code == TRAP_BRKPT) {
    return "process breakpoint";
  }
  if (si_code == TRAP_TRACE) {
    return "process trace trap";
  }
  return nullptr;
}
287 
// Maps a SIGCHLD si_code to a short description, or nullptr if the code is
// not one of the values listed below.
const char* sigchld_reason(int si_code) {
  struct Reason {
    int code;
    const char* text;
  };
  static constexpr Reason kReasons[] = {
      {CLD_EXITED, "child has exited"},
      {CLD_KILLED, "child was killed"},
      {CLD_DUMPED, "child terminated abnormally"},
      {CLD_TRAPPED, "traced child has trapped"},
      {CLD_STOPPED, "child has stopped"},
      {CLD_CONTINUED, "stopped child has continued"},
  };

  for (const Reason& reason : kReasons) {
    if (reason.code == si_code) {
      return reason.text;
    }
  }
  return nullptr;
}
307 
// Maps a SIGIO si_code to a short description, or nullptr if the code is not
// one of the values listed below.
const char* sigio_reason(int si_code) {
  struct Reason {
    int code;
    const char* text;
  };
  static constexpr Reason kReasons[] = {
      {POLL_IN, "data input available"},
      {POLL_OUT, "output buffers available"},
      {POLL_MSG, "input message available"},
      {POLL_ERR, "I/O error"},
      {POLL_PRI, "high priority input available"},
      {POLL_HUP, "device disconnected"},
  };

  for (const Reason& reason : kReasons) {
    if (reason.code == si_code) {
      return reason.text;
    }
  }
  return nullptr;
}
327 
signal_reason(int signum,int si_code)328 const char* signal_reason(int signum, int si_code) {
329   switch (signum) {
330     case SIGILL:
331       return sigill_reason(si_code);
332     case SIGFPE:
333       return sigfpe_reason(si_code);
334     case SIGSEGV:
335       return sigsegv_reason(si_code);
336     case SIGBUS:
337       return sigbus_reason(si_code);
338     case SIGTRAP:
339       return sigtrap_reason(si_code);
340     case SIGCHLD:
341       return sigchld_reason(si_code);
342     case SIGIO:
343       return sigio_reason(si_code); // aka SIGPOLL
344 
345     default:
346       return nullptr;
347   }
348 }
349 
dumpSignalInfo(int signum,siginfo_t * siginfo)350 void dumpSignalInfo(int signum, siginfo_t* siginfo) {
351   SCOPE_EXIT { flush(); };
352   // Get the signal name, if possible.
353   const char* name = nullptr;
354   for (auto p = kFatalSignals; p->name; ++p) {
355     if (p->number == signum) {
356       name = p->name;
357       break;
358     }
359   }
360 
361   print("*** Signal ");
362   printDec(signum);
363   if (name) {
364     print(" (");
365     print(name);
366     print(")");
367   }
368 
369   print(" (");
370   printHex(reinterpret_cast<uint64_t>(siginfo->si_addr));
371   print(") received by PID ");
372   printDec(getpid());
373   print(" (pthread TID ");
374   printHex((uint64_t)pthread_self());
375 #if defined(__linux__)
376   print(") (linux TID ");
377   printDec(syscall(__NR_gettid));
378 #elif defined(__FreeBSD__)
379   long tid = 0;
380   syscall(432, &tid);
381   print(") (freebsd TID ");
382   printDec(tid);
383 #endif
384 
385   // Kernel-sourced signals don't give us useful info for pid/uid.
386   if (siginfo->si_code <= 0) {
387     print(") (maybe from PID ");
388     printDec(siginfo->si_pid);
389     print(", UID ");
390     printDec(siginfo->si_uid);
391   }
392 
393   auto reason = signal_reason(signum, siginfo->si_code);
394 
395   print(") (code: ");
396   // If we can't find a reason code make a best effort to print the (int) code.
397   if (reason != nullptr) {
398     print(reason);
399   } else {
400     if (siginfo->si_code < 0) {
401       print("-");
402       printDec(-siginfo->si_code);
403     } else {
404       printDec(siginfo->si_code);
405     }
406   }
407 
408   print("), stack trace: ***\n");
409 }
410 
// Marker meaning "no thread is currently inside the signal handler".
//
// On Linux pthread_t is a pointer, so 0 can never name a real thread. POSIX
// defines PTHREAD_NULL for this purpose, but it is not available here.
constexpr pthread_t kInvalidThreadId = 0;

// Thread currently inside the signal handler, or kInvalidThreadId if none.
std::atomic<pthread_t> gSignalThread{kInvalidThreadId};

// Becomes true the first time the handler is re-entered on the thread that
// already owns it.
std::atomic<bool> gInRecursiveSignalHandler{false};
419 
420 // Here be dragons.
innerSignalHandler(int signum,siginfo_t * info,void *)421 void innerSignalHandler(int signum, siginfo_t* info, void* /* uctx */) {
422   // First, let's only let one thread in here at a time.
423   pthread_t myId = pthread_self();
424 
425   pthread_t prevSignalThread = kInvalidThreadId;
426   while (!gSignalThread.compare_exchange_strong(prevSignalThread, myId)) {
427     if (pthread_equal(prevSignalThread, myId)) {
428       // First time here. Try to dump the stack trace without symbolization.
429       // If we still fail, well, we're mightily screwed, so we do nothing the
430       // next time around.
431       if (!gInRecursiveSignalHandler.exchange(true)) {
432         print("Entered fatal signal handler recursively. We're in trouble.\n");
433         gStackTracePrinter->printStackTrace(false); // no symbolization
434       }
435       return;
436     }
437 
438     // Wait a while, try again.
439     timespec ts;
440     ts.tv_sec = 0;
441     ts.tv_nsec = 100L * 1000 * 1000; // 100ms
442     nanosleep(&ts, nullptr);
443 
444     prevSignalThread = kInvalidThreadId;
445   }
446 
447   dumpTimeInfo();
448   dumpSignalInfo(signum, info);
449   gStackTracePrinter->printStackTrace(true); // with symbolization
450 
451   // Run user callbacks
452   auto callbacks = gFatalSignalCallbackRegistry.load(std::memory_order_acquire);
453   if (callbacks) {
454     callbacks->run();
455   }
456 }
457 
namespace {
// Set to true as soon as signalHandler() is entered; never reset.
std::atomic<bool> gFatalSignalReceived(false);
} // namespace
461 
signalHandler(int signum,siginfo_t * info,void * uctx)462 void signalHandler(int signum, siginfo_t* info, void* uctx) {
463   gFatalSignalReceived.store(true, std::memory_order_relaxed);
464 
465   int savedErrno = errno;
466   SCOPE_EXIT {
467     flush();
468     errno = savedErrno;
469   };
470   innerSignalHandler(signum, info, uctx);
471 
472   gSignalThread = kInvalidThreadId;
473   // Kill ourselves with the previous handler.
474   callPreviousSignalHandler(signum);
475 }
476 
477 #endif // FOLLY_USE_SYMBOLIZER
478 
479 // Small sigaltstack size threshold.
480 // 8931 is known to cause the signal handler to stack overflow during
481 // symbolization even for a simple one-liner "kill(getpid(), SIGTERM)".
482 constexpr size_t kSmallSigAltStackSize = 8931;
483 
isSmallSigAltStackEnabled()484 FOLLY_MAYBE_UNUSED bool isSmallSigAltStackEnabled() {
485   stack_t ss;
486   if (sigaltstack(nullptr, &ss) != 0) {
487     return false;
488   }
489   if ((ss.ss_flags & SS_DISABLE) != 0) {
490     return false;
491   }
492   return ss.ss_size <= kSmallSigAltStackSize;
493 }
494 
495 } // namespace
496 
497 #endif // _WIN32
498 
namespace {
// True once installFatalSignalHandler() has been entered for the first time.
std::atomic<bool> gAlreadyInstalled{false};
} // namespace
502 
installFatalSignalHandler(std::bitset<64> signals)503 void installFatalSignalHandler(std::bitset<64> signals) {
504   if (gAlreadyInstalled.exchange(true)) {
505     // Already done.
506     return;
507   }
508 
509   // make sure gFatalSignalCallbackRegistry is created before we
510   // install the fatal signal handler
511   gFatalSignalCallbackRegistry.store(
512       getFatalSignalCallbackRegistry(), std::memory_order_release);
513 
514 #if FOLLY_USE_SYMBOLIZER
515   // If a small sigaltstack is enabled (ex. Rust stdlib might use sigaltstack
516   // to set a small stack), the default SafeStackTracePrinter would likely
517   // stack overflow. Replace it with the unsafe self-allocate printer.
518   bool useUnsafePrinter = kIsLinux && isSmallSigAltStackEnabled();
519   if (useUnsafePrinter) {
520 #if FOLLY_HAVE_SWAPCONTEXT
521     gStackTracePrinter = new UnsafeSelfAllocateStackTracePrinter();
522 #else
523     // This environment does not support swapcontext, so always use
524     // SafeStackTracePrinter.
525     gStackTracePrinter = new SafeStackTracePrinter();
526 #endif // FOLLY_HAVE_SWAPCONTEXT
527   } else {
528     gStackTracePrinter = new SafeStackTracePrinter();
529   }
530 
531   struct sigaction sa;
532   memset(&sa, 0, sizeof(sa));
533   if (useUnsafePrinter) {
534     // The signal handler is not async-signal-safe. Block all signals to
535     // make it safer. But it's still unsafe.
536     sigfillset(&sa.sa_mask);
537   } else {
538     sigemptyset(&sa.sa_mask);
539   }
540   // By default signal handlers are run on the signaled thread's stack.
541   // In case of stack overflow running the SIGSEGV signal handler on
542   // the same stack leads to another SIGSEGV and crashes the program.
543   // Use SA_ONSTACK, so alternate stack is used (only if configured via
544   // sigaltstack).
545   // Golang also requires SA_ONSTACK. See:
546   // https://golang.org/pkg/os/signal/#hdr-Go_programs_that_use_cgo_or_SWIG
547   sa.sa_flags |= SA_SIGINFO | SA_ONSTACK;
548   sa.sa_sigaction = &signalHandler;
549 
550   for (auto p = kFatalSignals; p->name; ++p) {
551     if ((p->number < static_cast<int>(signals.size())) &&
552         signals.test(p->number)) {
553       CHECK_ERR(sigaction(p->number, &sa, &p->oldAction));
554     }
555   }
556 #endif // FOLLY_USE_SYMBOLIZER
557 }
558 
// Reports whether the fatal signal handler defined in this file has been
// entered. Always returns false when the symbolizer is not in use.
//
// The guard is `#if`, not `#ifdef`, to match the guard around the definition
// of gFatalSignalReceived: with `#ifdef`, a build that defines
// FOLLY_USE_SYMBOLIZER as 0 would reference a variable that is not declared.
bool fatalSignalReceived() {
#if FOLLY_USE_SYMBOLIZER
  return gFatalSignalReceived.load(std::memory_order_relaxed);
#else
  return false;
#endif
}
566 
567 } // namespace symbolizer
568 } // namespace folly
569