1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #include "pxr/pxr.h"
25 #include "pxr/base/arch/defines.h"
26 #include "pxr/base/arch/stackTrace.h"
27 #include "pxr/base/arch/attributes.h"
28 #include "pxr/base/arch/debugger.h"
29 #include "pxr/base/arch/defines.h"
30 #include "pxr/base/arch/demangle.h"
31 #include "pxr/base/arch/env.h"
32 #include "pxr/base/arch/error.h"
33 #include "pxr/base/arch/errno.h"
34 #include "pxr/base/arch/export.h"
35 #include "pxr/base/arch/fileSystem.h"
36 #include "pxr/base/arch/inttypes.h"
37 #include "pxr/base/arch/symbols.h"
38 #include "pxr/base/arch/vsnprintf.h"
39 #if defined(ARCH_OS_WINDOWS)
40 #include <io.h>
41 #include <process.h>
42 #include <Winsock2.h>
43 #include <DbgHelp.h>
44 #ifndef MAXHOSTNAMELEN
45 #define MAXHOSTNAMELEN 64
46 #endif
47 #else
48 #include <dlfcn.h>
49 #include <netdb.h>
50 #include <unistd.h>
51 #include <sys/param.h>
52 #include <sys/resource.h>
53 #include <sys/wait.h>
54 #endif
55 #include <algorithm>
56 #include <atomic>
57 #include <fstream>
58 #include <ostream>
59 #include <iterator>
60 #include <limits>
61 #include <cstdlib>
62 #include <errno.h>
63 #include <signal.h>
64 #include <sys/types.h>
65 #include <cstdio>
66 #include <cstring>
67 #include <mutex>
68 #include <thread>
69
70 /* Darwin/ppc did not do stack traces. Darwin/i386 still
71 needs some work, this has been stubbed out for now. */
72
73 #if defined(ARCH_OS_LINUX) || defined(ARCH_OS_FREEBSD)
74 #include <ucontext.h>
75 #endif
76
77 #if defined(ARCH_OS_LINUX) || defined(ARCH_OS_FREEBSD) && defined(ARCH_BITS_64)
78 #include <unwind.h>
79 #endif
80
81 #if defined(ARCH_OS_DARWIN)
82 #include <execinfo.h>
83 #endif
84
85 #if defined(ARCH_OS_WINDOWS)
86 #define getpid() _getpid()
87 #define write(fd_, data_, size_) _write(fd_, data_, size_)
88 #define strdup(str_) _strdup(str_)
89 #endif
90
91 #include <string>
92 #include <vector>
93 #include <map>
94 #include <sstream>
95 #include <time.h>
96
97 PXR_NAMESPACE_OPEN_SCOPE
98
99 using namespace std;
100
// NOTE(review): presumably the maximum number of stack frames captured; it is
// not referenced in the portion of the file visible here -- confirm usage.
#define MAX_STACK_DEPTH 4096

#if !defined(ARCH_OS_WINDOWS)
// XXX Darwin
// total hack -- no idea if this will work if we die in malloc...
//
// Pointer to the fork() implementation preferred by nonLockingFork().
// On Linux and FreeBSD it is resolved during static initialization with
// dlsym(RTLD_NEXT, "__libc_fork"); the result may be NULL, in which case
// nonLockingFork() falls back to plain fork(). On Darwin it is always NULL.
// No definition is provided on Windows.
typedef int (*ForkFunc)(void);
ForkFunc Arch_nonLockingFork =
#if defined(ARCH_OS_LINUX) || defined(ARCH_OS_FREEBSD)
    (ForkFunc)dlsym(RTLD_NEXT, "__libc_fork");
#elif defined(ARCH_OS_DARWIN)
    NULL;
#else
#error Unknown architecture.
#endif
#endif
116
/*** Stack Logging Global Variables ***/

// Stores the application's launch time as recorded by
// Arch_SetAppLaunchTime(). Zero until that function has been called.
static time_t _appLaunchTime;

// This bool determines whether the session logger is invoked when a
// session or fatal stack trace is logged (see ArchLogSessionInfo() and
// _FinishLoggingFatalStackTrace()). Use ArchSetFatalStackLogging
// to set this value.
static bool _shouldLogStackToDb = false;

// This string holds the path to the script used to log sessions
// to a database. Set by ArchSetLogSession(); the pointer is stored,
// not copied.
static const char * _logStackToDbCmd = nullptr;

// Arguments to _logStackToDbCmd for non-crash and crash reports, respectively.
// Both are set by ArchSetLogSession().
static const char* const* _sessionLogArgv = nullptr;
static const char* const* _sessionCrashLogArgv = nullptr;

// This string stores the program name to be used when
// displaying error information. Initialized in
// Arch_InitConfig() to ArchGetExecutablePath()
// It is a strdup()'d copy owned by this file; see
// ArchSetProgramNameForErrors().
static char * _progNameForErrors = NULL;
139
140 namespace {
// Mutex-protected key/value store of supplementary program information
// that is shown alongside error output.
//
// Besides the map itself, a pre-rendered C string holding every entry is
// maintained so that the information can be printed while handling an
// error without having to walk the map.
class Arch_ProgInfo
{
public:

    Arch_ProgInfo() : _progInfoForErrors(nullptr) {}

    ~Arch_ProgInfo();

    // Set (or, when value is empty, remove) the entry for key and
    // refresh the pre-rendered string.
    void SetProgramInfoForErrors(const std::string& key,
                                 const std::string& value);

    // Return the value stored for key, or an empty string if there is none.
    std::string GetProgramInfoForErrors(const std::string& key) const;

    // Write the pre-rendered string, if any, to stderr.
    void PrintInfoForErrors() const;

private:
    using _MapType = std::map<std::string, std::string>;

    // The entries, keyed by name.
    _MapType _progInfoMap;

    // Serializes access to the map and to the rendered string.
    mutable std::mutex _progInfoForErrorsMutex;

    // Printed version of _progInfoMap, since we can't
    // traverse the map during an error. Allocated with strdup().
    char *_progInfoForErrors;
};
167
~Arch_ProgInfo()168 Arch_ProgInfo::~Arch_ProgInfo()
169 {
170 if (_progInfoForErrors)
171 free(_progInfoForErrors);
172 }
173
174 void
SetProgramInfoForErrors(const std::string & key,const std::string & value)175 Arch_ProgInfo::SetProgramInfoForErrors(
176 const std::string& key, const std::string& value)
177 {
178 std::lock_guard<std::mutex> lock(_progInfoForErrorsMutex);
179
180 if (value.empty()) {
181 _progInfoMap.erase(key);
182 } else {
183 _progInfoMap[key] = value;
184 }
185
186 std::ostringstream ss;
187
188 // update the error info string
189 for(_MapType::iterator iter = _progInfoMap.begin();
190 iter != _progInfoMap.end(); ++iter) {
191
192 ss << iter->first << ": " << iter->second << '\n';
193 }
194
195 if (_progInfoForErrors)
196 free(_progInfoForErrors);
197
198 _progInfoForErrors = strdup(ss.str().c_str());
199 }
200
201 std::string
GetProgramInfoForErrors(const std::string & key) const202 Arch_ProgInfo::GetProgramInfoForErrors(const std::string& key) const
203 {
204 std::lock_guard<std::mutex> lock(_progInfoForErrorsMutex);
205
206 _MapType::const_iterator iter = _progInfoMap.find(key);
207 std::string result;
208 if (iter != _progInfoMap.end())
209 result = iter->second;
210
211 return result;
212 }
213
214 void
PrintInfoForErrors() const215 Arch_ProgInfo::PrintInfoForErrors() const
216 {
217 std::lock_guard<std::mutex> lock(_progInfoForErrorsMutex);
218 if (_progInfoForErrors) {
219 fprintf(stderr, "%s", _progInfoForErrors);
220 }
221 }
222
223 } // anon-namespace
224
225 static Arch_ProgInfo &
ArchStackTrace_GetProgInfo()226 ArchStackTrace_GetProgInfo()
227 {
228 static Arch_ProgInfo progInfo;
229 return progInfo;
230 }
231
232
233
234 namespace {
235
// Mutex-protected registry of extra log text, keyed by name. Only pointers
// to the caller's line vectors are stored (nothing is copied or owned), and
// the text is emitted into stack trace logs in case of fatal errors or
// crashes.
class Arch_LogInfo
{
public:

    // Register lines under key; a null or empty vector removes the key.
    void SetExtraLogInfoForErrors(const std::string &key,
                                  std::vector<std::string> const *lines);

    // Write all registered text to outFile. A non-zero max limits the
    // total number of lines written.
    void EmitAnyExtraLogInfo(FILE *outFile, size_t max = 0) const;

private:
    using _LogInfoMap =
        std::map<std::string, std::vector<std::string> const *>;

    // Unowned line vectors, keyed by name.
    _LogInfoMap _logInfoForErrors;

    // Serializes access to _logInfoForErrors.
    mutable std::mutex _logInfoForErrorsMutex;
};
251
252 void
SetExtraLogInfoForErrors(const std::string & key,std::vector<std::string> const * lines)253 Arch_LogInfo::SetExtraLogInfoForErrors(const std::string &key,
254 std::vector<std::string> const *lines)
255 {
256 std::lock_guard<std::mutex> lock(_logInfoForErrorsMutex);
257 if (!lines || lines->empty()) {
258 _logInfoForErrors.erase(key);
259 } else {
260 _logInfoForErrors[key] = lines;
261 }
262 }
263
264 void
EmitAnyExtraLogInfo(FILE * outFile,size_t max) const265 Arch_LogInfo::EmitAnyExtraLogInfo(FILE *outFile, size_t max) const
266 {
267 // This function can't cause any heap allocation, be careful.
268 // XXX -- std::string::c_str and fprintf can do allocations.
269 std::lock_guard<std::mutex> lock(_logInfoForErrorsMutex);
270 size_t n = 0;
271 for (_LogInfoMap::const_iterator i = _logInfoForErrors.begin(),
272 end = _logInfoForErrors.end(); i != end; ++i) {
273 fputs("\n", outFile);
274 fputs(i->first.c_str(), outFile);
275 fputs(":\n", outFile);
276 for (std::string const &line: *i->second) {
277 if (max && n++ >= max) {
278 fputs("... see full diagnostics in crash report.\n", outFile);
279 return;
280 }
281 fputs(line.c_str(), outFile);
282 }
283 }
284 }
285
286 } // anon-namespace
287
288 static Arch_LogInfo &
ArchStackTrace_GetLogInfo()289 ArchStackTrace_GetLogInfo()
290 {
291 static Arch_LogInfo logInfo;
292 return logInfo;
293 }
294
295
// atexit() handler registered by ArchEnableSessionLogging(): logs the
// session information by calling ArchLogSessionInfo() with its default
// argument.
static void
_atexitCallback()
{
    ArchLogSessionInfo();
}
301
302 void
ArchEnableSessionLogging()303 ArchEnableSessionLogging()
304 {
305 static int unused = atexit(_atexitCallback);
306 (void)unused;
307 }
308
// Prefix used when building stack trace file names.
static const char* const stackTracePrefix = "st";

// Post-mortem command and its argument template, as registered with
// ArchSetPostMortem(). Both remain null until that function is called.
static const char* stackTraceCmd = nullptr;
static const char* const* stackTraceArgv = nullptr;

// Defined further down; needed by the functions that build command lines.
static long _GetAppElapsedTime();
314
315 namespace {
316
// strlen() replacement that makes no library calls. Returns the number of
// characters before the terminating NUL; a null pointer has length zero.
size_t asstrlen(const char* s)
{
    if (!s) {
        return 0;
    }
    const char* cursor = s;
    while (*cursor) {
        ++cursor;
    }
    return static_cast<size_t>(cursor - s);
}
328
329 // Copy the string at src to dst, returning a pointer to the NUL terminator
330 // in dst (NOT a pointer to dst).
331 //
332 // ARCH_NOINLINE because old clang versions generated incorrect optimized
333 // code.
334 char* asstrcpy(char* dst, const char* src) ARCH_NOINLINE;
asstrcpy(char * dst,const char * src)335 char* asstrcpy(char* dst, const char* src)
336 {
337 while ((*dst++ = *src++)) {
338 // Do nothing
339 }
340 return dst - 1;
341 }
342
// Return true if the two strings are identical. If either pointer is null
// the result is true only when both are null.
bool asstreq(const char* dst, const char* src)
{
    if (dst == nullptr || src == nullptr) {
        return dst == src;
    }
    // Walk both strings in lockstep for as long as they agree.
    for (; *dst == *src; ++dst, ++src) {
        if (*dst == '\0') {
            // Reached the terminator of both strings without a mismatch.
            return true;
        }
    }
    return false;
}
356
// Return true if the first n characters of the two strings are identical
// (comparison stops early at a shared terminator). If either pointer is
// null the result is true only when both are null. With n == 0 any two
// non-null strings compare equal.
bool asstrneq(const char* dst, const char* src, size_t n)
{
    if (dst == nullptr || src == nullptr) {
        return dst == src;
    }
    for (size_t i = 0; i != n; ++i) {
        if (dst[i] != src[i]) {
            return false;
        }
        if (dst[i] == '\0') {
            // Both strings ended before n characters were compared.
            return true;
        }
    }
    return true;
}
371
372 // Returns the environment variable named name, or NULL if it doesn't exist.
asgetenv(const char * name)373 const char* asgetenv(const char* name)
374 {
375 if (name) {
376 const size_t len = asstrlen(name);
377 for (char** i = ArchEnviron(); *i; ++i) {
378 const char* var = *i;
379 if (asstrneq(var, name, len)) {
380 if (var[len] == '=') {
381 return var + len + 1;
382 }
383 }
384 }
385 }
386 return nullptr;
387 }
388
// Minimum safe size for a buffer to hold a long converted to decimal ASCII.
// Used to size the stack buffers that asitoa() writes into.
static constexpr int numericBufferSize =
    std::numeric_limits<long>::digits10
    + 1     // sign
    + 1     // overflow (digits10 doesn't necessarily count the high digit)
    + 1     // trailing NUL
    + 1;    // paranoia
396
// Return the number of characters in the decimal string representation of
// x, not counting the terminating NUL. A leading '-' is counted for
// negative values, so asNumDigits(-10) is 3.
//
// The magnitude is computed in unsigned arithmetic: negating the most
// negative long as a signed value would overflow, which is undefined.
size_t asNumDigits(long x)
{
    size_t result = 1;
    unsigned long magnitude = static_cast<unsigned long>(x);
    if (x < 0) {
        // Unsigned negation is well defined for every input.
        magnitude = 0UL - magnitude;
        ++result;
    }
    while (magnitude >= 10) {
        ++result;
        magnitude /= 10;
    }
    return result;
}
411
// Write the decimal string representation of x to s, which must have
// sufficient space available (numericBufferSize bytes is always enough).
// Returns a pointer to the terminating NUL written into s.
//
// Makes no library calls. Negative values are written with a leading '-';
// the magnitude is handled as an unsigned value so that the most negative
// long converts correctly instead of overflowing on negation.
char* asitoa(char* s, long x)
{
    unsigned long magnitude = static_cast<unsigned long>(x);

    // Write the minus sign and step past it so the digits don't overwrite it.
    if (x < 0) {
        *s++ = '-';
        magnitude = 0UL - magnitude;
    }

    // Count the digits of the magnitude.
    size_t numDigits = 1;
    for (unsigned long v = magnitude; v >= 10; v /= 10) {
        ++numDigits;
    }

    // Skip to the end and write the terminating NUL.
    char* const end = s + numDigits;
    *end = '\0';

    // Write each digit, starting with the 1's column, working backwards.
    // The do/while handles zero by emitting a single '0'.
    char* p = end;
    do {
        *--p = static_cast<char>('0' + magnitude % 10);
        magnitude /= 10;
    } while (magnitude != 0);

    return end;
}
439
440 // Write a string to a file descriptor.
aswrite(int fd,const char * msg)441 void aswrite(int fd, const char* msg)
442 {
443 int saved = errno;
444 write(fd, msg, asstrlen(msg));
445 errno = saved;
446 }
447
_GetStackTraceName(char * buf,size_t len)448 int _GetStackTraceName(char* buf, size_t len)
449 {
450 // Take care to avoid non-async-safe functions.
451 // NOTE: This doesn't protect against other threads changing the
452 // temporary directory or program name for errors.
453
454 // Count the string length required.
455 size_t required =
456 asstrlen(ArchGetTmpDir()) +
457 1 + // "/"
458 asstrlen(stackTracePrefix) +
459 1 + // "_"
460 asstrlen(ArchGetProgramNameForErrors()) +
461 1 + // "."
462 asNumDigits(getpid()) +
463 1; // "\0"
464
465 // Fill in buf with the default name.
466 char* end = buf;
467 if (len < required) {
468 // No space. Not quite an accurate error code.
469 errno = ENOMEM;
470 return -1;
471 }
472 else {
473 end = asstrcpy(end, ArchGetTmpDir());
474 end = asstrcpy(end, "/");
475 end = asstrcpy(end, stackTracePrefix);
476 end = asstrcpy(end, "_");
477 end = asstrcpy(end, ArchGetProgramNameForErrors());
478 end = asstrcpy(end, ".");
479 end = asitoa(end, getpid());
480 }
481
482 // Return a name that isn't currently in use. Simultaneously create
483 // the empty file.
484 int suffix = 0;
485 #if defined(ARCH_OS_WINDOWS)
486 int fd = _open(buf, O_CREAT | O_WRONLY | O_TRUNC | O_EXCL,
487 _S_IREAD | _S_IWRITE);
488 #else
489 int fd = open(buf, O_CREAT | O_WRONLY | O_TRUNC | O_EXCL, 0640);
490 #endif
491
492 while (fd == -1 && errno == EEXIST) {
493 // File exists. Try a new suffix if there's space.
494 ++suffix;
495 if (len < required + 1 + asNumDigits(suffix)) {
496 // No space. Not quite an accurate error code.
497 errno = ENOMEM;
498 return -1;
499 }
500 asstrcpy(end, ".");
501 asitoa(end + 1, suffix);
502 #if defined(ARCH_OS_WINDOWS)
503 fd = _open(buf, O_CREAT | O_WRONLY | O_TRUNC | O_EXCL,
504 _S_IREAD | _S_IWRITE);
505 #else
506 fd = open(buf, O_CREAT | O_WRONLY | O_TRUNC | O_EXCL, 0640);
507 #endif
508 }
509 if (fd != -1) {
510 ArchCloseFile(fd);
511 fd = 0;
512 }
513 return fd;
514 }
515
516 // Build an argument list (async-safe).
517 static bool
_MakeArgv(const char * dstArgv[],size_t maxDstArgs,const char * cmd,const char * const srcArgv[],const char * const substitutions[][2],size_t numSubstitutions)518 _MakeArgv(
519 const char* dstArgv[],
520 size_t maxDstArgs,
521 const char* cmd,
522 const char* const srcArgv[],
523 const char* const substitutions[][2],
524 size_t numSubstitutions)
525 {
526 if (!cmd || !srcArgv) {
527 return false;
528 }
529
530 // Count the maximum number of arguments needed.
531 size_t n = 1;
532 for (const char *const* i = srcArgv; *i; ++n, ++i) {
533 // Do nothing
534 }
535
536 // Make sure we don't have too many arguments.
537 if (n >= maxDstArgs) {
538 return false;
539 }
540
541 // Build the command line.
542 size_t j = 0;
543 for (size_t i = 0; i != n; ++i) {
544 if (asstreq(srcArgv[i], "$cmd")) {
545 dstArgv[j++] = cmd;
546 }
547 else {
548 dstArgv[j] = srcArgv[i];
549 for (size_t k = 0; k != numSubstitutions; ++k) {
550 if (asstreq(srcArgv[i], substitutions[k][0])) {
551 dstArgv[j] = substitutions[k][1];
552 break;
553 }
554 }
555 ++j;
556 }
557 }
558 dstArgv[j] = nullptr;
559
560 return true;
561 }
562
563 #if !defined(ARCH_OS_WINDOWS)
564 /* We use a 'non-locking' fork so that we won't get hung up if we've
565 * had malloc corruption when we crash. The crash recovery behavior
566 * can be tested with ArchTestCrash(), which should crash with this
567 * malloc corruption.
568 */
569 static int
nonLockingFork()570 nonLockingFork()
571 {
572 if (Arch_nonLockingFork != NULL) {
573 return (Arch_nonLockingFork)();
574 }
575 return fork();
576 }
577 #endif
578
#if defined(ARCH_OS_LINUX) || defined(ARCH_OS_FREEBSD)
// Issue the execve system call directly, bypassing the C library wrapper.
//
// file, argv and envp are passed to the kernel unchanged. The raw result is
// returned on success; on failure errno is set from the kernel's return
// value and -1 is returned.
static int
nonLockingLinux__execve (const char *file,
                         char *const argv[],
                         char *const envp[])
{
    /*
     * We make a direct system call here, because we can't find an
     * execve which corresponds with the non-locking fork we call
     * (__libc_fork().)
     *
     * This code doesn't mess with other threads, and avoids the bug
     * that calling regular execv after the nonLockingFork() causes
     * hangs in a threaded app.  (We use the non-locking fork to get
     * around problems with forking when we have had memory
     * corruption.)  whew.
     */

    unsigned long result;

#if defined (ARCH_CPU_ARM)
    {
        // Arguments go in x0-x2 and the system call number in x8; the
        // result comes back in x0.
        // NOTE(review): 221 is presumably the arm64 Linux execve number --
        // confirm against the target's syscall table.
        register long __file_result asm ("x0") = (long)file;
        register char* const* __argv asm ("x1") = argv;
        register char* const* __envp asm ("x2") = envp;
        register long __num_execve asm ("x8") = 221;
        __asm__ __volatile__ (
            "svc 0"
            : "=r" (__file_result)
            : "r"(__num_execve), "r" (__file_result), "r" (__argv), "r" (__envp)
            : "memory"
        );
        result = __file_result;
    }
#elif defined(ARCH_CPU_INTEL) && defined(ARCH_BITS_64)

    /*
     * %rdi, %rsi, %rdx, %rcx, %r8, %r9 are args 0-5
     * syscall clobbers %rcx and %r11
     *
     * why do we put args 1, 2 into cx, dx and then move them?
     * because it doesn't work if you directly specify them as
     * constraints to gcc.
     */

    // file arrives in %rax (operand 0 is tied to the "=a" output), argv in
    // %rcx and envp in %rdx; the moves below place them in the argument
    // registers and load the system call number 0x3b into %rax.
    // NOTE(review): %rdi and %rsi are written but are not listed as
    // clobbers, and %rcx is an input operand that the comment above says
    // syscall clobbers -- confirm this is safe with the compilers in use.
    __asm__ __volatile__ (
        "mov %0, %%rdi \n\t"
        "mov %%rcx, %%rsi \n\t"
        "mov %%rdx, %%rdx \n\t"
        "mov $0x3b, %%rax \n\t"
        "syscall \n\t"
        : "=a" (result)
        : "0" (file), "c" (argv), "d" (envp)
        : "memory", "cc", "r11"
    );
#else
#error Unknown architecture
#endif

    // A result in the top 4096 values of the range is treated as a negated
    // error code: store it in errno and report -1.
    if (result >= 0xfffffffffffff000) {
        errno = -result;
        result = (unsigned int)-1;
    }

    return result;
}

#endif
647
648 #if !defined(ARCH_OS_WINDOWS)
649 /* This is the corresponding execv which works with nonLockingFork().
650 * currently, it's only different from execv for linux. The crash
651 * recovery behavior can be tested with ArchTestCrash().
652 */
653 static int
nonLockingExecv(const char * path,char * const argv[])654 nonLockingExecv(const char *path, char *const argv[])
655 {
656 #if defined(ARCH_OS_LINUX)
657 return nonLockingLinux__execve (path, argv, __environ);
658 #else
659 return execv(path, argv);
660 #endif
661 }
662 #endif
663
/*
 * Return the base of a filename.
 *
 * On non-Windows platforms this is the text after the last '/'. The whole
 * path is returned unchanged when it contains no '/' or ends with one.
 *
 * On Windows both '/' and '\' act as separators, and the text from the
 * first ".exe" onwards is dropped when a separator is present. A path with
 * no separator is returned unchanged.
 * NOTE(review): the ".exe" search covers the whole path, not just the
 * final component or the end of the name -- confirm that is intended.
 */

static std::string
getBase(const char* path)
{
#if defined(ARCH_OS_WINDOWS)
    const std::string full = path;
    const std::string::size_type sep = full.find_last_of("/\\");
    if (sep == std::string::npos) {
        return full;
    }
    const std::string::size_type ext = full.find(".exe");
    if (ext == std::string::npos) {
        return full.substr(sep + 1);
    }
    return full.substr(sep + 1, ext - sep - 1);
#else
    const char* const lastSlash = strrchr(path, '/');
    if (lastSlash == nullptr || lastSlash[1] == '\0') {
        // No directory part, or nothing after the final slash.
        return path;
    }
    return lastSlash + 1;
#endif
}
691
692 } // anonymous namespace
693
694 /*
695 * Run an external program to write post-mortem information to logfile for
696 * process pid. This waits until the program completes.
697 *
698 * This is an internal function used by ArchLogPostMortem(). It must call
699 * only async-safe functions.
700 */
701
702 static
_LogStackTraceForPid(const char * logfile)703 int _LogStackTraceForPid(const char *logfile)
704 {
705 // Get the command to run.
706 const char* cmd = asgetenv("ARCH_POSTMORTEM");
707 if (!cmd) {
708 cmd = stackTraceCmd;
709 }
710 if (!cmd || !stackTraceArgv) {
711 // Silently do nothing.
712 return 0;
713 }
714
715 // Construct the substitutions.
716 char pidBuffer[numericBufferSize], timeBuffer[numericBufferSize];
717 asitoa(pidBuffer, getpid());
718 asitoa(timeBuffer, _GetAppElapsedTime());
719 const char* const substitutions[3][2] = {
720 { "$pid", pidBuffer }, { "$log", logfile }, { "$time", timeBuffer }
721 };
722
723 // Build the argument list.
724 static constexpr size_t maxArgs = 32;
725 const char* argv[maxArgs];
726 if (!_MakeArgv(argv, maxArgs, cmd, stackTraceArgv, substitutions, 2)) {
727 static const char msg[] = "Too many arguments to postmortem command\n";
728 aswrite(2, msg);
729 return 0;
730 }
731
732 // Invoke the command.
733 ArchCrashHandlerSystemv(argv[0], (char *const*)argv,
734 300 /* wait up to 300 seconds */ , NULL, NULL);
735 return 1;
736 }
737
// Register the post-mortem command and its argument template, which
// _LogStackTraceForPid() uses to run the external post-mortem program.
// Only the pointers are stored, nothing is copied, so both must remain
// valid for as long as they may be used.
void
ArchSetPostMortem(const char* command, const char *const argv[] )
{
    stackTraceCmd = command;
    stackTraceArgv = argv;
}
744
745 /*
746 * Arch_SetAppLaunchTime()
747 * -------------------------------
748 * Stores the current time as the application's launch time.
749 * This function is internal.
750 */
751 ARCH_HIDDEN
752 void
Arch_SetAppLaunchTime()753 Arch_SetAppLaunchTime()
754 {
755 _appLaunchTime = time(NULL);
756 }
757
/*
 * ArchGetAppLaunchTime()
 * -------------------------------
 * Returns the application's launch time, or 0 if a timestamp hasn't
 * been created with Arch_SetAppLaunchTime().
 */
time_t
ArchGetAppLaunchTime()
{
    // Defaults to 0 (the static is zero-initialized).
    return _appLaunchTime;
}
770
/*
 * ArchSetFatalStackLogging()
 * -------------------------------
 * This enables or disables the logging of the stack trace and other build
 * information upon intercepting a crash, by setting the flag that gates
 * calls to the session logger.
 *
 * This function can be called from python.
 */
void
ArchSetFatalStackLogging( bool flag )
{
    _shouldLogStackToDb = flag;
}
784
/*
 * ArchGetFatalStackLogging()
 * ---------------------------
 * Returns the current value of the logging flag, as last set with
 * ArchSetFatalStackLogging() (false by default).
 *
 * This function can be called from python.
 */
bool
ArchGetFatalStackLogging()
{
    return _shouldLogStackToDb;
}
797
798 void
ArchSetProgramInfoForErrors(const std::string & key,const std::string & value)799 ArchSetProgramInfoForErrors(const std::string& key,
800 const std::string& value)
801 {
802 ArchStackTrace_GetProgInfo().SetProgramInfoForErrors(key, value);
803 }
804
805 std::string
ArchGetProgramInfoForErrors(const std::string & key)806 ArchGetProgramInfoForErrors(const std::string& key)
807 {
808 return ArchStackTrace_GetProgInfo().GetProgramInfoForErrors(key);
809 }
810
811 void
ArchSetExtraLogInfoForErrors(const std::string & key,std::vector<std::string> const * lines)812 ArchSetExtraLogInfoForErrors(const std::string &key,
813 std::vector<std::string> const *lines)
814 {
815 ArchStackTrace_GetLogInfo().SetExtraLogInfoForErrors(key, lines);
816 }
817
818 /*
819 * ArchSetProgramNameForErrors
820 * ---------------------------
821 * Set's the program name that is to be used for diagnostic output.
822 */
823 void
ArchSetProgramNameForErrors(const char * progName)824 ArchSetProgramNameForErrors( const char *progName )
825 {
826
827 if (_progNameForErrors)
828 free(_progNameForErrors);
829
830 if (progName)
831 _progNameForErrors = strdup(getBase(progName).c_str());
832 else
833 _progNameForErrors = NULL;
834 }
835
836 /*
837 * ArchGetProgramNameForErrors
838 * ----------------------------
839 * Returns the currently set program name used for
840 * reporting error information. Returns "libArch"
841 * if a value hasn't been set.
842 */
843 const char *
ArchGetProgramNameForErrors()844 ArchGetProgramNameForErrors()
845 {
846 if (_progNameForErrors)
847 return _progNameForErrors;
848
849 return "libArch";
850 }
851
852 #if defined(ARCH_OS_WINDOWS)
853 static long
_GetAppElapsedTime()854 _GetAppElapsedTime()
855 {
856 FILETIME starttime;
857 FILETIME exittime;
858 FILETIME kerneltime;
859 FILETIME usertime;
860 ULARGE_INTEGER li;
861
862 if (::GetProcessTimes(GetCurrentProcess(),
863 &starttime, &exittime, &kerneltime, &usertime) == 0) {
864 ARCH_WARNING("_GetAppElapsedTime failed");
865 return 0L;
866 }
867 memcpy(&li, &usertime, sizeof(FILETIME));
868 return static_cast<long>(li.QuadPart / 10000000ULL);
869 }
870 #else
871 static long
_GetAppElapsedTime()872 _GetAppElapsedTime()
873 {
874 rusage ru;
875
876 // We only record the amount of time spent in user instructions,
877 // so as to discount idle time when logging up time.
878 if (getrusage(RUSAGE_SELF, &ru) == 0) {
879 return long(ru.ru_utime.tv_sec);
880 }
881
882 // Fallback to logging the entire session time, if we could
883 // not get the user time from the resource usage.
884
885 // Note: Total time measurement will be a little off because this
886 // calculation happens after the stack trace is generated which can
887 // take a long time.
888 //
889 return long(time(0) - _appLaunchTime);
890 }
891 #endif
892
893 static void
_InvokeSessionLogger(const char * progname,const char * stackTrace)894 _InvokeSessionLogger(const char* progname, const char *stackTrace)
895 {
896 // Get the command to run.
897 const char* cmd = asgetenv("ARCH_LOGSESSION");
898 const char* const* srcArgv =
899 stackTrace ? _sessionCrashLogArgv : _sessionLogArgv;
900 if (!cmd) {
901 cmd = _logStackToDbCmd;
902 }
903 if (!cmd || !srcArgv) {
904 // Silently do nothing.
905 return;
906 }
907
908 // Construct the substitutions.
909 char pidBuffer[numericBufferSize], timeBuffer[numericBufferSize];
910 asitoa(pidBuffer, getpid());
911 asitoa(timeBuffer, _GetAppElapsedTime());
912 const char* const substitutions[4][2] = {
913 {"$pid", pidBuffer}, {"$time", timeBuffer},
914 {"$prog", progname}, {"$stack", stackTrace}
915 };
916
917 // Build the argument list.
918 static constexpr size_t maxArgs = 32;
919 const char* argv[maxArgs];
920 if (!_MakeArgv(argv, maxArgs, cmd, srcArgv, substitutions, 4)) {
921 static const char msg[] = "Too many arguments to log session command\n";
922 aswrite(2, msg);
923 return;
924 }
925
926 // Invoke the command.
927 ArchCrashHandlerSystemv(argv[0], (char *const*)argv,
928 60 /* wait up to 60 seconds */, NULL, NULL);
929 }
930
931 /*
932 * '_FinishLoggingFatalStackTrace' appends the sessionLog
933 * to the stackTrace, and then calls an external program to add it
934 * to the stack_trace database table.
935 */
936 static void
_FinishLoggingFatalStackTrace(const char * progname,const char * stackTrace,const char * sessionLog,bool crashingHard)937 _FinishLoggingFatalStackTrace(const char *progname, const char *stackTrace,
938 const char *sessionLog, bool crashingHard)
939 {
940 if (!crashingHard && sessionLog) {
941 // If we were given a session log, cat it to the end of the stack.
942 if (FILE* stackFd = ArchOpenFile(stackTrace, "a")) {
943 if (FILE* sessionLogFd = ArchOpenFile(sessionLog, "r")) {
944 fputs("\n\n********** Session Log **********\n\n", stackFd);
945 // Cat the session log
946 char line[4096];
947 while (fgets(line, 4096, sessionLogFd)) {
948 fputs(line, stackFd);
949 }
950 fclose(sessionLogFd);
951 }
952 fclose(stackFd);
953 }
954 }
955
956 // Add trace to database if _shouldLogStackToDb is true
957 if (_shouldLogStackToDb)
958 {
959 _InvokeSessionLogger(progname, stackTrace);
960 }
961
962 }
963
964 void
ArchLogSessionInfo(const char * crashStackTrace)965 ArchLogSessionInfo(const char *crashStackTrace)
966 {
967 if (_shouldLogStackToDb)
968 {
969 _InvokeSessionLogger(ArchGetProgramNameForErrors(), crashStackTrace);
970 }
971 }
972
// Register the session-logging command together with the argument
// templates used for regular (argv) and crash (crashArgv) reports; see
// _InvokeSessionLogger().  Only the pointers are stored, nothing is copied,
// so all three must remain valid for as long as they may be used.
void
ArchSetLogSession(
    const char* command,
    const char* const argv[],
    const char* const crashArgv[])
{
    _logStackToDbCmd = command;
    _sessionLogArgv = argv;
    _sessionCrashLogArgv = crashArgv;
}
983
/*
 * Run an external program to make a report and tell the user where the report
 * file is.
 *
 * reason, message and extraLogMsg are each optional (may be null).  reason
 * and message are written both to the report file and to stderr;
 * extraLogMsg is written to the report file only.
 *
 * Steps: create a uniquely named report file, write the header text and any
 * registered extra log info into it, print a banner and the registered
 * program info to stderr, run the post-mortem command, and finally run the
 * session logger if the post-mortem command was invoked.
 *
 * Use of char*'s is deliberate: only async-safe calls allowed past this point!
 * NOTE(review): the body nevertheless uses stdio (fputs/fflush on FILE*) and
 * takes mutexes through the prog-info/log-info helpers -- confirm that this
 * is acceptable on the crash path.
 */
void
ArchLogPostMortem(const char* reason,
                  const char* message /* = nullptr */,
                  const char* extraLogMsg /* = nullptr */)
{
    static std::atomic_flag busy = ATOMIC_FLAG_INIT;

    // Disallow recursion and allow only one thread at a time.
    // NOTE(review): a re-entrant call on the thread that already holds the
    // flag would spin here forever -- confirm that cannot happen.
    while (busy.test_and_set(std::memory_order_acquire)) {
        // Spin!
        std::this_thread::yield();
    }

    const char* progname = ArchGetProgramNameForErrors();

    // If we can attach a debugger then just exit here.
    if (ArchDebuggerAttach()) {
        ARCH_DEBUGGER_TRAP;
        _exit(0);
    }

    /* Could use tmpnam but we're trying to be minimalist here. */
    char logfile[1024];
    if (_GetStackTraceName(logfile, sizeof(logfile)) == -1) {
        // Cannot create the logfile.
        static const char msg[] = "Cannot create a log file\n";
        aswrite(2, msg);
        busy.clear(std::memory_order_release);
        return;
    }

    // Write reason for stack trace to logfile.
    if (FILE* stackFd = ArchOpenFile(logfile, "a")) {
        if (reason) {
            fputs("This stack trace was requested because: ", stackFd);
            fputs(reason, stackFd);
            fputs("\n", stackFd);
        }
        if (message) {
            fputs(message, stackFd);
            fputs("\n", stackFd);
        }
        // No line limit here: the report file gets the full extra info.
        ArchStackTrace_GetLogInfo().EmitAnyExtraLogInfo(stackFd);
        if (extraLogMsg) {
            fputs(extraLogMsg, stackFd);
            fputs("\n", stackFd);
        }
        fputs("\nPostmortem Stack Trace\n", stackFd);
        fclose(stackFd);
    }

    /* get hostname for printing out in the error message only */
    char hostname[MAXHOSTNAMELEN];
    if (gethostname(hostname,MAXHOSTNAMELEN) != 0) {
        /* error getting hostname; don't try to print it */
        hostname[0] = '\0';
    }

    // Print nDashes '-' characters to stderr, 64 at a time plus a final
    // partial run taken from the tail of the 64-dash string.
    auto printNDashes = [](int nDashes) {
        const char *dash64 =
            "----------------------------------------------------------------";
        int dividend = nDashes / 64;
        int remainder = nDashes % 64;
        while (dividend--) {
            fputs(dash64, stderr);
        }
        fputs(dash64 + 64 - remainder, stderr);
    };

    // The banner is at least 80 columns wide and always leaves room for
    // "-- " on both sides of the "<progname> terminated" label.
    const char *haltMsg = " terminated";
    int labelSize = strlen(progname) + strlen(haltMsg);
    int bannerSize = std::max<int>(80, labelSize + strlen("-- ") * 2);

    fputs("\n", stderr);
    int numLeadingDashes = (bannerSize - labelSize) / 2 - 1;
    printNDashes(numLeadingDashes);
    fputs(" ", stderr);
    fputs(progname, stderr);
    fputs(haltMsg, stderr);
    fputs(" ", stderr);
    // The trailing run fills whatever remains after the two spaces.
    printNDashes(bannerSize - numLeadingDashes - labelSize - 2);
    fputs("\n", stderr);

    // print out any registered program info
    {
        ArchStackTrace_GetProgInfo().PrintInfoForErrors();
    }

    if (reason) {
        fputs("This stack trace was requested because: ", stderr);
        fputs(reason, stderr);
        fputs("\n", stderr);
    }
    if (message) {
        fputs(message, stderr);
        fputs("\n", stderr);
    }

    fputs("writing crash report to [ ", stderr);
    fputs(hostname, stderr);
    fputs(":", stderr);
    fputs(logfile, stderr);
    fputs(" ] ...", stderr);
    // Flush so the message is visible before the external command runs.
    fflush(stderr);

    // Non-zero only if the post-mortem command was actually invoked.
    int loggedStack = _LogStackTraceForPid(logfile);
    fputs(" done.\n", stderr);
    // Additionally, print the first few lines of extra log information since
    // developers don't always think to look for it in the stack trace file.
    ArchStackTrace_GetLogInfo().EmitAnyExtraLogInfo(stderr, 3 /* max */);
    printNDashes(bannerSize);
    fputs("\n", stderr);

    if (loggedStack) {
        _FinishLoggingFatalStackTrace(progname, logfile, NULL /*session log*/,
                                      true /* crashing hard? */);
    }

    busy.clear(std::memory_order_release);
}
1110
1111 /*
1112 * Write a stack trace to a file, without forking.
1113 */
1114 void
ArchLogStackTrace(const std::string & reason,bool fatal,const string & sessionLog)1115 ArchLogStackTrace(const std::string& reason, bool fatal,
1116 const string &sessionLog)
1117 {
1118 ArchLogStackTrace(ArchGetProgramNameForErrors(), reason, fatal,
1119 sessionLog);
1120 }
1121
1122 /*
1123 * Write a stack trace to a file, without forking.
1124 *
1125 * Note: use of mktemp is not threadsafe.
1126 */
1127 void
ArchLogStackTrace(const std::string & progname,const std::string & reason,bool fatal,const string & sessionLog)1128 ArchLogStackTrace(const std::string& progname, const std::string& reason,
1129 bool fatal, const string &sessionLog)
1130 {
1131 string tmpFile;
1132 int fd = ArchMakeTmpFile(ArchStringPrintf("%s_%s",
1133 stackTracePrefix,
1134 ArchGetProgramNameForErrors()),
1135 &tmpFile);
1136
1137 /* get hostname for printing out in the error message only */
1138 char hostname[MAXHOSTNAMELEN];
1139 if (gethostname(hostname,MAXHOSTNAMELEN) != 0) {
1140 hostname[0]= '\0';
1141 }
1142
1143 fprintf(stderr,
1144 "--------------------------------------------------------------\n"
1145 "A stack trace has been requested by %s because of %s\n",
1146 progname.c_str(), reason.c_str());
1147
1148 // print out any registered program info
1149 {
1150 ArchStackTrace_GetProgInfo().PrintInfoForErrors();
1151 }
1152
1153 if (fd != -1) {
1154 FILE* fout = ArchFdOpen(fd, "w");
1155 fprintf(stderr, "The stack can be found in %s:%s\n"
1156 "--------------------------------------------------------------"
1157 "\n", hostname, tmpFile.c_str());
1158 ArchPrintStackTrace(fout, progname, reason);
1159 /* If this is a fatal stack trace, attempt to add it to the db */
1160 if (fatal) {
1161 ArchStackTrace_GetLogInfo().EmitAnyExtraLogInfo(fout);
1162 }
1163 fclose(fout);
1164 if (fatal) {
1165 _FinishLoggingFatalStackTrace(progname.c_str(), tmpFile.c_str(),
1166 sessionLog.empty() ?
1167 NULL : sessionLog.c_str(),
1168 false /* crashing hard? */);
1169 }
1170 }
1171 else {
1172 /* we couldn't open the tmp file, so write the stack trace to stderr */
1173 fprintf(stderr,
1174 "--------------------------------------------------------------"
1175 "\n");
1176 ArchPrintStackTrace(stderr, progname, reason);
1177 ArchStackTrace_GetLogInfo().EmitAnyExtraLogInfo(stderr);
1178 }
1179 fprintf(stderr,
1180 "--------------------------------------------------------------\n");
1181 }
1182
1183 #if defined(ARCH_OS_DARWIN)
1184
1185 /*
1186 * This function will use _LogStackTraceForPid(const char*), which uses
1187 * the stacktrace script, to log the stack to a file. Then it reads the lines
1188 * back in and puts them into an output iterator.
1189 */
1190 template <class OutputIterator>
1191 static void
_LogStackTraceToOutputIterator(OutputIterator oi,size_t maxDepth,bool addEndl)1192 _LogStackTraceToOutputIterator(OutputIterator oi, size_t maxDepth, bool addEndl)
1193 {
1194 /* Could use tmpnam but we're trying to be minimalist here. */
1195 char logfile[1024];
1196 _GetStackTraceName(logfile, sizeof(logfile));
1197
1198 _LogStackTraceForPid(logfile);
1199
1200 ifstream inFile(logfile);
1201 string line;
1202 size_t currentDepth = 0;
1203 while(!inFile.eof() && currentDepth < maxDepth) {
1204 getline(inFile, line);
1205 if(addEndl && !inFile.eof())
1206 line += "\n";
1207 *oi++ = line;
1208 currentDepth ++;
1209 }
1210
1211 inFile.close();
1212 ArchUnlinkFile(logfile);
1213 }
1214
1215 #endif
1216
1217 /*
1218 * ArchPrintStackTrace
1219 * print out a stack trace to the given FILE *.
1220 */
1221 void
ArchPrintStackTrace(FILE * fout,const std::string & programName,const std::string & reason)1222 ArchPrintStackTrace(FILE *fout, const std::string& programName, const std::string& reason)
1223 {
1224 ostringstream oss;
1225
1226 ArchPrintStackTrace(oss, programName, reason);
1227
1228 if (fout == NULL) {
1229 fout = stderr;
1230 }
1231
1232 fprintf(fout, "%s", oss.str().c_str());
1233 fflush(fout);
1234 }
1235
1236 void
ArchPrintStackTrace(FILE * fout,const std::string & reason)1237 ArchPrintStackTrace(FILE* fout, const std::string& reason)
1238 {
1239 ArchPrintStackTrace(fout, ArchGetProgramNameForErrors(), reason);
1240 }
1241
1242 void
ArchPrintStackTrace(std::ostream & out,const std::string & reason)1243 ArchPrintStackTrace(std::ostream& out, const std::string& reason)
1244 {
1245 ArchPrintStackTrace(out, ArchGetProgramNameForErrors(), reason);
1246 }
1247
1248 /*
1249 * ArchPrintStackTrace
1250 * print out a stack trace to the given ostream.
1251 *
1252 * This function should probably not be called from a signal handler as
1253 * it calls printf and other unsafe functions.
1254 */
1255 void
ArchPrintStackTrace(ostream & oss,const std::string & programName,const std::string & reason)1256 ArchPrintStackTrace(ostream& oss,
1257 const std::string& programName,
1258 const std::string& reason)
1259 {
1260 oss << "==============================================================\n"
1261 << " A stack trace has been requested by "
1262 << programName << " because: " << reason << endl;
1263
1264 #if defined(ARCH_OS_DARWIN)
1265
1266 _LogStackTraceToOutputIterator(ostream_iterator<string>(oss), numeric_limits<size_t>::max(), true);
1267
1268 #else
1269
1270 vector<uintptr_t> frames;
1271 ArchGetStackFrames(MAX_STACK_DEPTH, &frames);
1272 ArchPrintStackFrames(oss, frames);
1273
1274 #endif
1275
1276 oss << "==============================================================\n";
1277 }
1278
1279 void
ArchGetStackTrace(ostream & oss,const std::string & reason)1280 ArchGetStackTrace(ostream& oss, const std::string& reason)
1281 {
1282 ArchPrintStackTrace(oss, ArchGetProgramNameForErrors(), reason);
1283 }
1284
1285 void
ArchGetStackFrames(size_t maxDepth,vector<uintptr_t> * frames)1286 ArchGetStackFrames(size_t maxDepth, vector<uintptr_t> *frames)
1287 {
1288 ArchGetStackFrames(maxDepth, /* skip = */ 0, frames);
1289 }
1290
1291 #if defined(ARCH_OS_LINUX) && defined(ARCH_BITS_64)
// State handed to Arch_unwindcb() through its void* data argument while
// the stack is being unwound.
struct Arch_UnwindContext {
    Arch_UnwindContext(size_t inMaxdepth, size_t inSkip,
                       std::vector<uintptr_t>* inFrames)
        : maxdepth(inMaxdepth)
        , skip(inSkip)
        , frames(inFrames)
    {
    }

    // Unwinding stops once frames->size() reaches this value.
    size_t maxdepth;
    // Number of frames still to be discarded before recording begins.
    size_t skip;
    // Destination for the recorded instruction pointers (not owned).
    std::vector<uintptr_t>* frames;
};
1303
1304 static _Unwind_Reason_Code
Arch_unwindcb(struct _Unwind_Context * ctx,void * data)1305 Arch_unwindcb(struct _Unwind_Context *ctx, void *data)
1306 {
1307 Arch_UnwindContext* context = static_cast<Arch_UnwindContext*>(data);
1308
1309 // never extend frames because it is unsafe to alloc inside a
1310 // signal handler, and this function is called sometimes (when
1311 // profiling) from a signal handler.
1312 if (context->frames->size() >= context->maxdepth) {
1313 return _URC_END_OF_STACK;
1314 }
1315 else {
1316 if (context->skip > 0) {
1317 --context->skip;
1318 }
1319 else {
1320 context->frames->push_back(_Unwind_GetIP(ctx));
1321 }
1322 return _URC_NO_REASON;
1323 }
1324 }
1325
1326 /*
1327 * ArchGetStackFrames
1328 * save some of stack into buffer.
1329 */
1330 void
ArchGetStackFrames(size_t maxdepth,size_t skip,vector<uintptr_t> * frames)1331 ArchGetStackFrames(size_t maxdepth, size_t skip, vector<uintptr_t> *frames)
1332 {
1333 /* use the exception handling mechanism to unwind our stack.
1334 * note this is gcc >= 3.3.3 only.
1335 */
1336 frames->reserve(maxdepth);
1337 Arch_UnwindContext context(maxdepth, skip, frames);
1338 _Unwind_Backtrace(Arch_unwindcb, (void*)&context);
1339 }
1340
1341 #elif defined(ARCH_OS_WINDOWS)
1342
1343 void
ArchGetStackFrames(size_t maxdepth,size_t skip,vector<uintptr_t> * frames)1344 ArchGetStackFrames(size_t maxdepth, size_t skip, vector<uintptr_t> *frames)
1345 {
1346 void* stack[MAX_STACK_DEPTH];
1347 size_t frameCount = CaptureStackBackTrace(skip, MAX_STACK_DEPTH, stack, NULL);
1348 frameCount = std::min(frameCount, maxdepth);
1349 frames->reserve(frameCount);
1350 for (size_t frame = 0; frame < frameCount; ++frame) {
1351 frames->push_back(reinterpret_cast<uintptr_t>(stack[frame]));
1352 }
1353 }
1354
1355 #elif defined(ARCH_OS_DARWIN)
1356
1357 void
ArchGetStackFrames(size_t maxdepth,size_t skip,vector<uintptr_t> * frames)1358 ArchGetStackFrames(size_t maxdepth, size_t skip, vector<uintptr_t> *frames)
1359 {
1360 void* stack[MAX_STACK_DEPTH];
1361 const size_t frameCount =
1362 backtrace(stack, std::max((size_t)MAX_STACK_DEPTH, maxdepth));
1363 frames->reserve(frameCount);
1364 for (size_t frame = skip; frame < frameCount; ++frame) {
1365 frames->push_back(reinterpret_cast<uintptr_t>(stack[frame]));
1366 }
1367 }
1368
1369 #else
1370
/*
 * ArchGetStackFrames (fallback)
 *  Used when none of the platform-specific implementations above is
 *  compiled in.  Does nothing: the arguments are ignored and the frames
 *  vector is left untouched.
 */
void
ArchGetStackFrames(size_t, size_t, vector<uintptr_t> *)
{
}
1375
1376 #endif
1377
1378 static
1379 std::string
Arch_DefaultStackTraceCallback(uintptr_t address)1380 Arch_DefaultStackTraceCallback(uintptr_t address)
1381 {
1382 // Subtract one from the address before getting the info because
1383 // the stack frames have the addresses where we'll return to,
1384 // not where we called from. We don't want the info for the
1385 // instruction after our calls, we want it for the call itself.
1386 // We don't need the exact address of the call because
1387 // ArchGetAddressInfo() will return the info for the closest
1388 // address is knows about that not after the given address.
1389 // (That's good because the address minus one is not the start
1390 // of the call instruction but there's no way to figure that out
1391 // here without decoding assembly instructions.)
1392 std::string objectPath, symbolName;
1393 void* baseAddress, *symbolAddress;
1394 if (ArchGetAddressInfo(reinterpret_cast<void*>(address - 1),
1395 &objectPath, &baseAddress,
1396 &symbolName, &symbolAddress) && symbolAddress) {
1397 Arch_DemangleFunctionName(&symbolName);
1398 const uintptr_t symbolOffset =
1399 (uint64_t)(address - (uintptr_t)symbolAddress);
1400 return ArchStringPrintf("%s+%#0lx", symbolName.c_str(), symbolOffset);
1401 }
1402 else {
1403 return "<unknown>";
1404 }
1405 }
1406
// Forward declaration so the functions below can use it before its
// definition.  Converts raw frame addresses into formatted lines; when
// skipUnknownFrames is true, frames that symbolize to "<unknown>" are
// omitted (default: keep them).
static
vector<string>
Arch_GetStackTrace(const vector<uintptr_t> &frames,
                   bool skipUnknownFrames=false);
1411
1412 /*
1413 * ArchPrintStackFrames
1414 * print out stack frames to the given ostream.
1415 */
1416 void
ArchPrintStackFrames(ostream & oss,const vector<uintptr_t> & frames,bool skipUnknownFrames)1417 ArchPrintStackFrames(ostream& oss, const vector<uintptr_t> &frames,
1418 bool skipUnknownFrames)
1419 {
1420 const vector<string> result = Arch_GetStackTrace(frames, skipUnknownFrames);
1421 for (size_t i = 0; i < result.size(); i++) {
1422 oss << result[i] << std::endl;
1423 }
1424 }
1425
1426 /*
1427 * ArchGetStackTrace
1428 * vector of strings
1429 */
1430 vector<string>
ArchGetStackTrace(size_t maxDepth)1431 ArchGetStackTrace(size_t maxDepth)
1432 {
1433 vector<uintptr_t> frames;
1434 ArchGetStackFrames(maxDepth, &frames);
1435 return Arch_GetStackTrace(frames);
1436 }
1437
1438
1439 static
1440 ArchStackTraceCallback*
Arch_GetStackTraceCallback()1441 Arch_GetStackTraceCallback()
1442 {
1443 static ArchStackTraceCallback callback;
1444 return &callback;
1445 }
1446
1447 static vector<string>
Arch_GetStackTrace(const vector<uintptr_t> & frames,bool skipUnknownFrames)1448 Arch_GetStackTrace(const vector<uintptr_t> &frames,
1449 bool skipUnknownFrames)
1450 {
1451 vector<string> rv;
1452
1453 if (frames.empty()) {
1454 rv.push_back("No frames saved, stack traces probably not supported "
1455 "on this architecture.");
1456 return rv;
1457 }
1458
1459 ArchStackTraceCallback callback = *Arch_GetStackTraceCallback();
1460 if (!callback) {
1461 callback = Arch_DefaultStackTraceCallback;
1462 }
1463 int n = 0;
1464 for (size_t i = 0; i < frames.size(); i++) {
1465 const std::string symbolic = callback(frames[i]);
1466 if (skipUnknownFrames && symbolic == "<unknown>") {
1467 continue;
1468 }
1469 rv.push_back(ArchStringPrintf(" #%-3i 0x%016lx in %s",
1470 n++, frames[i], symbolic.c_str()));
1471 }
1472
1473 return rv;
1474 }
1475
1476 void
ArchSetStackTraceCallback(const ArchStackTraceCallback & cb)1477 ArchSetStackTraceCallback(const ArchStackTraceCallback& cb)
1478 {
1479 *Arch_GetStackTraceCallback() = cb;
1480 }
1481
1482 void
ArchGetStackTraceCallback(ArchStackTraceCallback * cb)1483 ArchGetStackTraceCallback(ArchStackTraceCallback* cb)
1484 {
1485 if (cb) {
1486 *cb = *Arch_GetStackTraceCallback();
1487 }
1488 }
1489
// SIGALRM handler installed by ArchCrashHandlerSystemv().  It deliberately
// has an empty body: its only purpose is to let the pending alarm interrupt
// the blocking waitpid() call there.
static void
archAlarmHandler(int /*sig */)
{
    /* do nothing.  we just have to wake up. */
}
1495
1496 /*
1497 * Replacement for 'system' safe for a crash handler
1498 *
1499 * This function is a substitute for system() which does not allocate
1500 * or free any data, and times out after timeout seconds if the
1501 * operation in argv is not complete. callback is called every
1502 * second. userData is passed to callback. callback can be used,
1503 * for example, to print a '.' repeatedly to show progress. The alarm
1504 * used in this function could interfere with setitimer or other calls
1505 * to alarm, and this function uses non-locking fork and exec if available
1506 * so should not generally be used except following a catastrophe.
1507 */
1508 int
ArchCrashHandlerSystemv(const char * pathname,char * const argv[],int timeout,ArchCrashHandlerSystemCB callback,void * userData)1509 ArchCrashHandlerSystemv(const char* pathname, char *const argv[],
1510 int timeout, ArchCrashHandlerSystemCB callback,
1511 void* userData)
1512 {
1513 #if defined(ARCH_OS_WINDOWS)
1514 fprintf(stderr, "ArchCrashHandlerSystemv unimplemented for Windows\n");
1515 return -1;
1516 #else
1517 struct sigaction act, oldact;
1518 int retval = 0;
1519 int savedErrno;
1520 pid_t pid = nonLockingFork(); /* use non-locking fork */
1521 if (pid == -1) {
1522 /* fork() failed */
1523 char errBuffer[numericBufferSize];
1524 asitoa(errBuffer, errno);
1525 aswrite(2, "FAIL: Unable to fork() crash handler: errno=");
1526 aswrite(2, errBuffer);
1527 aswrite(2, "\n");
1528 return -1;
1529 }
1530 else if (pid == 0) {
1531 // Call setsid() in the child, which is intended to start a new
1532 // "session", and detach from the controlling tty. We do this because
1533 // the stack tracing stuff invokes gdb, which wants to fiddle with the
1534 // tty, and if we're run in the background, that blocks, so we hang
1535 // trying to take the stacktrace. This seems to fix that.
1536 //
1537 // If standard input is not a TTY then skip this. This ensures
1538 // the child is part of the same process group as this process,
1539 // which is important on the renderfarm.
1540 if (isatty(0)) {
1541 setsid();
1542 }
1543
1544 // Exec the handler.
1545 nonLockingExecv(pathname, argv);
1546
1547 /* Exec failed */
1548 char errBuffer[numericBufferSize];
1549 asitoa(errBuffer, errno);
1550 aswrite(2, "FAIL: Unable to exec crash handler ");
1551 aswrite(2, pathname);
1552 aswrite(2, ": errno=");
1553 aswrite(2, errBuffer);
1554 aswrite(2, "\n");
1555 _exit(127);
1556 }
1557 else {
1558 int delta = 0;
1559 sigemptyset(&act.sa_mask);
1560 # if defined(SA_INTERRUPT)
1561 act.sa_flags = SA_INTERRUPT;
1562 # else
1563 act.sa_flags = 0;
1564 # endif
1565 act.sa_handler = &archAlarmHandler;
1566 sigaction(SIGALRM, &act, &oldact);
1567
1568 /* loop until timeout seconds have passed */
1569 do {
1570 int status;
1571 pid_t child;
1572
1573 /* a timeout <= 0 means forever */
1574 if (timeout > 0) {
1575 delta = 1; /* callback every delta seconds */
1576 alarm(delta);
1577 }
1578
1579 /* see what the child is up to */
1580 child = waitpid(pid, &status, 0 /* forever, unless interrupted */);
1581 if (child == (pid_t)-1) {
1582 /* waitpid error. return if not due to signal. */
1583 if (errno != EINTR) {
1584 retval = -1;
1585 char errBuffer[numericBufferSize];
1586 asitoa(errBuffer, errno);
1587 aswrite(2, "FAIL: Crash handler wait failed: errno=");
1588 aswrite(2, errBuffer);
1589 aswrite(2, "\n");
1590 goto out;
1591 }
1592 /* continue below */
1593 }
1594 else if (child != 0) {
1595 /* child finished */
1596 if (WIFEXITED(status)) {
1597 /* child exited successfully. it returned 127
1598 * if the exec() failed. we'll set errno to
1599 * ENOENT in that case though the actual error
1600 * could be something else. */
1601 retval = WEXITSTATUS(status);
1602 if (retval == 127) {
1603 errno = ENOENT;
1604 aswrite(2, "FAIL: Crash handler failed to exec\n");
1605 }
1606 goto out;
1607 }
1608
1609 if (WIFSIGNALED(status)) {
1610 /* child died due to uncaught signal */
1611 errno = EINTR;
1612 retval = -1;
1613 char sigBuffer[numericBufferSize];
1614 asitoa(sigBuffer, WTERMSIG(status));
1615 aswrite(2, "FAIL: Crash handler died: signal=");
1616 aswrite(2, sigBuffer);
1617 aswrite(2, "\n");
1618 goto out;
1619 }
1620 /* child died for an unknown reason */
1621 errno = EINTR;
1622 retval = -1;
1623 char statusBuffer[numericBufferSize];
1624 asitoa(statusBuffer, status);
1625 aswrite(2, "FAIL: Crash handler unexpected wait status=");
1626 aswrite(2, statusBuffer);
1627 aswrite(2, "\n");
1628 goto out;
1629 }
1630
1631 /* child is still going. invoke callback, countdown, and
1632 * wait again for next interrupt. */
1633 if (callback)
1634 callback(userData);
1635 timeout -= delta;
1636 } while (timeout > 0);
1637
1638 /* timed out. kill the child and wait for that. */
1639 alarm(0); /* turn off alarm so it doesn't wake us during kill */
1640 kill(pid, SIGKILL);
1641 waitpid(pid, NULL, 0);
1642
1643 /*
1644 * Set the errno to 'EBUSY' to imply that some resource was busy
1645 * and hence we're 'timing out'.
1646 */
1647 errno = EBUSY;
1648 retval = -1;
1649 aswrite(2, "FAIL: Crash handler timed out\n");
1650 }
1651
1652 out:
1653 savedErrno = errno;
1654 alarm(0);
1655 sigaction(SIGALRM, &oldact, NULL);
1656
1657 errno = savedErrno;
1658 return retval;
1659 #endif
1660 }
1661
1662 PXR_NAMESPACE_CLOSE_SCOPE
1663