1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 //    names, trademarks, service marks, or product names of the Licensor
11 //    and its affiliates, except as required to comply with Section 4(c) of
12 //    the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 //     http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #include "pxr/pxr.h"
25 #include "pxr/base/arch/defines.h"
26 #include "pxr/base/arch/stackTrace.h"
27 #include "pxr/base/arch/attributes.h"
28 #include "pxr/base/arch/debugger.h"
29 #include "pxr/base/arch/defines.h"
30 #include "pxr/base/arch/demangle.h"
31 #include "pxr/base/arch/env.h"
32 #include "pxr/base/arch/error.h"
33 #include "pxr/base/arch/errno.h"
34 #include "pxr/base/arch/export.h"
35 #include "pxr/base/arch/fileSystem.h"
36 #include "pxr/base/arch/inttypes.h"
37 #include "pxr/base/arch/symbols.h"
38 #include "pxr/base/arch/vsnprintf.h"
39 #if defined(ARCH_OS_WINDOWS)
40 #include <io.h>
41 #include <process.h>
42 #include <Winsock2.h>
43 #include <DbgHelp.h>
44 #ifndef MAXHOSTNAMELEN
45 #define MAXHOSTNAMELEN 64
46 #endif
47 #else
48 #include <dlfcn.h>
49 #include <netdb.h>
50 #include <unistd.h>
51 #include <sys/param.h>
52 #include <sys/resource.h>
53 #include <sys/wait.h>
54 #endif
55 #include <algorithm>
56 #include <atomic>
57 #include <fstream>
58 #include <ostream>
59 #include <iterator>
60 #include <limits>
61 #include <cstdlib>
62 #include <errno.h>
63 #include <signal.h>
64 #include <sys/types.h>
65 #include <cstdio>
66 #include <cstring>
67 #include <mutex>
68 #include <thread>
69 
70 /* Darwin/ppc did not do stack traces.  Darwin/i386 still
71    needs some work, this has been stubbed out for now.  */
72 
73 #if defined(ARCH_OS_LINUX) || defined(ARCH_OS_FREEBSD)
74 #include <ucontext.h>
75 #endif
76 
77 #if defined(ARCH_OS_LINUX) || defined(ARCH_OS_FREEBSD) && defined(ARCH_BITS_64)
78 #include <unwind.h>
79 #endif
80 
81 #if defined(ARCH_OS_DARWIN)
82 #include <execinfo.h>
83 #endif
84 
85 #if defined(ARCH_OS_WINDOWS)
86 #define getpid() _getpid()
87 #define write(fd_, data_, size_) _write(fd_, data_, size_)
88 #define strdup(str_) _strdup(str_)
89 #endif
90 
91 #include <string>
92 #include <vector>
93 #include <map>
94 #include <sstream>
95 #include <time.h>
96 
97 PXR_NAMESPACE_OPEN_SCOPE
98 
99 using namespace std;
100 
101 #define MAX_STACK_DEPTH 4096
102 
#if !defined(ARCH_OS_WINDOWS)
// XXX Darwin
// total hack -- no idea if this will work if we die in malloc...
//
// Pointer to the fork() implementation used by nonLockingFork().  On Linux
// and FreeBSD it is resolved during static initialization by looking up
// "__libc_fork" with dlsym(RTLD_NEXT, ...); dlsym() yields a null pointer if
// the symbol cannot be found.  On Darwin it is always null.  Any other
// non-Windows platform is a compile-time error.
typedef int (*ForkFunc)(void);
ForkFunc Arch_nonLockingFork =
#if defined(ARCH_OS_LINUX) || defined(ARCH_OS_FREEBSD)
    (ForkFunc)dlsym(RTLD_NEXT, "__libc_fork");
#elif defined(ARCH_OS_DARWIN)
    NULL;
#else
#error Unknown architecture.
#endif
#endif
116 
117 /*** Stack Logging Global Variables ***/
118 
// Stores the application's launch time, as recorded by
// Arch_SetAppLaunchTime().
static time_t _appLaunchTime;

// This bool determines whether a stack trace should be
// logged upon catching a crash. Use ArchSetFatalStackLogging
// to set this value.
static bool _shouldLogStackToDb = false;

// This string holds the path to the script used to log sessions
// to a database.  Set by ArchSetLogSession(); the pointer is stored,
// not copied.
static const char * _logStackToDbCmd = nullptr;

// Arguments to _logStackToDbCmd for non-crash and crash reports, respectively.
// Both are set by ArchSetLogSession() and are stored without copying.
static const char* const* _sessionLogArgv = nullptr;
static const char* const* _sessionCrashLogArgv = nullptr;

// This string stores the program name to be used when
// displaying error information.  Initialized in
// Arch_InitConfig() to ArchGetExecutablePath()
// It is a strdup()'d copy owned by this file; see
// ArchSetProgramNameForErrors().
static char * _progNameForErrors = NULL;
139 
140 namespace {
// Mutex-protected key/value store of additional program information that
// is shown when reporting errors.  Besides the map itself, the class keeps
// a pre-rendered C string of all entries so that printing does not have to
// walk the map while an error is being handled.
class Arch_ProgInfo
{
    using _MapType = std::map<std::string, std::string>;

public:
    Arch_ProgInfo() : _progInfoForErrors(nullptr) {}
    ~Arch_ProgInfo();

    // Store value under key; an empty value removes the key.
    void SetProgramInfoForErrors(const std::string& key,
                                 const std::string& value);

    // Return the value stored under key, or an empty string if absent.
    std::string GetProgramInfoForErrors(const std::string& key) const;

    // Write the pre-rendered text of all entries to stderr.
    void PrintInfoForErrors() const;

private:
    // The key/value entries.
    _MapType _progInfoMap;

    // Guards both _progInfoMap and _progInfoForErrors.
    mutable std::mutex _progInfoForErrorsMutex;

    // Printed version of _progInfoMap, since we can't
    // traverse it during an error.
    char *_progInfoForErrors;
};
167 
~Arch_ProgInfo()168 Arch_ProgInfo::~Arch_ProgInfo()
169 {
170     if (_progInfoForErrors)
171         free(_progInfoForErrors);
172 }
173 
174 void
SetProgramInfoForErrors(const std::string & key,const std::string & value)175 Arch_ProgInfo::SetProgramInfoForErrors(
176     const std::string& key, const std::string& value)
177 {
178     std::lock_guard<std::mutex> lock(_progInfoForErrorsMutex);
179 
180     if (value.empty()) {
181         _progInfoMap.erase(key);
182     } else {
183         _progInfoMap[key] = value;
184     }
185 
186     std::ostringstream ss;
187 
188     // update the error info string
189     for(_MapType::iterator iter = _progInfoMap.begin();
190         iter != _progInfoMap.end(); ++iter) {
191 
192         ss << iter->first << ": " << iter->second << '\n';
193     }
194 
195     if (_progInfoForErrors)
196         free(_progInfoForErrors);
197 
198     _progInfoForErrors = strdup(ss.str().c_str());
199 }
200 
201 std::string
GetProgramInfoForErrors(const std::string & key) const202 Arch_ProgInfo::GetProgramInfoForErrors(const std::string& key) const
203 {
204     std::lock_guard<std::mutex> lock(_progInfoForErrorsMutex);
205 
206     _MapType::const_iterator iter = _progInfoMap.find(key);
207     std::string result;
208     if (iter != _progInfoMap.end())
209         result = iter->second;
210 
211     return result;
212 }
213 
214 void
PrintInfoForErrors() const215 Arch_ProgInfo::PrintInfoForErrors() const
216 {
217     std::lock_guard<std::mutex> lock(_progInfoForErrorsMutex);
218     if (_progInfoForErrors) {
219         fprintf(stderr, "%s", _progInfoForErrors);
220     }
221 }
222 
223 } // anon-namespace
224 
// Return the process-wide Arch_ProgInfo instance.  It is a function-local
// static, so it is constructed the first time this function is called.
static Arch_ProgInfo &
ArchStackTrace_GetProgInfo()
{
    static Arch_ProgInfo progInfo;
    return progInfo;
}
231 
232 
233 
234 namespace {
235 
// Mutex-protected key/value store of extra log text.  The values are
// unowned pointers to vectors of lines that are emitted into stack trace
// logs in case of fatal errors or crashes.
class Arch_LogInfo
{
    using _LogInfoMap =
        std::map<std::string, std::vector<std::string> const *>;

public:
    // Register lines under key; a null or empty vector removes the key.
    void SetExtraLogInfoForErrors(const std::string &key,
                                  std::vector<std::string> const *lines);

    // Write every registered key and its lines to outFile.  A non-zero
    // max limits the total number of lines written.
    void EmitAnyExtraLogInfo(FILE *outFile, size_t max = 0) const;

private:
    _LogInfoMap _logInfoForErrors;
    mutable std::mutex _logInfoForErrorsMutex;
};
251 
252 void
SetExtraLogInfoForErrors(const std::string & key,std::vector<std::string> const * lines)253 Arch_LogInfo::SetExtraLogInfoForErrors(const std::string &key,
254                                        std::vector<std::string> const *lines)
255 {
256     std::lock_guard<std::mutex> lock(_logInfoForErrorsMutex);
257     if (!lines || lines->empty()) {
258         _logInfoForErrors.erase(key);
259     } else {
260         _logInfoForErrors[key] = lines;
261     }
262 }
263 
264 void
EmitAnyExtraLogInfo(FILE * outFile,size_t max) const265 Arch_LogInfo::EmitAnyExtraLogInfo(FILE *outFile, size_t max) const
266 {
267     // This function can't cause any heap allocation, be careful.
268     // XXX -- std::string::c_str and fprintf can do allocations.
269     std::lock_guard<std::mutex> lock(_logInfoForErrorsMutex);
270     size_t n = 0;
271     for (_LogInfoMap::const_iterator i = _logInfoForErrors.begin(),
272              end = _logInfoForErrors.end(); i != end; ++i) {
273         fputs("\n", outFile);
274         fputs(i->first.c_str(), outFile);
275         fputs(":\n", outFile);
276         for (std::string const &line: *i->second) {
277             if (max && n++ >= max) {
278                 fputs("... see full diagnostics in crash report.\n", outFile);
279                 return;
280             }
281             fputs(line.c_str(), outFile);
282         }
283     }
284 }
285 
286 } // anon-namespace
287 
// Return the process-wide Arch_LogInfo instance.  It is a function-local
// static, so it is constructed the first time this function is called.
static Arch_LogInfo &
ArchStackTrace_GetLogInfo()
{
    static Arch_LogInfo logInfo;
    return logInfo;
}
294 
295 
// atexit() handler registered by ArchEnableSessionLogging(); logs the
// session via ArchLogSessionInfo() with no crash stack trace argument.
static void
_atexitCallback()
{
    ArchLogSessionInfo();
}
301 
// Arrange for the session to be logged when the process exits normally.
// The registration is the initializer of a function-local static, so the
// handler is registered only once no matter how often this is called.
void
ArchEnableSessionLogging()
{
    static int unused = atexit(_atexitCallback);
    // Silence unused-variable warnings; the value of atexit() is ignored.
    (void)unused;
}
308 
// Prefix of the stack trace file names built by _GetStackTraceName().
static const char* const stackTracePrefix = "st";
// Post-mortem command and its argument template, set by
// ArchSetPostMortem().  The pointers are stored without copying.
static const char* stackTraceCmd = nullptr;
static const char* const* stackTraceArgv = nullptr;

// Defined later in this file, separately for Windows and other platforms.
static long _GetAppElapsedTime();
314 
315 namespace {
316 
// Return the length of s, not counting the terminating NUL.  A null
// pointer is treated as an empty string.
size_t asstrlen(const char* s)
{
    if (!s) {
        return 0;
    }
    const char* cursor = s;
    while (*cursor) {
        ++cursor;
    }
    return static_cast<size_t>(cursor - s);
}
328 
// Copy the string at src to dst, returning a pointer to the NUL terminator
// in dst (NOT a pointer to dst).  Returning the terminator lets callers
// chain calls to append further strings.
//
// dst must have room for src including its terminator; neither pointer is
// checked for null.
//
// ARCH_NOINLINE because old clang versions generated incorrect optimized
// code.
char* asstrcpy(char* dst, const char* src) ARCH_NOINLINE;
char* asstrcpy(char* dst, const char* src)
{
    // Copies one character per iteration, including the terminating NUL,
    // which also ends the loop.
    while ((*dst++ = *src++)) {
        // Do nothing
    }
    // dst was advanced past the NUL; step back onto it.
    return dst - 1;
}
342 
// Compare the strings for equality.  Two null pointers compare equal; a
// null pointer never equals a non-null string.
bool asstreq(const char* dst, const char* src)
{
    if (!dst || !src) {
        return dst == src;
    }
    for (;; ++dst, ++src) {
        if (*dst != *src) {
            return false;
        }
        if (*dst == '\0') {
            // Both strings ended at the same position.
            return true;
        }
    }
}
356 
// Compare the strings for equality up to n characters.  Two null pointers
// compare equal; a null pointer never equals a non-null string.
bool asstrneq(const char* dst, const char* src, size_t n)
{
    if (!dst || !src) {
        return dst == src;
    }
    for (size_t remaining = n; remaining != 0; --remaining, ++dst, ++src) {
        if (*dst != *src) {
            return false;
        }
        if (*dst == '\0') {
            // Both strings ended before the limit was reached.
            return true;
        }
    }
    return true;
}
371 
372 // Returns the environment variable named name, or NULL if it doesn't exist.
asgetenv(const char * name)373 const char* asgetenv(const char* name)
374 {
375     if (name) {
376         const size_t len = asstrlen(name);
377         for (char** i = ArchEnviron(); *i; ++i) {
378             const char* var = *i;
379             if (asstrneq(var, name, len)) {
380                 if (var[len] == '=') {
381                     return var + len + 1;
382                 }
383             }
384         }
385     }
386     return nullptr;
387 }
388 
// Minimum safe size for a buffer to hold a long converted to decimal ASCII.
// Used to size the buffers that asitoa() writes pid and time values into.
static constexpr int numericBufferSize =
    std::numeric_limits<long>::digits10
    + 1     // sign
    + 1     // overflow (digits10 doesn't necessarily count the high digit)
    + 1     // trailing NUL
    + 1;    // paranoia
396 
// Return the number of characters in the decimal string representation of
// x, counting a leading '-' for negative values but not the trailing NUL.
//
// The magnitude is computed in unsigned arithmetic so that the most
// negative long is handled without signed overflow.
size_t asNumDigits(long x)
{
    size_t result = 1;
    unsigned long magnitude = static_cast<unsigned long>(x);
    if (x < 0) {
        // Unsigned negation is well defined, unlike -x for LONG_MIN.
        magnitude = 0UL - magnitude;
        ++result;
    }
    while (magnitude >= 10) {
        ++result;
        magnitude /= 10;
    }
    return result;
}
411 
// Write the decimal string representation of x to s, which must have
// sufficient space available (numericBufferSize bytes always suffice).
// Returns a pointer to the terminating NUL written into s.
//
// Negative values are written with a leading '-'.  The magnitude is handled
// in unsigned arithmetic so the most negative long converts correctly.
// Only plain loops and arithmetic are used so this stays usable from a
// crash handler.
char* asitoa(char* s, long x)
{
    // Write the minus sign and step past it so the digits follow it.
    unsigned long magnitude = static_cast<unsigned long>(x);
    if (x < 0) {
        magnitude = 0UL - magnitude;
        *s++ = '-';
    }

    // Count the digits of the magnitude.
    size_t numDigits = 1;
    for (unsigned long rest = magnitude; rest >= 10; rest /= 10) {
        ++numDigits;
    }

    // Skip to the end and write the terminating NUL.
    char* const end = s + numDigits;
    *end = '\0';

    // Write each digit, starting with the 1's column, working backwards.
    // The do/while writes a single '0' when the magnitude is zero.
    char* out = end;
    do {
        *--out = static_cast<char>('0' + magnitude % 10);
        magnitude /= 10;
    } while (magnitude);

    return end;
}
439 
440 // Write a string to a file descriptor.
aswrite(int fd,const char * msg)441 void aswrite(int fd, const char* msg)
442 {
443     int saved = errno;
444     write(fd, msg, asstrlen(msg));
445     errno = saved;
446 }
447 
_GetStackTraceName(char * buf,size_t len)448 int _GetStackTraceName(char* buf, size_t len)
449 {
450     // Take care to avoid non-async-safe functions.
451     // NOTE: This doesn't protect against other threads changing the
452     //       temporary directory or program name for errors.
453 
454     // Count the string length required.
455     size_t required =
456         asstrlen(ArchGetTmpDir()) +
457         1 +     // "/"
458         asstrlen(stackTracePrefix) +
459         1 +     // "_"
460         asstrlen(ArchGetProgramNameForErrors()) +
461         1 +     // "."
462         asNumDigits(getpid()) +
463         1;      // "\0"
464 
465     // Fill in buf with the default name.
466     char* end = buf;
467     if (len < required) {
468         // No space.  Not quite an accurate error code.
469         errno = ENOMEM;
470         return -1;
471     }
472     else {
473         end = asstrcpy(end, ArchGetTmpDir());
474         end = asstrcpy(end, "/");
475         end = asstrcpy(end, stackTracePrefix);
476         end = asstrcpy(end, "_");
477         end = asstrcpy(end, ArchGetProgramNameForErrors());
478         end = asstrcpy(end, ".");
479         end = asitoa(end, getpid());
480     }
481 
482     // Return a name that isn't currently in use.  Simultaneously create
483     // the empty file.
484     int suffix = 0;
485 #if defined(ARCH_OS_WINDOWS)
486     int fd = _open(buf, O_CREAT | O_WRONLY | O_TRUNC | O_EXCL,
487                    _S_IREAD | _S_IWRITE);
488 #else
489     int fd =  open(buf, O_CREAT | O_WRONLY | O_TRUNC | O_EXCL, 0640);
490 #endif
491 
492     while (fd == -1 && errno == EEXIST) {
493         // File exists.  Try a new suffix if there's space.
494         ++suffix;
495         if (len < required + 1 + asNumDigits(suffix)) {
496             // No space.  Not quite an accurate error code.
497             errno = ENOMEM;
498             return -1;
499         }
500         asstrcpy(end, ".");
501         asitoa(end + 1, suffix);
502 #if defined(ARCH_OS_WINDOWS)
503         fd = _open(buf, O_CREAT | O_WRONLY | O_TRUNC | O_EXCL,
504                    _S_IREAD | _S_IWRITE);
505 #else
506         fd =  open(buf, O_CREAT | O_WRONLY | O_TRUNC | O_EXCL, 0640);
507 #endif
508     }
509     if (fd != -1) {
510         ArchCloseFile(fd);
511         fd = 0;
512     }
513     return fd;
514 }
515 
516 // Build an argument list (async-safe).
517 static bool
_MakeArgv(const char * dstArgv[],size_t maxDstArgs,const char * cmd,const char * const srcArgv[],const char * const substitutions[][2],size_t numSubstitutions)518 _MakeArgv(
519     const char* dstArgv[],
520     size_t maxDstArgs,
521     const char* cmd,
522     const char* const srcArgv[],
523     const char* const substitutions[][2],
524     size_t numSubstitutions)
525 {
526     if (!cmd || !srcArgv) {
527         return false;
528     }
529 
530     // Count the maximum number of arguments needed.
531     size_t n = 1;
532     for (const char *const* i = srcArgv; *i; ++n, ++i) {
533         // Do nothing
534     }
535 
536     // Make sure we don't have too many arguments.
537     if (n >= maxDstArgs) {
538         return false;
539     }
540 
541     // Build the command line.
542     size_t j = 0;
543     for (size_t i = 0; i != n; ++i) {
544         if (asstreq(srcArgv[i], "$cmd")) {
545             dstArgv[j++] = cmd;
546         }
547         else {
548             dstArgv[j] = srcArgv[i];
549             for (size_t k = 0; k != numSubstitutions; ++k) {
550                 if (asstreq(srcArgv[i], substitutions[k][0])) {
551                     dstArgv[j] = substitutions[k][1];
552                     break;
553                 }
554             }
555             ++j;
556         }
557     }
558     dstArgv[j] = nullptr;
559 
560     return true;
561 }
562 
563 #if !defined(ARCH_OS_WINDOWS)
564 /* We use a 'non-locking' fork so that we won't get hung up if we've
565  * had malloc corruption when we crash.  The crash recovery behavior
566  * can be tested with ArchTestCrash(), which should crash with this
567  * malloc corruption.
568  */
569 static int
nonLockingFork()570 nonLockingFork()
571 {
572     if (Arch_nonLockingFork != NULL) {
573         return (Arch_nonLockingFork)();
574     }
575     return fork();
576 }
577 #endif
578 
#if defined(ARCH_OS_LINUX) || defined(ARCH_OS_FREEBSD)
// Invoke the execve system call directly with the given program path,
// argument vector and environment.  If the raw result lies in the top
// 4096 values of the unsigned range it is treated as a negated error
// code: errno is set from it and -1 is returned.  Otherwise the raw
// result is returned.
static int
nonLockingLinux__execve (const char *file,
                         char *const argv[],
                         char *const envp[])
{
    /*
     * We make a direct system call here, because we can't find an
     * execve which corresponds with the non-locking fork we call
     * (__libc_fork().)
     *
     * This code doesn't mess with other threads, and avoids the bug
     * that calling regular execv after the nonLockingFork() causes
     * hangs in a threaded app.  (We use the non-locking fork to get
     * around problems with forking when we have had memory
     * corruption.)  whew.
     */

    unsigned long result;

#if defined (ARCH_CPU_ARM)
    {
        // Arguments go in x0-x2 and the system call number (221) in x8;
        // the result comes back in x0.
        register long __file_result asm ("x0") = (long)file;
        register char* const* __argv asm ("x1") = argv;
        register char* const* __envp asm ("x2") = envp;
        register long __num_execve asm ("x8") = 221;
        __asm__ __volatile__ (
            "svc 0"
            : "=r" (__file_result)
            : "r"(__num_execve), "r" (__file_result), "r" (__argv), "r" (__envp)
            : "memory"
        );
        result = __file_result;
    }
#elif defined(ARCH_CPU_INTEL) && defined(ARCH_BITS_64)

    /*
     * %rdi, %rsi, %rdx, %rcx, %r8, %r9 are args 0-5
     * syscall clobbers %rcx and %r11
     *
     * why do we put args 1, 2 into cx, dx and then move them?
     * because it doesn't work if you directly specify them as
     * constraints to gcc.
     */

    // file arrives in %rax (operand 0) and is moved to %rdi; argv arrives
    // in %rcx and is moved to %rsi; envp is already in %rdx.  %rax is then
    // loaded with the system call number 0x3b and receives the result.
    //
    // NOTE(review): the asm writes %rdi and %rsi, and the comment above
    // says syscall clobbers %rcx, but none of these are listed as
    // clobbers or outputs -- confirm this is safe on the failure path,
    // where execution continues after the asm.
    __asm__ __volatile__ (
        "mov    %0, %%rdi    \n\t"
        "mov    %%rcx, %%rsi \n\t"
        "mov    %%rdx, %%rdx \n\t"
        "mov    $0x3b, %%rax \n\t"
        "syscall             \n\t"
        : "=a" (result)
        : "0" (file), "c" (argv), "d" (envp)
        : "memory", "cc", "r11"
    );
#else
#error Unknown architecture
#endif

    // A result in [0xfffffffffffff000, 0xffffffffffffffff] is a negated
    // error code: convert it to errno and the conventional -1.
    if (result >= 0xfffffffffffff000) {
        errno = -result;
        result = (unsigned int)-1;
    }

    return result;
}

#endif
647 
#if !defined(ARCH_OS_WINDOWS)
/* This is the corresponding execv which works with nonLockingFork().
 * currently, it's only different from execv for linux.  The crash
 * recovery behavior can be tested with ArchTestCrash().
 *
 * On Linux the direct system call wrapper is used with the current
 * environment (__environ); everywhere else this is plain execv().
 */
static int
nonLockingExecv(const char *path, char *const argv[])
{
#if defined(ARCH_OS_LINUX)
     return nonLockingLinux__execve (path, argv, __environ);
#else
     return execv(path, argv);
#endif
}
#endif
663 
/*
 * Return the base of a filename, i.e. the part after the last path
 * separator.
 *
 * - A null path yields an empty string.
 * - A path without a separator is returned unchanged.
 * - On Windows both '/' and '\' are separators, and a trailing ".exe" is
 *   removed from the base name.
 * - Elsewhere, a path ending in '/' is returned unchanged, since there is
 *   no base name to extract.
 */

static std::string
getBase(const char* path)
{
    if (!path) {
        return std::string();
    }

#if defined(ARCH_OS_WINDOWS)
    const std::string tmp = path;
    const std::string::size_type i = tmp.find_last_of("/\\");
    if (i == std::string::npos) {
        return tmp;
    }

    // Strip the extension only when it really is the suffix of the base
    // name, never an ".exe" that appears earlier in the path.
    std::string base = tmp.substr(i + 1);
    static const char ext[] = ".exe";
    const std::string::size_type extLen = sizeof(ext) - 1;
    if (base.size() >= extLen &&
        base.compare(base.size() - extLen, extLen, ext) == 0) {
        base.erase(base.size() - extLen);
    }
    return base;
#else
    const char* slash = strrchr(path, '/');
    if (!slash || slash[1] == '\0') {
        // No directory part, or nothing after the last separator.
        return path;
    }
    return slash + 1;
#endif
}
691 
692 } // anonymous namespace
693 
694 /*
695  * Run an external program to write post-mortem information to logfile for
696  * process pid.  This waits until the program completes.
697  *
698  * This is an internal function used by ArchLogPostMortem().  It must call
699  * only async-safe functions.
700  */
701 
702 static
_LogStackTraceForPid(const char * logfile)703 int _LogStackTraceForPid(const char *logfile)
704 {
705     // Get the command to run.
706     const char* cmd = asgetenv("ARCH_POSTMORTEM");
707     if (!cmd) {
708         cmd = stackTraceCmd;
709     }
710     if (!cmd || !stackTraceArgv) {
711         // Silently do nothing.
712         return 0;
713     }
714 
715     // Construct the substitutions.
716     char pidBuffer[numericBufferSize], timeBuffer[numericBufferSize];
717     asitoa(pidBuffer, getpid());
718     asitoa(timeBuffer, _GetAppElapsedTime());
719     const char* const substitutions[3][2] = {
720         { "$pid", pidBuffer }, { "$log", logfile }, { "$time", timeBuffer }
721     };
722 
723     // Build the argument list.
724     static constexpr size_t maxArgs = 32;
725     const char* argv[maxArgs];
726     if (!_MakeArgv(argv, maxArgs, cmd, stackTraceArgv, substitutions, 2)) {
727         static const char msg[] = "Too many arguments to postmortem command\n";
728         aswrite(2, msg);
729         return 0;
730     }
731 
732     // Invoke the command.
733     ArchCrashHandlerSystemv(argv[0], (char *const*)argv,
734                             300 /* wait up to 300 seconds */ , NULL, NULL);
735     return 1;
736 }
737 
// Register the post-mortem command and its argument template, used by
// _LogStackTraceForPid().  Only the pointers are stored, nothing is copied,
// so the caller must keep command and argv valid for as long as they may
// be used.
void
ArchSetPostMortem(const char* command, const char *const argv[] )
{
    stackTraceCmd  = command;
    stackTraceArgv = argv;
}
744 
/*
 * Arch_SetAppLaunchTime()
 * -------------------------------
 * Stores the current time, as returned by time(), as the application's
 * launch time.
 * This function is internal.
 */
ARCH_HIDDEN
void
Arch_SetAppLaunchTime()
{
    _appLaunchTime = time(NULL);
}
757 
/*
 * ArchGetAppLaunchTime()
 * -------------------------------
 * Returns the application's launch time, or zero if a timestamp hasn't
 * been recorded with Arch_SetAppLaunchTime().
 */
time_t
ArchGetAppLaunchTime()
{
    // _appLaunchTime has static storage and no initializer, so it is zero
    // until Arch_SetAppLaunchTime() runs.
    return _appLaunchTime;
}
770 
/*
 * ArchSetFatalStackLogging()
 * -------------------------------
 * This enables (flag == true) or disables (flag == false) the logging of
 * the stack trace and other build information upon intercepting a crash.
 * The same flag also gates the session logging done by
 * ArchLogSessionInfo().
 *
 * This function can be called from python.
 */
void
ArchSetFatalStackLogging( bool flag )
{
    _shouldLogStackToDb = flag;
}
784 
/*
 * ArchGetFatalStackLogging()
 * ---------------------------
 * Returns the current value of the logging flag set by
 * ArchSetFatalStackLogging(); the flag is false until it is set.
 *
 * This function can be called from python.
 */
bool
ArchGetFatalStackLogging()
{
    return _shouldLogStackToDb;
}
797 
// Store value under key in the process-wide program info used when
// reporting errors.  An empty value removes the key.
void
ArchSetProgramInfoForErrors(const std::string& key,
                            const std::string& value)
{
    ArchStackTrace_GetProgInfo().SetProgramInfoForErrors(key, value);
}
804 
// Return the program info value stored under key, or an empty string if
// the key has not been set.
std::string
ArchGetProgramInfoForErrors(const std::string& key)
{
    return ArchStackTrace_GetProgInfo().GetProgramInfoForErrors(key);
}
810 
// Register extra lines to be emitted in stack trace logs under key.  The
// pointer is stored without copying the vector, so it must stay valid
// while registered; passing null or an empty vector removes the key.
void
ArchSetExtraLogInfoForErrors(const std::string &key,
                             std::vector<std::string> const *lines)
{
    ArchStackTrace_GetLogInfo().SetExtraLogInfoForErrors(key, lines);
}
817 
818 /*
819  * ArchSetProgramNameForErrors
820  * ---------------------------
821  * Set's the program name that is to be used for diagnostic output.
822  */
823 void
ArchSetProgramNameForErrors(const char * progName)824 ArchSetProgramNameForErrors( const char *progName )
825 {
826 
827     if (_progNameForErrors)
828         free(_progNameForErrors);
829 
830     if (progName)
831         _progNameForErrors = strdup(getBase(progName).c_str());
832     else
833         _progNameForErrors = NULL;
834 }
835 
836 /*
837  * ArchGetProgramNameForErrors
838  * ----------------------------
839  * Returns the currently set program name used for
840  * reporting error information.  Returns "libArch"
841  * if a value hasn't been set.
842  */
843 const char *
ArchGetProgramNameForErrors()844 ArchGetProgramNameForErrors()
845 {
846     if (_progNameForErrors)
847         return _progNameForErrors;
848 
849     return "libArch";
850 }
851 
852 #if defined(ARCH_OS_WINDOWS)
853 static long
_GetAppElapsedTime()854 _GetAppElapsedTime()
855 {
856     FILETIME       starttime;
857     FILETIME       exittime;
858     FILETIME       kerneltime;
859     FILETIME       usertime;
860     ULARGE_INTEGER li;
861 
862     if (::GetProcessTimes(GetCurrentProcess(),
863             &starttime, &exittime, &kerneltime, &usertime) == 0) {
864         ARCH_WARNING("_GetAppElapsedTime failed");
865         return 0L;
866     }
867     memcpy(&li, &usertime, sizeof(FILETIME));
868     return static_cast<long>(li.QuadPart / 10000000ULL);
869 }
870 #else
871 static long
_GetAppElapsedTime()872 _GetAppElapsedTime()
873 {
874     rusage ru;
875 
876     // We only record the amount of time spent in user instructions,
877     // so as to discount idle time when logging up time.
878     if (getrusage(RUSAGE_SELF, &ru) == 0) {
879         return long(ru.ru_utime.tv_sec);
880     }
881 
882     // Fallback to logging the entire session time, if we could
883     // not get the user time from the resource usage.
884 
885     // Note: Total time measurement will be a little off because this
886     // calculation happens after the stack trace is generated which can
887     // take a long time.
888     //
889     return long(time(0) - _appLaunchTime);
890 }
891 #endif
892 
893 static void
_InvokeSessionLogger(const char * progname,const char * stackTrace)894 _InvokeSessionLogger(const char* progname, const char *stackTrace)
895 {
896     // Get the command to run.
897     const char* cmd = asgetenv("ARCH_LOGSESSION");
898     const char* const* srcArgv =
899         stackTrace ? _sessionCrashLogArgv : _sessionLogArgv;
900     if (!cmd) {
901         cmd = _logStackToDbCmd;
902     }
903     if (!cmd || !srcArgv) {
904         // Silently do nothing.
905         return;
906     }
907 
908     // Construct the substitutions.
909     char pidBuffer[numericBufferSize], timeBuffer[numericBufferSize];
910     asitoa(pidBuffer, getpid());
911     asitoa(timeBuffer, _GetAppElapsedTime());
912     const char* const substitutions[4][2] = {
913         {"$pid", pidBuffer}, {"$time", timeBuffer},
914         {"$prog", progname}, {"$stack", stackTrace}
915     };
916 
917     // Build the argument list.
918     static constexpr size_t maxArgs = 32;
919     const char* argv[maxArgs];
920     if (!_MakeArgv(argv, maxArgs, cmd, srcArgv, substitutions, 4)) {
921         static const char msg[] = "Too many arguments to log session command\n";
922         aswrite(2, msg);
923         return;
924     }
925 
926     // Invoke the command.
927     ArchCrashHandlerSystemv(argv[0], (char *const*)argv,
928                             60 /* wait up to 60 seconds */, NULL, NULL);
929 }
930 
931 /*
932  * '_FinishLoggingFatalStackTrace' appends the sessionLog
933  * to the stackTrace, and then calls an external program to add it
934  * to the stack_trace database table.
935  */
936 static void
_FinishLoggingFatalStackTrace(const char * progname,const char * stackTrace,const char * sessionLog,bool crashingHard)937 _FinishLoggingFatalStackTrace(const char *progname, const char *stackTrace,
938                               const char *sessionLog, bool crashingHard)
939 {
940     if (!crashingHard && sessionLog) {
941         // If we were given a session log, cat it to the end of the stack.
942         if (FILE* stackFd = ArchOpenFile(stackTrace, "a")) {
943             if (FILE* sessionLogFd = ArchOpenFile(sessionLog, "r")) {
944                 fputs("\n\n********** Session Log **********\n\n", stackFd);
945                 // Cat the session log
946                 char line[4096];
947                 while (fgets(line, 4096, sessionLogFd)) {
948                     fputs(line, stackFd);
949                 }
950                 fclose(sessionLogFd);
951             }
952             fclose(stackFd);
953         }
954     }
955 
956     // Add trace to database if _shouldLogStackToDb is true
957     if (_shouldLogStackToDb)
958     {
959         _InvokeSessionLogger(progname, stackTrace);
960     }
961 
962 }
963 
// Log session information through the external session logger, but only
// if fatal stack logging has been enabled (see ArchSetFatalStackLogging()).
// crashStackTrace is handed to the logger as the "$stack" substitution; a
// null value selects the non-crash argument list.
void
ArchLogSessionInfo(const char *crashStackTrace)
{
    if (_shouldLogStackToDb)
    {
        _InvokeSessionLogger(ArchGetProgramNameForErrors(), crashStackTrace);
    }
}
972 
// Register the session logging command together with the argument
// templates used for non-crash (argv) and crash (crashArgv) reports.
// Only the pointers are stored, nothing is copied, so the caller must keep
// all three valid for as long as they may be used.
void
ArchSetLogSession(
    const char* command,
    const char* const argv[],
    const char* const crashArgv[])
{
    _logStackToDbCmd     = command;
    _sessionLogArgv      = argv;
    _sessionCrashLogArgv = crashArgv;
}
983 
984 /*
985  * Run an external program to make a report and tell the user where the report
986  * file is.
987  *
988  * Use of char*'s is deliberate: only async-safe calls allowed past this point!
989  */
990 void
ArchLogPostMortem(const char * reason,const char * message,const char * extraLogMsg)991 ArchLogPostMortem(const char* reason,
992                   const char* message /* = nullptr */,
993                   const char* extraLogMsg /* = nullptr */)
994 {
995     static std::atomic_flag busy = ATOMIC_FLAG_INIT;
996 
997     // Disallow recursion and allow only one thread at a time.
998     while (busy.test_and_set(std::memory_order_acquire)) {
999         // Spin!
1000         std::this_thread::yield();
1001     }
1002 
1003     const char* progname = ArchGetProgramNameForErrors();
1004 
1005     // If we can attach a debugger then just exit here.
1006     if (ArchDebuggerAttach()) {
1007         ARCH_DEBUGGER_TRAP;
1008         _exit(0);
1009     }
1010 
1011     /* Could use tmpnam but we're trying to be minimalist here. */
1012     char logfile[1024];
1013     if (_GetStackTraceName(logfile, sizeof(logfile)) == -1) {
1014         // Cannot create the logfile.
1015         static const char msg[] = "Cannot create a log file\n";
1016         aswrite(2, msg);
1017         busy.clear(std::memory_order_release);
1018         return;
1019     }
1020 
1021     // Write reason for stack trace to logfile.
1022     if (FILE* stackFd = ArchOpenFile(logfile, "a")) {
1023         if (reason) {
1024             fputs("This stack trace was requested because: ", stackFd);
1025             fputs(reason, stackFd);
1026             fputs("\n", stackFd);
1027         }
1028         if (message) {
1029             fputs(message, stackFd);
1030             fputs("\n", stackFd);
1031         }
1032         ArchStackTrace_GetLogInfo().EmitAnyExtraLogInfo(stackFd);
1033         if (extraLogMsg) {
1034             fputs(extraLogMsg, stackFd);
1035             fputs("\n", stackFd);
1036         }
1037         fputs("\nPostmortem Stack Trace\n", stackFd);
1038         fclose(stackFd);
1039     }
1040 
1041     /* get hostname for printing out in the error message only */
1042     char hostname[MAXHOSTNAMELEN];
1043     if (gethostname(hostname,MAXHOSTNAMELEN) != 0) {
1044         /* error getting hostname; don't try to print it */
1045         hostname[0] = '\0';
1046     }
1047 
1048     auto printNDashes = [](int nDashes) {
1049         const char *dash64 =
1050             "----------------------------------------------------------------";
1051         int dividend = nDashes / 64;
1052         int remainder = nDashes % 64;
1053         while (dividend--) {
1054             fputs(dash64, stderr);
1055         }
1056         fputs(dash64 + 64 - remainder, stderr);
1057     };
1058 
1059     const char *haltMsg = " terminated";
1060     int labelSize = strlen(progname) + strlen(haltMsg);
1061     int bannerSize = std::max<int>(80, labelSize + strlen("-- ") * 2);
1062 
1063     fputs("\n", stderr);
1064     int numLeadingDashes = (bannerSize - labelSize) / 2 - 1;
1065     printNDashes(numLeadingDashes);
1066     fputs(" ", stderr);
1067     fputs(progname, stderr);
1068     fputs(haltMsg, stderr);
1069     fputs(" ", stderr);
1070     printNDashes(bannerSize - numLeadingDashes - labelSize - 2);
1071     fputs("\n", stderr);
1072 
1073     // print out any registered program info
1074     {
1075         ArchStackTrace_GetProgInfo().PrintInfoForErrors();
1076     }
1077 
1078     if (reason) {
1079         fputs("This stack trace was requested because: ", stderr);
1080         fputs(reason, stderr);
1081         fputs("\n", stderr);
1082     }
1083     if (message) {
1084         fputs(message, stderr);
1085         fputs("\n", stderr);
1086     }
1087 
1088     fputs("writing crash report to [ ", stderr);
1089     fputs(hostname, stderr);
1090     fputs(":", stderr);
1091     fputs(logfile, stderr);
1092     fputs(" ] ...", stderr);
1093     fflush(stderr);
1094 
1095     int loggedStack = _LogStackTraceForPid(logfile);
1096     fputs(" done.\n", stderr);
1097     // Additionally, print the first few lines of extra log information since
1098     // developers don't always think to look for it in the stack trace file.
1099     ArchStackTrace_GetLogInfo().EmitAnyExtraLogInfo(stderr, 3 /* max */);
1100     printNDashes(bannerSize);
1101     fputs("\n", stderr);
1102 
1103     if (loggedStack) {
1104         _FinishLoggingFatalStackTrace(progname, logfile, NULL /*session log*/,
1105                                       true /* crashing hard? */);
1106     }
1107 
1108     busy.clear(std::memory_order_release);
1109 }
1110 
1111 /*
1112  * Write a stack trace to a file, without forking.
1113  */
1114 void
ArchLogStackTrace(const std::string & reason,bool fatal,const string & sessionLog)1115 ArchLogStackTrace(const std::string& reason, bool fatal,
1116                   const string &sessionLog)
1117 {
1118     ArchLogStackTrace(ArchGetProgramNameForErrors(), reason, fatal,
1119                       sessionLog);
1120 }
1121 
1122 /*
1123  * Write a stack trace to a file, without forking.
1124  *
1125  * Note: use of mktemp is not threadsafe.
1126  */
1127 void
ArchLogStackTrace(const std::string & progname,const std::string & reason,bool fatal,const string & sessionLog)1128 ArchLogStackTrace(const std::string& progname, const std::string& reason,
1129                   bool fatal, const string &sessionLog)
1130 {
1131     string tmpFile;
1132     int fd = ArchMakeTmpFile(ArchStringPrintf("%s_%s",
1133                                               stackTracePrefix,
1134                                               ArchGetProgramNameForErrors()),
1135                              &tmpFile);
1136 
1137     /* get hostname for printing out in the error message only */
1138     char hostname[MAXHOSTNAMELEN];
1139     if (gethostname(hostname,MAXHOSTNAMELEN) != 0) {
1140         hostname[0]= '\0';
1141     }
1142 
1143     fprintf(stderr,
1144             "--------------------------------------------------------------\n"
1145             "A stack trace has been requested by %s because of %s\n",
1146             progname.c_str(), reason.c_str());
1147 
1148     // print out any registered program info
1149     {
1150         ArchStackTrace_GetProgInfo().PrintInfoForErrors();
1151     }
1152 
1153     if (fd != -1) {
1154         FILE* fout = ArchFdOpen(fd, "w");
1155         fprintf(stderr, "The stack can be found in %s:%s\n"
1156                 "--------------------------------------------------------------"
1157                 "\n", hostname, tmpFile.c_str());
1158         ArchPrintStackTrace(fout, progname, reason);
1159         /* If this is a fatal stack trace, attempt to add it to the db */
1160         if (fatal) {
1161             ArchStackTrace_GetLogInfo().EmitAnyExtraLogInfo(fout);
1162         }
1163         fclose(fout);
1164         if (fatal) {
1165             _FinishLoggingFatalStackTrace(progname.c_str(), tmpFile.c_str(),
1166                                           sessionLog.empty() ?
1167                                           NULL : sessionLog.c_str(),
1168                                           false /* crashing hard? */);
1169         }
1170     }
1171     else {
1172         /* we couldn't open the tmp file, so write the stack trace to stderr */
1173         fprintf(stderr,
1174                 "--------------------------------------------------------------"
1175                 "\n");
1176         ArchPrintStackTrace(stderr, progname, reason);
1177         ArchStackTrace_GetLogInfo().EmitAnyExtraLogInfo(stderr);
1178     }
1179     fprintf(stderr,
1180             "--------------------------------------------------------------\n");
1181 }
1182 
1183 #if defined(ARCH_OS_DARWIN)
1184 
1185 /*
1186  * This function will use _LogStackTraceForPid(const char*), which uses
1187  * the stacktrace script, to log the stack to a file.  Then it reads the lines
1188  * back in and puts them into an output iterator.
1189  */
1190 template <class OutputIterator>
1191 static void
_LogStackTraceToOutputIterator(OutputIterator oi,size_t maxDepth,bool addEndl)1192 _LogStackTraceToOutputIterator(OutputIterator oi, size_t maxDepth, bool addEndl)
1193 {
1194     /* Could use tmpnam but we're trying to be minimalist here. */
1195     char logfile[1024];
1196     _GetStackTraceName(logfile, sizeof(logfile));
1197 
1198     _LogStackTraceForPid(logfile);
1199 
1200     ifstream inFile(logfile);
1201     string line;
1202     size_t currentDepth = 0;
1203     while(!inFile.eof() && currentDepth < maxDepth) {
1204         getline(inFile, line);
1205         if(addEndl && !inFile.eof())
1206             line += "\n";
1207         *oi++ = line;
1208         currentDepth ++;
1209     }
1210 
1211     inFile.close();
1212     ArchUnlinkFile(logfile);
1213 }
1214 
1215 #endif
1216 
1217 /*
1218  * ArchPrintStackTrace
1219  *  print out a stack trace to the given FILE *.
1220  */
1221 void
ArchPrintStackTrace(FILE * fout,const std::string & programName,const std::string & reason)1222 ArchPrintStackTrace(FILE *fout, const std::string& programName, const std::string& reason)
1223 {
1224     ostringstream oss;
1225 
1226     ArchPrintStackTrace(oss, programName, reason);
1227 
1228     if (fout == NULL) {
1229         fout = stderr;
1230     }
1231 
1232     fprintf(fout, "%s", oss.str().c_str());
1233     fflush(fout);
1234 }
1235 
1236 void
ArchPrintStackTrace(FILE * fout,const std::string & reason)1237 ArchPrintStackTrace(FILE* fout, const std::string& reason)
1238 {
1239     ArchPrintStackTrace(fout, ArchGetProgramNameForErrors(), reason);
1240 }
1241 
1242 void
ArchPrintStackTrace(std::ostream & out,const std::string & reason)1243 ArchPrintStackTrace(std::ostream& out, const std::string& reason)
1244 {
1245     ArchPrintStackTrace(out, ArchGetProgramNameForErrors(), reason);
1246 }
1247 
1248 /*
1249  * ArchPrintStackTrace
1250  *  print out a stack trace to the given ostream.
1251  *
1252  * This function should probably not be called from a signal handler as
1253  * it calls printf and other unsafe functions.
1254  */
1255 void
ArchPrintStackTrace(ostream & oss,const std::string & programName,const std::string & reason)1256 ArchPrintStackTrace(ostream& oss,
1257                     const std::string& programName,
1258                     const std::string& reason)
1259 {
1260     oss << "==============================================================\n"
1261         << " A stack trace has been requested by "
1262         << programName << " because: " << reason << endl;
1263 
1264 #if defined(ARCH_OS_DARWIN)
1265 
1266     _LogStackTraceToOutputIterator(ostream_iterator<string>(oss), numeric_limits<size_t>::max(), true);
1267 
1268 #else
1269 
1270     vector<uintptr_t> frames;
1271     ArchGetStackFrames(MAX_STACK_DEPTH, &frames);
1272     ArchPrintStackFrames(oss, frames);
1273 
1274 #endif
1275 
1276     oss << "==============================================================\n";
1277 }
1278 
1279 void
ArchGetStackTrace(ostream & oss,const std::string & reason)1280 ArchGetStackTrace(ostream& oss, const std::string& reason)
1281 {
1282     ArchPrintStackTrace(oss, ArchGetProgramNameForErrors(), reason);
1283 }
1284 
1285 void
ArchGetStackFrames(size_t maxDepth,vector<uintptr_t> * frames)1286 ArchGetStackFrames(size_t maxDepth, vector<uintptr_t> *frames)
1287 {
1288     ArchGetStackFrames(maxDepth, /* skip = */ 0, frames);
1289 }
1290 
1291 #if defined(ARCH_OS_LINUX) && defined(ARCH_BITS_64)
// State shared with the unwind callback: how many frames may be stored,
// how many frames are still to be skipped, and where the captured
// addresses are stored.
struct Arch_UnwindContext {
    Arch_UnwindContext(size_t inMaxdepth, size_t inSkip,
                       std::vector<uintptr_t>* inFrames)
        : maxdepth(inMaxdepth)
        , skip(inSkip)
        , frames(inFrames)
    {
    }

    size_t maxdepth;                  // upper bound on frames->size()
    size_t skip;                      // frames still to be dropped
    std::vector<uintptr_t>* frames;   // destination for captured addresses
};
1303 
1304 static _Unwind_Reason_Code
Arch_unwindcb(struct _Unwind_Context * ctx,void * data)1305 Arch_unwindcb(struct _Unwind_Context *ctx, void *data)
1306 {
1307     Arch_UnwindContext* context = static_cast<Arch_UnwindContext*>(data);
1308 
1309     // never extend frames because it is unsafe to alloc inside a
1310     // signal handler, and this function is called sometimes (when
1311     // profiling) from a signal handler.
1312     if (context->frames->size() >= context->maxdepth) {
1313         return _URC_END_OF_STACK;
1314     }
1315     else {
1316         if (context->skip > 0) {
1317             --context->skip;
1318         }
1319         else {
1320             context->frames->push_back(_Unwind_GetIP(ctx));
1321         }
1322         return _URC_NO_REASON;
1323     }
1324 }
1325 
1326 /*
1327  * ArchGetStackFrames
1328  *  save some of stack into buffer.
1329  */
1330 void
ArchGetStackFrames(size_t maxdepth,size_t skip,vector<uintptr_t> * frames)1331 ArchGetStackFrames(size_t maxdepth, size_t skip, vector<uintptr_t> *frames)
1332 {
1333     /* use the exception handling mechanism to unwind our stack.
1334      * note this is gcc >= 3.3.3 only.
1335      */
1336     frames->reserve(maxdepth);
1337     Arch_UnwindContext context(maxdepth, skip, frames);
1338     _Unwind_Backtrace(Arch_unwindcb, (void*)&context);
1339 }
1340 
1341 #elif defined(ARCH_OS_WINDOWS)
1342 
1343 void
ArchGetStackFrames(size_t maxdepth,size_t skip,vector<uintptr_t> * frames)1344 ArchGetStackFrames(size_t maxdepth, size_t skip, vector<uintptr_t> *frames)
1345 {
1346     void* stack[MAX_STACK_DEPTH];
1347     size_t frameCount = CaptureStackBackTrace(skip, MAX_STACK_DEPTH, stack, NULL);
1348     frameCount = std::min(frameCount, maxdepth);
1349     frames->reserve(frameCount);
1350     for (size_t frame = 0; frame < frameCount; ++frame) {
1351         frames->push_back(reinterpret_cast<uintptr_t>(stack[frame]));
1352     }
1353 }
1354 
1355 #elif defined(ARCH_OS_DARWIN)
1356 
1357 void
ArchGetStackFrames(size_t maxdepth,size_t skip,vector<uintptr_t> * frames)1358 ArchGetStackFrames(size_t maxdepth, size_t skip, vector<uintptr_t> *frames)
1359 {
1360     void* stack[MAX_STACK_DEPTH];
1361     const size_t frameCount =
1362         backtrace(stack, std::max((size_t)MAX_STACK_DEPTH, maxdepth));
1363     frames->reserve(frameCount);
1364     for (size_t frame = skip; frame < frameCount; ++frame) {
1365         frames->push_back(reinterpret_cast<uintptr_t>(stack[frame]));
1366     }
1367 }
1368 
1369 #else
1370 
// Fallback for platforms without a stack capture implementation: ignores
// its arguments and leaves the output vector untouched.
void
ArchGetStackFrames(size_t /* maxdepth */,
                   size_t /* skip */,
                   std::vector<uintptr_t>* /* frames */)
{
    // Intentionally empty.
}
1375 
1376 #endif
1377 
1378 static
1379 std::string
Arch_DefaultStackTraceCallback(uintptr_t address)1380 Arch_DefaultStackTraceCallback(uintptr_t address)
1381 {
1382     // Subtract one from the address before getting the info because
1383     // the stack frames have the addresses where we'll return to,
1384     // not where we called from.  We don't want the info for the
1385     // instruction after our calls, we want it for the call itself.
1386     // We don't need the exact address of the call because
1387     // ArchGetAddressInfo() will return the info for the closest
1388     // address is knows about that not after the given address.
1389     // (That's good because the address minus one is not the start
1390     // of the call instruction but there's no way to figure that out
1391     // here without decoding assembly instructions.)
1392     std::string objectPath, symbolName;
1393     void* baseAddress, *symbolAddress;
1394     if (ArchGetAddressInfo(reinterpret_cast<void*>(address - 1),
1395                    &objectPath, &baseAddress,
1396                    &symbolName, &symbolAddress) && symbolAddress) {
1397         Arch_DemangleFunctionName(&symbolName);
1398         const uintptr_t symbolOffset =
1399             (uint64_t)(address - (uintptr_t)symbolAddress);
1400         return ArchStringPrintf("%s+%#0lx", symbolName.c_str(), symbolOffset);
1401     }
1402     else {
1403         return "<unknown>";
1404     }
1405 }
1406 
// Forward declaration so the functions below can call it before its
// definition.  Turns each frame address into one formatted line of text;
// when skipUnknownFrames is true, frames whose symbol resolves to
// "<unknown>" are left out.
static
vector<string>
Arch_GetStackTrace(const vector<uintptr_t> &frames,
                   bool skipUnknownFrames=false);
1411 
1412 /*
1413  * ArchPrintStackFrames
1414  *  print out stack frames to the given ostream.
1415  */
1416 void
ArchPrintStackFrames(ostream & oss,const vector<uintptr_t> & frames,bool skipUnknownFrames)1417 ArchPrintStackFrames(ostream& oss, const vector<uintptr_t> &frames,
1418                      bool skipUnknownFrames)
1419 {
1420     const vector<string> result = Arch_GetStackTrace(frames, skipUnknownFrames);
1421     for (size_t i = 0; i < result.size(); i++) {
1422         oss << result[i] << std::endl;
1423     }
1424 }
1425 
1426 /*
1427  * ArchGetStackTrace
1428  *  vector of strings
1429  */
1430 vector<string>
ArchGetStackTrace(size_t maxDepth)1431 ArchGetStackTrace(size_t maxDepth)
1432 {
1433     vector<uintptr_t> frames;
1434     ArchGetStackFrames(maxDepth, &frames);
1435     return Arch_GetStackTrace(frames);
1436 }
1437 
1438 
1439 static
1440 ArchStackTraceCallback*
Arch_GetStackTraceCallback()1441 Arch_GetStackTraceCallback()
1442 {
1443     static ArchStackTraceCallback callback;
1444     return &callback;
1445 }
1446 
1447 static vector<string>
Arch_GetStackTrace(const vector<uintptr_t> & frames,bool skipUnknownFrames)1448 Arch_GetStackTrace(const vector<uintptr_t> &frames,
1449                    bool skipUnknownFrames)
1450 {
1451     vector<string> rv;
1452 
1453     if (frames.empty()) {
1454         rv.push_back("No frames saved, stack traces probably not supported "
1455                      "on this architecture.");
1456         return rv;
1457     }
1458 
1459     ArchStackTraceCallback callback = *Arch_GetStackTraceCallback();
1460     if (!callback) {
1461         callback = Arch_DefaultStackTraceCallback;
1462     }
1463     int n = 0;
1464     for (size_t i = 0; i < frames.size(); i++) {
1465         const std::string symbolic = callback(frames[i]);
1466         if (skipUnknownFrames && symbolic == "<unknown>") {
1467             continue;
1468         }
1469         rv.push_back(ArchStringPrintf(" #%-3i 0x%016lx in %s",
1470                                       n++, frames[i], symbolic.c_str()));
1471     }
1472 
1473     return rv;
1474 }
1475 
1476 void
ArchSetStackTraceCallback(const ArchStackTraceCallback & cb)1477 ArchSetStackTraceCallback(const ArchStackTraceCallback& cb)
1478 {
1479     *Arch_GetStackTraceCallback() = cb;
1480 }
1481 
1482 void
ArchGetStackTraceCallback(ArchStackTraceCallback * cb)1483 ArchGetStackTraceCallback(ArchStackTraceCallback* cb)
1484 {
1485     if (cb) {
1486         *cb = *Arch_GetStackTraceCallback();
1487     }
1488 }
1489 
// Signal handler installed for SIGALRM by ArchCrashHandlerSystemv.  The
// signal number is ignored.
static void
archAlarmHandler(int)
{
    /* Deliberately empty: delivery of the signal is all that is needed,
     * because it wakes the waiting caller. */
}
1495 
1496 /*
1497  * Replacement for 'system' safe for a crash handler
1498  *
1499  * This function is a substitute for system() which does not allocate
1500  * or free any data, and times out after timeout seconds if the
1501  * operation in  argv is not complete.  callback is called every
1502  * second.  userData is passed to callback.  callback can be used,
1503  * for example, to print a '.' repeatedly to show progress.  The alarm
1504  * used in this function could interfere with setitimer or other calls
1505  * to alarm, and this function uses non-locking fork and exec if available
1506  * so should  not generally be used except following a catastrophe.
1507  */
1508 int
ArchCrashHandlerSystemv(const char * pathname,char * const argv[],int timeout,ArchCrashHandlerSystemCB callback,void * userData)1509 ArchCrashHandlerSystemv(const char* pathname, char *const argv[],
1510                         int timeout, ArchCrashHandlerSystemCB callback,
1511                         void* userData)
1512 {
1513 #if defined(ARCH_OS_WINDOWS)
1514     fprintf(stderr, "ArchCrashHandlerSystemv unimplemented for Windows\n");
1515     return -1;
1516 #else
1517     struct sigaction act, oldact;
1518     int retval = 0;
1519     int savedErrno;
1520     pid_t pid = nonLockingFork(); /* use non-locking fork */
1521     if (pid == -1) {
1522         /* fork() failed */
1523         char errBuffer[numericBufferSize];
1524         asitoa(errBuffer, errno);
1525         aswrite(2, "FAIL: Unable to fork() crash handler: errno=");
1526         aswrite(2, errBuffer);
1527         aswrite(2, "\n");
1528         return -1;
1529     }
1530     else if (pid == 0) {
1531         // Call setsid() in the child, which is intended to start a new
1532         // "session", and detach from the controlling tty.  We do this because
1533         // the stack tracing stuff invokes gdb, which wants to fiddle with the
1534         // tty, and if we're run in the background, that blocks, so we hang
1535         // trying to take the stacktrace.  This seems to fix that.
1536         //
1537         // If standard input is not a TTY then skip this.  This ensures
1538         // the child is part of the same process group as this process,
1539         // which is important on the renderfarm.
1540         if (isatty(0)) {
1541             setsid();
1542         }
1543 
1544         // Exec the handler.
1545         nonLockingExecv(pathname, argv);
1546 
1547         /* Exec failed */
1548         char errBuffer[numericBufferSize];
1549         asitoa(errBuffer, errno);
1550         aswrite(2, "FAIL: Unable to exec crash handler ");
1551         aswrite(2, pathname);
1552         aswrite(2, ": errno=");
1553         aswrite(2, errBuffer);
1554         aswrite(2, "\n");
1555         _exit(127);
1556     }
1557     else {
1558         int delta = 0;
1559         sigemptyset(&act.sa_mask);
1560 # if defined(SA_INTERRUPT)
1561         act.sa_flags   = SA_INTERRUPT;
1562 # else
1563         act.sa_flags   = 0;
1564 # endif
1565         act.sa_handler = &archAlarmHandler;
1566         sigaction(SIGALRM, &act, &oldact);
1567 
1568         /* loop until timeout seconds have passed */
1569         do {
1570             int status;
1571             pid_t child;
1572 
1573             /* a timeout <= 0 means forever */
1574             if (timeout > 0) {
1575                 delta = 1;  /* callback every delta seconds */
1576                 alarm(delta);
1577             }
1578 
1579             /* see what the child is up to */
1580             child = waitpid(pid, &status, 0 /* forever, unless interrupted */);
1581             if (child == (pid_t)-1) {
1582                 /* waitpid error.  return if not due to signal. */
1583                 if (errno != EINTR) {
1584                     retval = -1;
1585                     char errBuffer[numericBufferSize];
1586                     asitoa(errBuffer, errno);
1587                     aswrite(2, "FAIL: Crash handler wait failed: errno=");
1588                     aswrite(2, errBuffer);
1589                     aswrite(2, "\n");
1590                     goto out;
1591                 }
1592                 /* continue below */
1593             }
1594             else if (child != 0) {
1595                 /* child finished */
1596                 if (WIFEXITED(status)) {
1597                     /* child exited successfully.  it returned 127
1598                      * if the exec() failed.  we'll set errno to
1599                      * ENOENT in that case though the actual error
1600                      * could be something else. */
1601                     retval = WEXITSTATUS(status);
1602                     if (retval == 127) {
1603                         errno = ENOENT;
1604                         aswrite(2, "FAIL: Crash handler failed to exec\n");
1605                     }
1606                     goto out;
1607                 }
1608 
1609                 if (WIFSIGNALED(status)) {
1610                     /* child died due to uncaught signal */
1611                     errno = EINTR;
1612                     retval = -1;
1613                     char sigBuffer[numericBufferSize];
1614                     asitoa(sigBuffer, WTERMSIG(status));
1615                     aswrite(2, "FAIL: Crash handler died: signal=");
1616                     aswrite(2, sigBuffer);
1617                     aswrite(2, "\n");
1618                     goto out;
1619                 }
1620                 /* child died for an unknown reason */
1621                 errno = EINTR;
1622                 retval = -1;
1623                 char statusBuffer[numericBufferSize];
1624                 asitoa(statusBuffer, status);
1625                 aswrite(2, "FAIL: Crash handler unexpected wait status=");
1626                 aswrite(2, statusBuffer);
1627                 aswrite(2, "\n");
1628                 goto out;
1629             }
1630 
1631             /* child is still going.  invoke callback, countdown, and
1632              * wait again for next interrupt. */
1633             if (callback)
1634                 callback(userData);
1635             timeout -= delta;
1636         }  while (timeout > 0);
1637 
1638         /* timed out.  kill the child and wait for that. */
1639         alarm(0);  /* turn off alarm so it doesn't wake us during kill */
1640         kill(pid, SIGKILL);
1641         waitpid(pid, NULL, 0);
1642 
1643         /*
1644          * Set the errno to 'EBUSY' to imply that some resource was busy
1645          * and hence we're 'timing out'.
1646          */
1647         errno = EBUSY;
1648         retval = -1;
1649         aswrite(2, "FAIL: Crash handler timed out\n");
1650     }
1651 
1652   out:
1653     savedErrno = errno;
1654     alarm(0);
1655     sigaction(SIGALRM, &oldact, NULL);
1656 
1657     errno = savedErrno;
1658     return retval;
1659 #endif
1660 }
1661 
1662 PXR_NAMESPACE_CLOSE_SCOPE
1663