1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kControl
32 
33 #include "mongo/platform/basic.h"
34 
35 #include "mongo/util/stacktrace.h"
36 
37 #include <cstdlib>
38 #include <dlfcn.h>
39 #include <iostream>
40 #include <string>
41 #include <sys/utsname.h>
42 
43 #include "mongo/base/init.h"
44 #include "mongo/config.h"
45 #include "mongo/db/jsobj.h"
46 #include "mongo/util/hex.h"
47 #include "mongo/util/log.h"
48 #include "mongo/util/stringutils.h"
49 #include "mongo/util/version.h"
50 
51 #if defined(MONGO_CONFIG_HAVE_EXECINFO_BACKTRACE)
52 #include <execinfo.h>
53 #elif defined(__sun)
54 #include <ucontext.h>
55 #endif
56 
57 namespace mongo {
58 
59 namespace {
/// Upper bound on the number of stack frames captured for a single backtrace.
constexpr int maxBackTraceFrames = 100;

/// Extra unwinding information, encoded as a JSON document.  Stays null until the
/// ExtractSOMap initializer assigns it; when set, printStackTrace() emits it as "processInfo".
std::string* soMapJson = nullptr;
66 
67 /**
68  * Returns the "basename" of a path.  The returned StringData is valid until the data referenced
69  * by "path" goes out of scope or mutates.
70  *
71  * E.g., for "/foo/bar/my.txt", returns "my.txt".
72  */
getBaseName(StringData path)73 StringData getBaseName(StringData path) {
74     size_t lastSlash = path.rfind('/');
75     if (lastSlash == std::string::npos)
76         return path;
77     return path.substr(lastSlash + 1);
78 }
79 
80 // All platforms we build on have execinfo.h and we use backtrace() directly, with one exception
81 #if defined(MONGO_CONFIG_HAVE_EXECINFO_BACKTRACE)
82 using ::backtrace;
83 
84 // On Solaris 10, there is no execinfo.h, so we need to emulate it.
85 // Solaris 11 has execinfo.h, and this code doesn't get used.
86 #elif defined(__sun)
/**
 * Collects the addresses reported by a stack walk into a caller-supplied array.  Used by the
 * backtrace() emulation below as the state object handed to walkcontext().
 */
class WalkcontextCallback {
public:
    /**
     * @param array  destination buffer that receives one address per visited frame
     * @param size   capacity of "array" in elements; a negative size or a null array is
     *               treated as a capacity of zero so that nothing is ever written
     */
    WalkcontextCallback(uintptr_t* array, int size)
        : _position(0),
          // Clamp before converting: a negative int would otherwise become a huge size_t
          // capacity and let callbackFunction() write past the end of the buffer.
          _count((array && size > 0) ? static_cast<size_t>(size) : 0),
          _addresses(array) {}

    // This callback function is called from C code, and so must not throw exceptions
    //
    /**
     * Records "address" in the next free slot of thisContext's buffer.
     *
     * @return 0 when the address was stored, 1 when the buffer is already full
     */
    static int callbackFunction(uintptr_t address,
                                int signalNumber,
                                WalkcontextCallback* thisContext) {
        if (thisContext->_position < thisContext->_count) {
            thisContext->_addresses[thisContext->_position++] = address;
            return 0;
        }
        return 1;
    }

    /** Number of addresses stored so far. */
    int getCount() const {
        return static_cast<int>(_position);
    }

private:
    size_t _position;       // index of the next free slot in _addresses
    size_t _count;          // capacity of _addresses, in elements
    uintptr_t* _addresses;  // caller-owned output buffer
};
112 
113 typedef int (*WalkcontextCallbackFunc)(uintptr_t address, int signalNumber, void* thisContext);
114 
backtrace(void ** array,int size)115 int backtrace(void** array, int size) {
116     WalkcontextCallback walkcontextCallback(reinterpret_cast<uintptr_t*>(array), size);
117     ucontext_t context;
118     if (getcontext(&context) != 0) {
119         return 0;
120     }
121     int wcReturn = walkcontext(
122         &context,
123         reinterpret_cast<WalkcontextCallbackFunc>(WalkcontextCallback::callbackFunction),
124         static_cast<void*>(&walkcontextCallback));
125     if (wcReturn == 0) {
126         return walkcontextCallback.getCount();
127     }
128     return 0;
129 }
130 #else
131 // On unsupported platforms, we print an error instead of printing a stacktrace.
132 #define MONGO_NO_BACKTRACE
133 #endif
134 
135 }  // namespace
136 
137 #if defined(MONGO_NO_BACKTRACE)
void printStackTrace(std::ostream& os) {
    // No backtrace facility was detected for this platform, so emit a fixed notice instead.
    static const char unsupportedNotice[] =
        "This platform does not support printing stacktraces";
    os << unsupportedNotice << std::endl;
}
141 
142 #else
143 /**
144  * Prints a stack backtrace for the current thread to the specified ostream.
145  *
146  * Does not malloc, does not throw.
147  *
148  * The format of the backtrace is:
149  *
150  * ----- BEGIN BACKTRACE -----
151  * JSON backtrace
152  * Human-readable backtrace
153  * -----  END BACKTRACE  -----
154  *
155  * The JSON backtrace will be a JSON object with a "backtrace" field, and optionally others.
156  * The "backtrace" field is an array, whose elements are frame objects.  A frame object has a
157  * "b" field, which is the base-address of the library or executable containing the symbol, and
158  * an "o" field, which is the offset into said library or executable of the symbol.
159  *
160  * The JSON backtrace may optionally contain additional information useful to a backtrace
161  * analysis tool.  For example, on Linux it contains a subobject named "somap", describing
162  * the objects referenced in the "b" fields of the "backtrace" list.
163  *
164  * @param os    ostream& to receive printed stack backtrace
165  */
printStackTrace(std::ostream & os)166 void printStackTrace(std::ostream& os) {
167     static const char unknownFileName[] = "???";
168     void* addresses[maxBackTraceFrames];
169     Dl_info dlinfoForFrames[maxBackTraceFrames];
170 
171     ////////////////////////////////////////////////////////////
172     // Get the backtrace addresses.
173     ////////////////////////////////////////////////////////////
174 
175     const int addressCount = backtrace(addresses, maxBackTraceFrames);
176     if (addressCount == 0) {
177         const int err = errno;
178         os << "Unable to collect backtrace addresses (errno: " << err << ' ' << strerror(err) << ')'
179            << std::endl;
180         return;
181     }
182 
183     ////////////////////////////////////////////////////////////
184     // Collect symbol information for each backtrace address.
185     ////////////////////////////////////////////////////////////
186 
187     os << std::hex << std::uppercase << '\n';
188     for (int i = 0; i < addressCount; ++i) {
189         Dl_info& dlinfo(dlinfoForFrames[i]);
190         if (!dladdr(addresses[i], &dlinfo)) {
191             dlinfo.dli_fname = unknownFileName;
192             dlinfo.dli_fbase = NULL;
193             dlinfo.dli_sname = NULL;
194             dlinfo.dli_saddr = NULL;
195         }
196         os << ' ' << addresses[i];
197     }
198 
199     os << "\n----- BEGIN BACKTRACE -----\n";
200 
201     ////////////////////////////////////////////////////////////
202     // Display the JSON backtrace
203     ////////////////////////////////////////////////////////////
204 
205     os << "{\"backtrace\":[";
206     for (int i = 0; i < addressCount; ++i) {
207         const Dl_info& dlinfo = dlinfoForFrames[i];
208         const uintptr_t fileOffset = uintptr_t(addresses[i]) - uintptr_t(dlinfo.dli_fbase);
209         if (i)
210             os << ',';
211         os << "{\"b\":\"" << uintptr_t(dlinfo.dli_fbase) << "\",\"o\":\"" << fileOffset;
212         if (dlinfo.dli_sname) {
213             os << "\",\"s\":\"" << dlinfo.dli_sname;
214         }
215         os << "\"}";
216     }
217     os << ']';
218 
219     if (soMapJson)
220         os << ",\"processInfo\":" << *soMapJson;
221     os << "}\n";
222 
223     ////////////////////////////////////////////////////////////
224     // Display the human-readable trace
225     ////////////////////////////////////////////////////////////
226     for (int i = 0; i < addressCount; ++i) {
227         Dl_info& dlinfo(dlinfoForFrames[i]);
228         os << ' ';
229         if (dlinfo.dli_fbase) {
230             os << getBaseName(dlinfo.dli_fname) << '(';
231             if (dlinfo.dli_sname) {
232                 const uintptr_t offset = uintptr_t(addresses[i]) - uintptr_t(dlinfo.dli_saddr);
233                 os << dlinfo.dli_sname << "+0x" << offset;
234             } else {
235                 const uintptr_t offset = uintptr_t(addresses[i]) - uintptr_t(dlinfo.dli_fbase);
236                 os << "+0x" << offset;
237             }
238             os << ')';
239         } else {
240             os << unknownFileName;
241         }
242         os << " [" << addresses[i] << ']' << std::endl;
243     }
244 
245     os << std::dec << std::nouppercase;
246     os << "-----  END BACKTRACE  -----" << std::endl;
247 }
248 
249 #endif
250 
251 namespace {
252 
253 void addOSComponentsToSoMap(BSONObjBuilder* soMap);
254 
255 /**
256  * Builds the "soMapJson" string, which is a JSON encoding of various pieces of information
257  * about a running process, including the map from load addresses to shared objects loaded at
258  * those addresses.
259  */
MONGO_INITIALIZER(ExtractSOMap)260 MONGO_INITIALIZER(ExtractSOMap)(InitializerContext*) {
261     BSONObjBuilder soMap;
262 
263     auto&& vii = VersionInfoInterface::instance(VersionInfoInterface::NotEnabledAction::kFallback);
264     soMap << "mongodbVersion" << vii.version();
265     soMap << "gitVersion" << vii.gitVersion();
266     soMap << "compiledModules" << vii.modules();
267 
268     struct utsname unameData;
269     if (!uname(&unameData)) {
270         BSONObjBuilder unameBuilder(soMap.subobjStart("uname"));
271         unameBuilder << "sysname" << unameData.sysname << "release" << unameData.release
272                      << "version" << unameData.version << "machine" << unameData.machine;
273     }
274     addOSComponentsToSoMap(&soMap);
275     soMapJson = new std::string(soMap.done().jsonString(Strict));
276     return Status::OK();
277 }
278 }  // namespace
279 
280 }  // namespace mongo
281 
282 #if defined(__linux__)
283 
284 #include <elf.h>
285 #include <link.h>
286 
287 namespace mongo {
288 namespace {
289 
/**
 * Returns the smallest offset that is greater than or equal to "offset" and is a multiple of
 * the size of an ELF Word.  The mask arithmetic relies on that size being a power of two.
 */
size_t roundUpToElfWordAlignment(size_t offset) {
    const size_t alignmentMask = sizeof(ElfW(Word)) - 1;
    return (offset + alignmentMask) & ~alignmentMask;
}
297 
298 /**
299  * Returns the size in bytes of an ELF note entry with the given header.
300  */
getNoteSizeBytes(const ElfW (Nhdr)& noteHeader)301 size_t getNoteSizeBytes(const ElfW(Nhdr) & noteHeader) {
302     return sizeof(noteHeader) + roundUpToElfWordAlignment(noteHeader.n_namesz) +
303         roundUpToElfWordAlignment(noteHeader.n_descsz);
304 }
305 
/**
 * Returns true if the given ELF program header has the PF_R (readable) flag set.
 */
bool isSegmentMappedReadable(const ElfW(Phdr) & phdr) {
    return (phdr.p_flags & PF_R) != 0;
}
312 
313 /**
314  * Processes an ELF Phdr for a NOTE segment, updating "soInfo".
315  *
316  * Looks for the GNU Build ID NOTE, and adds a buildId field to soInfo if it finds one.
317  */
processNoteSegment(const dl_phdr_info & info,const ElfW (Phdr)& phdr,BSONObjBuilder * soInfo)318 void processNoteSegment(const dl_phdr_info& info, const ElfW(Phdr) & phdr, BSONObjBuilder* soInfo) {
319 #ifdef NT_GNU_BUILD_ID
320     const char* const notesBegin = reinterpret_cast<const char*>(info.dlpi_addr) + phdr.p_vaddr;
321     const char* const notesEnd = notesBegin + phdr.p_memsz;
322     ElfW(Nhdr) noteHeader;
323     for (const char* notesCurr = notesBegin; (notesCurr + sizeof(noteHeader)) < notesEnd;
324          notesCurr += getNoteSizeBytes(noteHeader)) {
325         memcpy(&noteHeader, notesCurr, sizeof(noteHeader));
326         if (noteHeader.n_type != NT_GNU_BUILD_ID)
327             continue;
328         const char* const noteNameBegin = notesCurr + sizeof(noteHeader);
329         if (StringData(noteNameBegin, noteHeader.n_namesz - 1) != ELF_NOTE_GNU) {
330             continue;
331         }
332         const char* const noteDescBegin =
333             noteNameBegin + roundUpToElfWordAlignment(noteHeader.n_namesz);
334         soInfo->append("buildId", toHex(noteDescBegin, noteHeader.n_descsz));
335     }
336 #endif
337 }
338 
339 /**
340  * Processes an ELF Phdr for a LOAD segment, updating "soInfo".
341  *
342  * The goal of this operation is to find out if the current object is an executable or a shared
343  * object, by looking for the LOAD segment that maps the first several bytes of the file (the
344  * ELF header).  If it's an executable, this method updates soInfo with the load address of the
345  * segment
346  */
processLoadSegment(const dl_phdr_info & info,const ElfW (Phdr)& phdr,BSONObjBuilder * soInfo)347 void processLoadSegment(const dl_phdr_info& info, const ElfW(Phdr) & phdr, BSONObjBuilder* soInfo) {
348     if (phdr.p_offset)
349         return;
350     if (phdr.p_memsz < sizeof(ElfW(Ehdr)))
351         return;
352 
353     // Segment includes beginning of file and is large enough to hold the ELF header
354     ElfW(Ehdr) eHeader;
355     memcpy(&eHeader, reinterpret_cast<const char*>(info.dlpi_addr) + phdr.p_vaddr, sizeof(eHeader));
356 
357     std::string quotedFileName = "\"" + escape(info.dlpi_name) + "\"";
358 
359     if (memcmp(&eHeader.e_ident[0], ELFMAG, SELFMAG)) {
360         warning() << "Bad ELF magic number in image of " << quotedFileName;
361         return;
362     }
363 
364 #define MKELFCLASS(N) _MKELFCLASS(N)
365 #define _MKELFCLASS(N) ELFCLASS##N
366     if (eHeader.e_ident[EI_CLASS] != MKELFCLASS(__ELF_NATIVE_CLASS)) {
367         warning() << "Expected elf file class of " << quotedFileName << " to be "
368                   << MKELFCLASS(__ELF_NATIVE_CLASS) << "(" << __ELF_NATIVE_CLASS
369                   << "-bit), but found " << int(eHeader.e_ident[4]);
370         return;
371     }
372 
373     if (eHeader.e_ident[EI_VERSION] != EV_CURRENT) {
374         warning() << "Wrong ELF version in " << quotedFileName << ".  Expected " << EV_CURRENT
375                   << " but found " << int(eHeader.e_ident[EI_VERSION]);
376         return;
377     }
378 
379     soInfo->append("elfType", eHeader.e_type);
380 
381     switch (eHeader.e_type) {
382         case ET_EXEC:
383             break;
384         case ET_DYN:
385             return;
386         default:
387             warning() << "Surprised to find " << quotedFileName << " is ELF file of type "
388                       << eHeader.e_type;
389             return;
390     }
391 
392     soInfo->append("b", integerToHex(phdr.p_vaddr));
393 }
394 
395 /**
396  * Callback that processes an ELF object linked into the current address space.
397  *
398  * Used by dl_iterate_phdr in ExtractSOMap, below, to build up the list of linked
399  * objects.
400  *
401  * Each entry built by an invocation of ths function may have the following fields:
402  * * "b", the base address at which an object is loaded.
403  * * "path", the path on the file system to the object.
404  * * "buildId", the GNU Build ID of the object.
405  * * "elfType", the ELF type of the object, typically 2 or 3 (executable or SO).
406  *
407  * At post-processing time, the buildId field can be used to identify the file containing
408  * debug symbols for objects loaded at the given "laodAddr", which in turn can be used with
409  * the "backtrace" displayed in printStackTrace to get detailed unwind information.
410  */
outputSOInfo(dl_phdr_info * info,size_t sz,void * data)411 int outputSOInfo(dl_phdr_info* info, size_t sz, void* data) {
412     BSONObjBuilder soInfo(reinterpret_cast<BSONArrayBuilder*>(data)->subobjStart());
413     if (info->dlpi_addr)
414         soInfo.append("b", integerToHex(ElfW(Addr)(info->dlpi_addr)));
415     if (info->dlpi_name && *info->dlpi_name)
416         soInfo.append("path", info->dlpi_name);
417 
418     for (ElfW(Half) i = 0; i < info->dlpi_phnum; ++i) {
419         const ElfW(Phdr) & phdr(info->dlpi_phdr[i]);
420         if (!isSegmentMappedReadable(phdr))
421             continue;
422         switch (phdr.p_type) {
423             case PT_NOTE:
424                 processNoteSegment(*info, phdr, &soInfo);
425                 break;
426             case PT_LOAD:
427                 processLoadSegment(*info, phdr, &soInfo);
428                 break;
429             default:
430                 break;
431         }
432     }
433     return 0;
434 }
435 
addOSComponentsToSoMap(BSONObjBuilder * soMap)436 void addOSComponentsToSoMap(BSONObjBuilder* soMap) {
437     BSONArrayBuilder soList(soMap->subarrayStart("somap"));
438     dl_iterate_phdr(outputSOInfo, &soList);
439     soList.done();
440 }
441 
442 }  // namespace
443 
444 }  // namespace mongo
445 
446 #elif defined(__APPLE__) && defined(__MACH__)
447 
448 #include <mach-o/dyld.h>
449 #include <mach-o/ldsyms.h>
450 #include <mach-o/loader.h>
451 
452 namespace mongo {
453 namespace {
lcNext(const char * lcCurr)454 const char* lcNext(const char* lcCurr) {
455     const load_command* cmd = reinterpret_cast<const load_command*>(lcCurr);
456     return lcCurr + cmd->cmdsize;
457 }
458 
lcType(const char * lcCurr)459 uint32_t lcType(const char* lcCurr) {
460     const load_command* cmd = reinterpret_cast<const load_command*>(lcCurr);
461     return cmd->cmd;
462 }
463 
464 template <typename SegmentCommandType>
maybeAppendLoadAddr(BSONObjBuilder * soInfo,const SegmentCommandType * segmentCommand)465 bool maybeAppendLoadAddr(BSONObjBuilder* soInfo, const SegmentCommandType* segmentCommand) {
466     if (StringData(SEG_TEXT) != segmentCommand->segname) {
467         return false;
468     }
469     *soInfo << "vmaddr" << integerToHex(segmentCommand->vmaddr);
470     return true;
471 }
472 
addOSComponentsToSoMap(BSONObjBuilder * soMap)473 void addOSComponentsToSoMap(BSONObjBuilder* soMap) {
474     const uint32_t numImages = _dyld_image_count();
475     BSONArrayBuilder soList(soMap->subarrayStart("somap"));
476     for (uint32_t i = 0; i < numImages; ++i) {
477         BSONObjBuilder soInfo(soList.subobjStart());
478         const char* name = _dyld_get_image_name(i);
479         if (name)
480             soInfo << "path" << name;
481         const mach_header* header = _dyld_get_image_header(i);
482         if (!header)
483             continue;
484         size_t headerSize;
485         if (header->magic == MH_MAGIC) {
486             headerSize = sizeof(mach_header);
487         } else if (header->magic == MH_MAGIC_64) {
488             headerSize = sizeof(mach_header_64);
489         } else {
490             continue;
491         }
492         soInfo << "machType" << header->filetype;
493         soInfo << "b" << integerToHex(reinterpret_cast<intptr_t>(header));
494         const char* const loadCommandsBegin = reinterpret_cast<const char*>(header) + headerSize;
495         const char* const loadCommandsEnd = loadCommandsBegin + header->sizeofcmds;
496 
497         // Search the "load command" data in the Mach object for the entry encoding the UUID of the
498         // object, and for the __TEXT segment. Adding the "vmaddr" field of the __TEXT segment load
499         // command of an executable or dylib to an offset in that library provides an address
500         // suitable to passing to atos or llvm-symbolizer for symbolization.
501         //
502         // See, for example, http://lldb.llvm.org/symbolication.html.
503         bool foundTextSegment = false;
504         for (const char* lcCurr = loadCommandsBegin; lcCurr < loadCommandsEnd;
505              lcCurr = lcNext(lcCurr)) {
506             switch (lcType(lcCurr)) {
507                 case LC_UUID: {
508                     const auto uuidCmd = reinterpret_cast<const uuid_command*>(lcCurr);
509                     soInfo << "buildId" << toHex(uuidCmd->uuid, 16);
510                     break;
511                 }
512                 case LC_SEGMENT_64:
513                     if (!foundTextSegment) {
514                         foundTextSegment = maybeAppendLoadAddr(
515                             &soInfo, reinterpret_cast<const segment_command_64*>(lcCurr));
516                     }
517                     break;
518                 case LC_SEGMENT:
519                     if (!foundTextSegment) {
520                         foundTextSegment = maybeAppendLoadAddr(
521                             &soInfo, reinterpret_cast<const segment_command*>(lcCurr));
522                     }
523                     break;
524             }
525         }
526     }
527 }
}  // namespace
529 }  // namespace mongo
530 #else
531 namespace mongo {
532 namespace {
addOSComponentsToSoMap(BSONObjBuilder * soMap)533 void addOSComponentsToSoMap(BSONObjBuilder* soMap) {}
}  // namespace
535 }  // namespace mongo
536 #endif
537