1 /*
2  * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #include "precompiled.hpp"
26 #include "utilities/globalDefinitions.hpp"
27 #include "symbolengine.hpp"
28 #include "utilities/debug.hpp"
29 #include "utilities/ostream.hpp"
30 #include "windbghelp.hpp"
31 
32 #include <windows.h>
33 
34 #include <imagehlp.h>
35 #include <psapi.h>
36 
37 
38 
39 // This code may be invoked normally but also as part of error reporting
40 // In the latter case, we may run under tight memory constraints (native oom)
41 // or in a stack overflow situation or the C heap may be corrupted. We may
42 // run very early before VM initialization or very late when C exit handlers
43 // run. In all these cases, callstacks would still be nice, so lets be robust.
44 //
45 // We need a number of buffers - for the pdb search path, module handle
46 // lists, for demangled symbols, etc.
47 //
48 // These buffers, while typically small, may need to be large for corner
49 // cases (e.g. templatized C++ symbols, or many DLLs loaded). Where do we
50 // allocate them?
51 //
52 // We may be in error handling for a stack overflow, so lets not put them on
53 // the stack.
54 //
55 // Dynamically allocating them may fail if we are handling a native OOM. It
56 // is also a bit dangerous, as the C heap may be corrupted already.
57 //
58 // That leaves pre-allocating them globally, which is safe and should always
59 // work (if we synchronize access) but incurs an undesirable footprint for
60 // non-error cases.
61 //
// We follow a two-way strategy: Allocate the buffers on the C heap in a
// reasonably large size. Failing that, fall back to static preallocated
// buffers. The size of the latter is large enough to handle common scenarios
// but small enough not to drive up the footprint too much (several kb).
66 //
// We keep these buffers around once allocated, for subsequent requests. This
// means that by running the initialization early at a safe time - before
// any error happens - buffers can be pre-allocated. This increases the chance
// of useful callstacks in error scenarios in exchange for some cycles spent
// at startup. This behavior can be controlled with -XX:+InitializeDbgHelpEarly
// and is off by default.
73 
74 ///////
75 
// A simple buffer which attempts to allocate an optimal size but will
// fall back to a static minimally sized array on allocation error.
//
// The last element of the storage in use is reserved for an overflow
// sentinel; capacity() therefore reports one element less than the size of
// that storage.
template <class T, int MINIMAL_CAPACITY, int OPTIMAL_CAPACITY>
class SimpleBufferWithFallback {
  T _fallback_buffer[MINIMAL_CAPACITY];
  T* _p;
  int _capacity;

  // A sentinel at the end of the buffer to catch overflows. It occupies the
  // last slot, which is then excluded from the reported capacity.
  void imprint_sentinel() {
    assert(_p && _capacity > 0, "Buffer must be allocated");
    _p[_capacity - 1] = (T)'X';
    _capacity --;
  }

  // Points the buffer at the embedded fallback array.
  void use_fallback_buffer() {
    _p = _fallback_buffer;
    _capacity = (int)(sizeof(_fallback_buffer) / sizeof(T));
  }

  // Clears the first element and reserves the sentinel slot.
  void prepare_for_use() {
    // Value-initialize instead of assigning '\0': a character literal is not
    // a null pointer constant, so assigning it is not portable when T is a
    // pointer type (e.g. HMODULE).
    _p[0] = T();
    imprint_sentinel();
  }

public:

  // Note: declared with the injected class name; naming a constructor with
  // a template-id is rejected by newer language versions.
  SimpleBufferWithFallback()
    : _p(NULL), _capacity(0)
  {}

  // Note: no destructor because these buffers should, once
  // allocated, live until process end.
  // ~SimpleBufferWithFallback()

  // Allocates the buffer: OPTIMAL_CAPACITY elements from the C heap or, if
  // that fails, the embedded fallback array. Must be called exactly once.
  //
  // Note: We use raw ::malloc/::free here instead of os::malloc()/os::free
  // to prevent circularities or secondary crashes during error reporting.
  virtual void initialize () {
    assert(_p == NULL && _capacity == 0, "Only call once.");
    const size_t bytes = OPTIMAL_CAPACITY * sizeof(T);
    T* q = (T*) ::malloc(bytes);
    if (q != NULL) {
      _p = q;
      _capacity = OPTIMAL_CAPACITY;
    } else {
      use_fallback_buffer();
    }
    prepare_for_use();
  }

  // We need a way to reset the buffer to fallback size for one special
  // case, where two buffers need to be of identical capacity.
  // Any heap storage is released; the buffer content is discarded.
  void reset_to_fallback_capacity() {
    if (_p != _fallback_buffer) {
      ::free(_p);
    }
    use_fallback_buffer();
    prepare_for_use();
  }

  T* ptr()                { return _p; }
  const T* ptr() const    { return _p; }
  // Number of usable elements (the sentinel slot is not included).
  int capacity() const    { return _capacity; }

  // Debug builds only: verifies that the sentinel behind the last usable
  // element is still intact.
#ifdef ASSERT
  void check() const {
    assert(_p != NULL, "Buffer must be allocated");
    assert(_p[_capacity] == (T)'X', "sentinel lost");
  }
#else
  void check() const {}
#endif

};
143 
144 ////
145 
146 // ModuleHandleArray: a list holding module handles. Needs to be large enough
147 // to hold one handle per loaded DLL.
148 // Note: a standard OpenJDK loads normally ~30 libraries, including system
149 // libraries, without third party libraries.
150 
151 typedef SimpleBufferWithFallback <HMODULE, 48, 512> ModuleHandleArrayBase;
152 
153 class ModuleHandleArray : public ModuleHandleArrayBase {
154 
155   int _num; // Number of handles in this array (may be < capacity).
156 
157 public:
158 
initialize()159   void initialize() {
160     ModuleHandleArrayBase::initialize();
161     _num = 0;
162   }
163 
num() const164   int num() const { return _num; }
set_num(int n)165   void set_num(int n) {
166     assert(n <= capacity(), "Too large");
167     _num = n;
168   }
169 
170   // Compare with another list; returns true if all handles are equal (incl.
171   // sort order)
equals(const ModuleHandleArray & other) const172   bool equals(const ModuleHandleArray& other) const {
173     if (_num != other._num) {
174       return false;
175     }
176     if (::memcmp(ptr(), other.ptr(), _num * sizeof(HMODULE)) != 0) {
177       return false;
178     }
179     return true;
180   }
181 
182   // Copy content from other list.
copy_content_from(ModuleHandleArray & other)183   void copy_content_from(ModuleHandleArray& other) {
184     assert(capacity() == other.capacity(), "Different capacities.");
185     memcpy(ptr(), other.ptr(), other._num * sizeof(HMODULE));
186     _num = other._num;
187   }
188 
189 };
190 
191 ////
192 
193 // PathBuffer: a buffer to hold and work with a pdb search PATH - a concatenation
194 // of multiple directories separated by ';'.
195 // A single directory name can be (NTFS) as long as 32K, but in reality is
196 // seldom larger than the (historical) MAX_PATH of 260.
197 
198 #define MINIMUM_PDB_PATH_LENGTH  MAX_PATH * 4
199 #define OPTIMAL_PDB_PATH_LENGTH  MAX_PATH * 64
200 
201 typedef SimpleBufferWithFallback<char, MINIMUM_PDB_PATH_LENGTH, OPTIMAL_PDB_PATH_LENGTH> PathBufferBase;
202 
203 class PathBuffer: public PathBufferBase {
204 public:
205 
206   // Search PDB path for a directory. Search is case insensitive. Returns
207   // true if directory was found in the path, false otherwise.
contains_directory(const char * directory)208   bool contains_directory(const char* directory) {
209     if (ptr() == NULL) {
210       return false;
211     }
212     const size_t len = strlen(directory);
213     if (len == 0) {
214       return false;
215     }
216     char* p = ptr();
217     for(;;) {
218       char* q = strchr(p, ';');
219       if (q != NULL) {
220         if (len == (q - p)) {
221           if (strnicmp(p, directory, len) == 0) {
222             return true;
223           }
224         }
225         p = q + 1;
226       } else {
227         // tail
228         return stricmp(p, directory) == 0 ? true : false;
229       }
230     }
231     return false;
232   }
233 
234   // Appends the given directory to the path. Returns false if internal
235   // buffer size was not sufficient.
append_directory(const char * directory)236   bool append_directory(const char* directory) {
237     const size_t len = strlen(directory);
238     if (len == 0) {
239       return false;
240     }
241     char* p = ptr();
242     const size_t len_now = strlen(p);
243     const size_t needs_capacity = len_now + 1 + len + 1; // xxx;yy\0
244     if (needs_capacity > (size_t)capacity()) {
245       return false; // OOM
246     }
247     if (len_now > 0) { // Not the first path element.
248       p += len_now;
249       *p = ';';
250       p ++;
251     }
252     strcpy(p, directory);
253     return true;
254   }
255 
256 };
257 
258 // A simple buffer to hold one single file name. A file name can be (NTFS) as
259 // long as 32K, but in reality is seldom larger than MAX_PATH.
260 typedef SimpleBufferWithFallback<char, MAX_PATH, 8 * K> FileNameBuffer;
261 
262 // A buffer to hold a C++ symbol. Usually small, but symbols may be larger for
263 // templates.
264 #define MINIMUM_SYMBOL_NAME_LEN 128
265 #define OPTIMAL_SYMBOL_NAME_LEN 1024
266 
267 typedef SimpleBufferWithFallback<uint8_t,
268         sizeof(IMAGEHLP_SYMBOL64) + MINIMUM_SYMBOL_NAME_LEN,
269         sizeof(IMAGEHLP_SYMBOL64) + OPTIMAL_SYMBOL_NAME_LEN> SymbolBuffer;
270 
271 static struct {
272 
273   // Two buffers to hold lists of loaded modules. handles across invocations of
274   // SymbolEngine::recalc_search_path().
275   ModuleHandleArray loaded_modules;
276   ModuleHandleArray last_loaded_modules;
277   // Buffer to retrieve and assemble the pdb search path.
278   PathBuffer search_path;
279   // Buffer to retrieve directory names for loaded modules.
280   FileNameBuffer dir_name;
281   // Buffer to retrieve decoded symbol information (in SymbolEngine::decode)
282   SymbolBuffer decode_buffer;
283 
initialize__anon32130bdd0108284   void initialize() {
285     search_path.initialize();
286     dir_name.initialize();
287     decode_buffer.initialize();
288 
289     loaded_modules.initialize();
290     last_loaded_modules.initialize();
291 
292     // Note: both module lists must have the same capacity. If one allocation
293     // did fail, let them both fall back to the fallback size.
294     if (loaded_modules.capacity() != last_loaded_modules.capacity()) {
295       loaded_modules.reset_to_fallback_capacity();
296       last_loaded_modules.reset_to_fallback_capacity();
297     }
298 
299     assert(search_path.capacity() > 0 && dir_name.capacity() > 0 &&
300             decode_buffer.capacity() > 0 && loaded_modules.capacity() > 0 &&
301             last_loaded_modules.capacity() > 0, "Init error.");
302   }
303 
304 } g_buffers;
305 
306 
307 // Scan the loaded modules.
308 //
309 // For each loaded module, add the directory it is located in to the pdb search
310 // path, but avoid duplicates. Prior search path content is preserved.
311 //
312 // If p_search_path_was_updated is not NULL, points to a bool which, upon
313 // successful return from the function, contains true if the search path
314 // was updated, false if no update was needed because no new DLLs were
315 // loaded or unloaded.
316 //
317 // Returns true for success, false for error.
recalc_search_path_locked(bool * p_search_path_was_updated)318 static bool recalc_search_path_locked(bool* p_search_path_was_updated) {
319 
320   if (p_search_path_was_updated) {
321     *p_search_path_was_updated = false;
322   }
323 
324   HANDLE hProcess = ::GetCurrentProcess();
325 
326   BOOL success = false;
327 
328   // 1) Retrieve current set search path.
329   //    (PDB search path is a global setting and someone might have modified
330   //     it, so take care not to remove directories, just to add our own).
331 
332   if (!WindowsDbgHelp::symGetSearchPath(hProcess, g_buffers.search_path.ptr(),
333                                        (int)g_buffers.search_path.capacity())) {
334     return false;
335   }
336   DEBUG_ONLY(g_buffers.search_path.check();)
337 
338   // 2) Retrieve list of modules handles of all currently loaded modules.
339   DWORD bytes_needed = 0;
340   const DWORD buffer_capacity_bytes = (DWORD)g_buffers.loaded_modules.capacity() * sizeof(HMODULE);
341   success = ::EnumProcessModules(hProcess, g_buffers.loaded_modules.ptr(),
342                                  buffer_capacity_bytes, &bytes_needed);
343   DEBUG_ONLY(g_buffers.loaded_modules.check();)
344 
345   // Note: EnumProcessModules is sloppily defined in terms of whether a
346   // too-small output buffer counts as error. Will it truncate but still
347   // return TRUE? Nobody knows and the manpage is not telling. So we count
348   // truncation it as error, disregarding the return value.
349   if (!success || bytes_needed > buffer_capacity_bytes) {
350     return false;
351   } else {
352     const int num_modules = bytes_needed / sizeof(HMODULE);
353     g_buffers.loaded_modules.set_num(num_modules);
354   }
355 
356   // Compare the list of module handles with the last list. If the lists are
357   // identical, no additional dlls were loaded and we can stop.
358   if (g_buffers.loaded_modules.equals(g_buffers.last_loaded_modules)) {
359     return true;
360   } else {
361     // Remember the new set of module handles and continue.
362     g_buffers.last_loaded_modules.copy_content_from(g_buffers.loaded_modules);
363   }
364 
365   // 3) For each loaded module: retrieve directory from which it was loaded.
366   //    Add directory to search path (but avoid duplicates).
367 
368   bool did_modify_searchpath = false;
369 
370   for (int i = 0; i < (int)g_buffers.loaded_modules.num(); i ++) {
371 
372     const HMODULE hMod = g_buffers.loaded_modules.ptr()[i];
373     char* const filebuffer = g_buffers.dir_name.ptr();
374     const int file_buffer_capacity = g_buffers.dir_name.capacity();
375     const int len_returned = (int)::GetModuleFileName(hMod, filebuffer, (DWORD)file_buffer_capacity);
376     DEBUG_ONLY(g_buffers.dir_name.check();)
377     if (len_returned == 0) {
378       // This may happen when a module gets unloaded after our call to EnumProcessModules.
379       // It should be rare but may sporadically happen. Just ignore and continue with the
380       // next module.
381       continue;
382     } else if (len_returned == file_buffer_capacity) {
383       // Truncation. Just skip this module and continue with the next module.
384       continue;
385     }
386 
387     // Cut file name part off.
388     char* last_slash = ::strrchr(filebuffer, '\\');
389     if (last_slash == NULL) {
390       last_slash = ::strrchr(filebuffer, '/');
391     }
392     if (last_slash) {
393       *last_slash = '\0';
394     }
395 
396     // If this is already part of the search path, ignore it, otherwise
397     // append to search path.
398     if (!g_buffers.search_path.contains_directory(filebuffer)) {
399       if (!g_buffers.search_path.append_directory(filebuffer)) {
400         return false; // oom
401       }
402       DEBUG_ONLY(g_buffers.search_path.check();)
403       did_modify_searchpath = true;
404     }
405 
406   } // for each loaded module.
407 
408   // If we did not modify the search path, nothing further needs to be done.
409   if (!did_modify_searchpath) {
410     return true;
411   }
412 
413   // Set the search path to its new value.
414   if (!WindowsDbgHelp::symSetSearchPath(hProcess, g_buffers.search_path.ptr())) {
415     return false;
416   }
417 
418   if (p_search_path_was_updated) {
419     *p_search_path_was_updated = true;
420   }
421 
422   return true;
423 
424 }
425 
demangle_locked(const char * symbol,char * buf,int buflen)426 static bool demangle_locked(const char* symbol, char *buf, int buflen) {
427 
428   return WindowsDbgHelp::unDecorateSymbolName(symbol, buf, buflen, UNDNAME_COMPLETE) > 0;
429 
430 }
431 
decode_locked(const void * addr,char * buf,int buflen,int * offset,bool do_demangle)432 static bool decode_locked(const void* addr, char* buf, int buflen, int* offset, bool do_demangle) {
433 
434   assert(g_buffers.decode_buffer.capacity() >= (sizeof(IMAGEHLP_SYMBOL64) + MINIMUM_SYMBOL_NAME_LEN),
435          "Decode buffer too small.");
436   assert(buf != NULL && buflen > 0 && offset != NULL, "invalid output buffer.");
437 
438   DWORD64 displacement;
439   PIMAGEHLP_SYMBOL64 pSymbol = NULL;
440   bool success = false;
441 
442   pSymbol = (PIMAGEHLP_SYMBOL64) g_buffers.decode_buffer.ptr();
443   pSymbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
444   pSymbol->MaxNameLength = (DWORD)(g_buffers.decode_buffer.capacity() - sizeof(IMAGEHLP_SYMBOL64) - 1);
445 
446   // It is unclear how SymGetSymFromAddr64 handles truncation. Experiments
447   // show it will return TRUE but not zero terminate (which is a really bad
448   // combination). Lets be super careful.
449   ::memset(pSymbol->Name, 0, pSymbol->MaxNameLength); // To catch truncation.
450 
451   if (WindowsDbgHelp::symGetSymFromAddr64(::GetCurrentProcess(), (DWORD64)addr, &displacement, pSymbol)) {
452     success = true;
453     if (pSymbol->Name[pSymbol->MaxNameLength - 1] != '\0') {
454       // Symbol was truncated. Do not attempt to demangle. Instead, zero terminate the
455       // truncated string. We still return success - the truncated string may still
456       // be usable for the caller.
457       pSymbol->Name[pSymbol->MaxNameLength - 1] = '\0';
458       do_demangle = false;
459     }
460 
461     // Attempt to demangle.
462     if (do_demangle && demangle_locked(pSymbol->Name, buf, buflen)) {
463       // ok.
464     } else {
465       ::strncpy(buf, pSymbol->Name, buflen - 1);
466     }
467     buf[buflen - 1] = '\0';
468 
469     *offset = (int)displacement;
470   }
471 
472   DEBUG_ONLY(g_buffers.decode_buffer.check();)
473 
474   return success;
475 }
476 
477 static enum {
478   state_uninitialized = 0,
479   state_ready = 1,
480   state_error = 2
481 } g_state = state_uninitialized;
482 
initialize()483 static void initialize() {
484 
485   assert(g_state == state_uninitialized, "wrong sequence");
486   g_state = state_error;
487 
488   // 1) Initialize buffers.
489   g_buffers.initialize();
490 
491   // 1) Call SymInitialize
492   HANDLE hProcess = ::GetCurrentProcess();
493   WindowsDbgHelp::symSetOptions(SYMOPT_FAIL_CRITICAL_ERRORS | SYMOPT_DEFERRED_LOADS |
494                         SYMOPT_EXACT_SYMBOLS | SYMOPT_LOAD_LINES);
495   if (!WindowsDbgHelp::symInitialize(hProcess, NULL, TRUE)) {
496     return;
497   }
498 
499   // Note: we ignore any errors from this point on. The symbol engine may be
500   // usable enough.
501   g_state = state_ready;
502 
503   (void)recalc_search_path_locked(NULL);
504 
505 }
506 
507 ///////////////////// External functions //////////////////////////
508 
509 // All outside facing functions are synchronized. Also, we run
510 // initialization on first touch.
511 
512 static CRITICAL_SECTION g_cs;
513 
514 namespace { // Do not export.
515   class SymbolEngineEntry {
516    public:
SymbolEngineEntry()517     SymbolEngineEntry() {
518       ::EnterCriticalSection(&g_cs);
519       if (g_state == state_uninitialized) {
520         initialize();
521       }
522     }
~SymbolEngineEntry()523     ~SymbolEngineEntry() {
524       ::LeaveCriticalSection(&g_cs);
525     }
526   };
527 }
528 
529 // Called at DLL_PROCESS_ATTACH.
pre_initialize()530 void SymbolEngine::pre_initialize() {
531   ::InitializeCriticalSection(&g_cs);
532 }
533 
decode(const void * addr,char * buf,int buflen,int * offset,bool do_demangle)534 bool SymbolEngine::decode(const void* addr, char* buf, int buflen, int* offset, bool do_demangle) {
535 
536   assert(buf != NULL && buflen > 0 && offset != NULL, "Argument error");
537   buf[0] = '\0';
538   *offset = -1;
539 
540   if (addr == NULL) {
541     return false;
542   }
543 
544   SymbolEngineEntry entry_guard;
545 
546   // Try decoding the symbol once. If we fail, attempt to rebuild the
547   // symbol search path - maybe the pc points to a dll whose pdb file is
548   // outside our search path. Then do attempt the decode again.
549   bool success = decode_locked(addr, buf, buflen, offset, do_demangle);
550   if (!success) {
551     bool did_update_search_path = false;
552     if (recalc_search_path_locked(&did_update_search_path)) {
553       if (did_update_search_path) {
554         success = decode_locked(addr, buf, buflen, offset, do_demangle);
555       }
556     }
557   }
558 
559   return success;
560 
561 }
562 
demangle(const char * symbol,char * buf,int buflen)563 bool SymbolEngine::demangle(const char* symbol, char *buf, int buflen) {
564 
565   SymbolEngineEntry entry_guard;
566 
567   return demangle_locked(symbol, buf, buflen);
568 
569 }
570 
recalc_search_path(bool * p_search_path_was_updated)571 bool SymbolEngine::recalc_search_path(bool* p_search_path_was_updated) {
572 
573   SymbolEngineEntry entry_guard;
574 
575   return recalc_search_path_locked(p_search_path_was_updated);
576 
577 }
578 
get_source_info(const void * addr,char * buf,size_t buflen,int * line_no)579 bool SymbolEngine::get_source_info(const void* addr, char* buf, size_t buflen,
580                                    int* line_no)
581 {
582   assert(buf != NULL && buflen > 0 && line_no != NULL, "Argument error");
583   buf[0] = '\0';
584   *line_no = -1;
585 
586   if (addr == NULL) {
587     return false;
588   }
589 
590   SymbolEngineEntry entry_guard;
591 
592   IMAGEHLP_LINE64 lineinfo;
593   memset(&lineinfo, 0, sizeof(lineinfo));
594   lineinfo.SizeOfStruct = sizeof(lineinfo);
595   DWORD displacement;
596   if (WindowsDbgHelp::symGetLineFromAddr64(::GetCurrentProcess(), (DWORD64)addr,
597                                            &displacement, &lineinfo)) {
598     if (buf != NULL && buflen > 0 && lineinfo.FileName != NULL) {
599       // We only return the file name, not the whole path.
600       char* p = lineinfo.FileName;
601       char* q = strrchr(lineinfo.FileName, '\\');
602       if (q) {
603         p = q + 1;
604       }
605       ::strncpy(buf, p, buflen - 1);
606       buf[buflen - 1] = '\0';
607     }
608     if (line_no != 0) {
609       *line_no = lineinfo.LineNumber;
610     }
611     return true;
612   }
613   return false;
614 }
615 
616 // Print one liner describing state (if library loaded, which functions are
617 // missing - if any, and the dbhelp API version)
print_state_on(outputStream * st)618 void SymbolEngine::print_state_on(outputStream* st) {
619 
620   SymbolEngineEntry entry_guard;
621 
622   st->print("symbol engine: ");
623 
624   if (g_state == state_uninitialized) {
625     st->print("uninitialized.");
626   } else if (g_state == state_error) {
627     st->print("initialization error.");
628   } else {
629     st->print("initialized successfully");
630     st->print(" - sym options: 0x%X", WindowsDbgHelp::symGetOptions());
631     st->print(" - pdb path: ");
632     if (WindowsDbgHelp::symGetSearchPath(::GetCurrentProcess(),
633                                           g_buffers.search_path.ptr(),
634                                           (int)g_buffers.search_path.capacity())) {
635       st->print_raw(g_buffers.search_path.ptr());
636     } else {
637       st->print_raw("(cannot be retrieved)");
638     }
639   }
640   st->cr();
641 
642 }
643