1 // Copyright (c) 2010 Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // stackwalker_x86.cc: x86-specific stackwalker.
31 //
32 // See stackwalker_x86.h for documentation.
33 //
34 // Author: Mark Mentovai
35 
36 #include <assert.h>
37 #include <string>
38 
39 #include "common/scoped_ptr.h"
40 #include "google_breakpad/processor/call_stack.h"
41 #include "google_breakpad/processor/code_modules.h"
42 #include "google_breakpad/processor/memory_region.h"
43 #include "google_breakpad/processor/source_line_resolver_interface.h"
44 #include "google_breakpad/processor/stack_frame_cpu.h"
45 #include "processor/logging.h"
46 #include "processor/postfix_evaluator-inl.h"
47 #include "processor/stackwalker_x86.h"
48 #include "processor/windows_frame_info.h"
49 #include "processor/cfi_frame_info.h"
50 
51 namespace google_breakpad {
52 
// Upper bound, in bytes, on a plausible distance between two adjacent x86
// frames: 128 KB.  It serves as a sanity check when the %ebp chain is being
// re-established after a scan for a return address.  The figure is based on a
// stack frame size histogram built for a set of popular third party
// libraries, which suggests that 99.5% of all frames are smaller than 128 KB.
static const uint32_t kMaxReasonableGapBetweenFrames = 128u * 1024u;
59 
60 const StackwalkerX86::CFIWalker::RegisterSet
61 StackwalkerX86::cfi_register_map_[] = {
62   // It may seem like $eip and $esp are callee-saves, because (with Unix or
63   // cdecl calling conventions) the callee is responsible for having them
64   // restored upon return. But the callee_saves flags here really means
65   // that the walker should assume they're unchanged if the CFI doesn't
66   // mention them, which is clearly wrong for $eip and $esp.
67   { "$eip", ".ra",  false,
68     StackFrameX86::CONTEXT_VALID_EIP, &MDRawContextX86::eip },
69   { "$esp", ".cfa", false,
70     StackFrameX86::CONTEXT_VALID_ESP, &MDRawContextX86::esp },
71   { "$ebp", NULL,   true,
72     StackFrameX86::CONTEXT_VALID_EBP, &MDRawContextX86::ebp },
73   { "$eax", NULL,   false,
74     StackFrameX86::CONTEXT_VALID_EAX, &MDRawContextX86::eax },
75   { "$ebx", NULL,   true,
76     StackFrameX86::CONTEXT_VALID_EBX, &MDRawContextX86::ebx },
77   { "$ecx", NULL,   false,
78     StackFrameX86::CONTEXT_VALID_ECX, &MDRawContextX86::ecx },
79   { "$edx", NULL,   false,
80     StackFrameX86::CONTEXT_VALID_EDX, &MDRawContextX86::edx },
81   { "$esi", NULL,   true,
82     StackFrameX86::CONTEXT_VALID_ESI, &MDRawContextX86::esi },
83   { "$edi", NULL,   true,
84     StackFrameX86::CONTEXT_VALID_EDI, &MDRawContextX86::edi },
85 };
86 
StackwalkerX86(const SystemInfo * system_info,const MDRawContextX86 * context,MemoryRegion * memory,const CodeModules * modules,StackFrameSymbolizer * resolver_helper)87 StackwalkerX86::StackwalkerX86(const SystemInfo* system_info,
88                                const MDRawContextX86* context,
89                                MemoryRegion* memory,
90                                const CodeModules* modules,
91                                StackFrameSymbolizer* resolver_helper)
92     : Stackwalker(system_info, memory, modules, resolver_helper),
93       context_(context),
94       cfi_walker_(cfi_register_map_,
95                   (sizeof(cfi_register_map_) / sizeof(cfi_register_map_[0]))) {
96   if (memory_ && memory_->GetBase() + memory_->GetSize() - 1 > 0xffffffff) {
97     // The x86 is a 32-bit CPU, the limits of the supplied stack are invalid.
98     // Mark memory_ = NULL, which will cause stackwalking to fail.
99     BPLOG(ERROR) << "Memory out of range for stackwalking: " <<
100                     HexString(memory_->GetBase()) << "+" <<
101                     HexString(memory_->GetSize());
102     memory_ = NULL;
103   }
104 }
105 
~StackFrameX86()106 StackFrameX86::~StackFrameX86() {
107   if (windows_frame_info)
108     delete windows_frame_info;
109   windows_frame_info = NULL;
110   if (cfi_frame_info)
111     delete cfi_frame_info;
112   cfi_frame_info = NULL;
113 }
114 
ReturnAddress() const115 uint64_t StackFrameX86::ReturnAddress() const {
116   assert(context_validity & StackFrameX86::CONTEXT_VALID_EIP);
117   return context.eip;
118 }
119 
GetContextFrame()120 StackFrame* StackwalkerX86::GetContextFrame() {
121   if (!context_) {
122     BPLOG(ERROR) << "Can't get context frame without context";
123     return NULL;
124   }
125 
126   StackFrameX86* frame = new StackFrameX86();
127 
128   // The instruction pointer is stored directly in a register, so pull it
129   // straight out of the CPU context structure.
130   frame->context = *context_;
131   frame->context_validity = StackFrameX86::CONTEXT_VALID_ALL;
132   frame->trust = StackFrame::FRAME_TRUST_CONTEXT;
133   frame->instruction = frame->context.eip;
134 
135   return frame;
136 }
137 
GetCallerByWindowsFrameInfo(const vector<StackFrame * > & frames,WindowsFrameInfo * last_frame_info,bool stack_scan_allowed)138 StackFrameX86* StackwalkerX86::GetCallerByWindowsFrameInfo(
139     const vector<StackFrame*> &frames,
140     WindowsFrameInfo* last_frame_info,
141     bool stack_scan_allowed) {
142   StackFrame::FrameTrust trust = StackFrame::FRAME_TRUST_NONE;
143 
144   StackFrameX86* last_frame = static_cast<StackFrameX86*>(frames.back());
145 
146   // Save the stack walking info we found, in case we need it later to
147   // find the callee of the frame we're constructing now.
148   last_frame->windows_frame_info = last_frame_info;
149 
150   // This function only covers the full STACK WIN case. If
151   // last_frame_info is VALID_PARAMETER_SIZE-only, then we should
152   // assume the traditional frame format or use some other strategy.
153   if (last_frame_info->valid != WindowsFrameInfo::VALID_ALL)
154     return NULL;
155 
156   // This stackwalker sets each frame's %esp to its value immediately prior
157   // to the CALL into the callee.  This means that %esp points to the last
158   // callee argument pushed onto the stack, which may not be where %esp points
159   // after the callee returns.  Specifically, the value is correct for the
160   // cdecl calling convention, but not other conventions.  The cdecl
161   // convention requires a caller to pop its callee's arguments from the
162   // stack after the callee returns.  This is usually accomplished by adding
163   // the known size of the arguments to %esp.  Other calling conventions,
164   // including stdcall, thiscall, and fastcall, require the callee to pop any
165   // parameters stored on the stack before returning.  This is usually
166   // accomplished by using the RET n instruction, which pops n bytes off
167   // the stack after popping the return address.
168   //
169   // Because each frame's %esp will point to a location on the stack after
170   // callee arguments have been PUSHed, when locating things in a stack frame
171   // relative to %esp, the size of the arguments to the callee need to be
172   // taken into account.  This seems a little bit unclean, but it's better
173   // than the alternative, which would need to take these same things into
174   // account, but only for cdecl functions.  With this implementation, we get
175   // to be agnostic about each function's calling convention.  Furthermore,
176   // this is how Windows debugging tools work, so it means that the %esp
177   // values produced by this stackwalker directly correspond to the %esp
178   // values you'll see there.
179   //
180   // If the last frame has no callee (because it's the context frame), just
181   // set the callee parameter size to 0: the stack pointer can't point to
182   // callee arguments because there's no callee.  This is correct as long
183   // as the context wasn't captured while arguments were being pushed for
184   // a function call.  Note that there may be functions whose parameter sizes
185   // are unknown, 0 is also used in that case.  When that happens, it should
186   // be possible to walk to the next frame without reference to %esp.
187 
188   uint32_t last_frame_callee_parameter_size = 0;
189   int frames_already_walked = frames.size();
190   if (frames_already_walked >= 2) {
191     const StackFrameX86* last_frame_callee
192         = static_cast<StackFrameX86*>(frames[frames_already_walked - 2]);
193     WindowsFrameInfo* last_frame_callee_info
194         = last_frame_callee->windows_frame_info;
195     if (last_frame_callee_info &&
196         (last_frame_callee_info->valid
197          & WindowsFrameInfo::VALID_PARAMETER_SIZE)) {
198       last_frame_callee_parameter_size =
199           last_frame_callee_info->parameter_size;
200     }
201   }
202 
203   // Set up the dictionary for the PostfixEvaluator.  %ebp, %esp, and sometimes
204   // %ebx are used in program strings, and their previous values are known, so
205   // set them here.
206   PostfixEvaluator<uint32_t>::DictionaryType dictionary;
207   // Provide the current register values.
208   dictionary["$ebp"] = last_frame->context.ebp;
209   dictionary["$esp"] = last_frame->context.esp;
210   if (last_frame->context_validity & StackFrameX86::CONTEXT_VALID_EBX)
211     dictionary["$ebx"] = last_frame->context.ebx;
212   // Provide constants from the debug info for last_frame and its callee.
213   // .cbCalleeParams is a Breakpad extension that allows us to use the
214   // PostfixEvaluator engine when certain types of debugging information
215   // are present without having to write the constants into the program
216   // string as literals.
217   dictionary[".cbCalleeParams"] = last_frame_callee_parameter_size;
218   dictionary[".cbSavedRegs"] = last_frame_info->saved_register_size;
219   dictionary[".cbLocals"] = last_frame_info->local_size;
220 
221   uint32_t raSearchStart = last_frame->context.esp +
222                            last_frame_callee_parameter_size +
223                            last_frame_info->local_size +
224                            last_frame_info->saved_register_size;
225 
226   uint32_t raSearchStartOld = raSearchStart;
227   uint32_t found = 0;  // dummy value
228   // Scan up to three words above the calculated search value, in case
229   // the stack was aligned to a quadword boundary.
230   //
231   // TODO(ivan.penkov): Consider cleaning up the scan for return address that
232   // follows.  The purpose of this scan is to adjust the .raSearchStart
233   // calculation (which is based on register %esp) in the cases where register
234   // %esp may have been aligned (up to a quadword).  There are two problems
235   // with this approach:
236   //  1) In practice, 64 byte boundary alignment is seen which clearly can not
237   //     be handled by a three word scan.
238   //  2) A search for a return address is "guesswork" by definition because
239   //     the results will be different depending on what is left on the stack
240   //     from previous executions.
241   // So, basically, the results from this scan should be ignored if other means
242   // for calculation of the value of .raSearchStart are available.
243   if (ScanForReturnAddress(raSearchStart, &raSearchStart, &found, 3) &&
244       last_frame->trust == StackFrame::FRAME_TRUST_CONTEXT &&
245       last_frame->windows_frame_info != NULL &&
246       last_frame_info->type_ == WindowsFrameInfo::STACK_INFO_FPO &&
247       raSearchStartOld == raSearchStart &&
248       found == last_frame->context.eip) {
249     // The context frame represents an FPO-optimized Windows system call.
250     // On the top of the stack we have a pointer to the current instruction.
251     // This means that the callee has returned but the return address is still
252     // on the top of the stack which is very atypical situaltion.
253     // Skip one slot from the stack and do another scan in order to get the
254     // actual return address.
255     raSearchStart += 4;
256     ScanForReturnAddress(raSearchStart, &raSearchStart, &found, 3);
257   }
258 
259   dictionary[".cbParams"] = last_frame_info->parameter_size;
260 
261   // Decide what type of program string to use. The program string is in
262   // postfix notation and will be passed to PostfixEvaluator::Evaluate.
263   // Given the dictionary and the program string, it is possible to compute
264   // the return address and the values of other registers in the calling
265   // function. Because of bugs described below, the stack may need to be
266   // scanned for these values. The results of program string evaluation
267   // will be used to determine whether to scan for better values.
268   string program_string;
269   bool recover_ebp = true;
270 
271   trust = StackFrame::FRAME_TRUST_CFI;
272   if (!last_frame_info->program_string.empty()) {
273     // The FPO data has its own program string, which will tell us how to
274     // get to the caller frame, and may even fill in the values of
275     // nonvolatile registers and provide pointers to local variables and
276     // parameters.  In some cases, particularly with program strings that use
277     // .raSearchStart, the stack may need to be scanned afterward.
278     program_string = last_frame_info->program_string;
279   } else if (last_frame_info->allocates_base_pointer) {
280     // The function corresponding to the last frame doesn't use the frame
281     // pointer for conventional purposes, but it does allocate a new
282     // frame pointer and use it for its own purposes.  Its callee's
283     // information is still accessed relative to %esp, and the previous
284     // value of %ebp can be recovered from a location in its stack frame,
285     // within the saved-register area.
286     //
287     // Functions that fall into this category use the %ebp register for
288     // a purpose other than the frame pointer.  They restore the caller's
289     // %ebp before returning.  These functions create their stack frame
290     // after a CALL by decrementing the stack pointer in an amount
291     // sufficient to store local variables, and then PUSHing saved
292     // registers onto the stack.  Arguments to a callee function, if any,
293     // are PUSHed after that.  Walking up to the caller, therefore,
294     // can be done solely with calculations relative to the stack pointer
295     // (%esp).  The return address is recovered from the memory location
296     // above the known sizes of the callee's parameters, saved registers,
297     // and locals.  The caller's stack pointer (the value of %esp when
298     // the caller executed CALL) is the location immediately above the
299     // saved return address.  The saved value of %ebp to be restored for
300     // the caller is at a known location in the saved-register area of
301     // the stack frame.
302     //
303     // For this type of frame, MSVC 14 (from Visual Studio 8/2005) in
304     // link-time code generation mode (/LTCG and /GL) can generate erroneous
305     // debugging data.  The reported size of saved registers can be 0,
306     // which is clearly an error because these frames must, at the very
307     // least, save %ebp.  For this reason, in addition to those given above
308     // about the use of .raSearchStart, the stack may need to be scanned
309     // for a better return address and a better frame pointer after the
310     // program string is evaluated.
311     //
312     // %eip_new = *(%esp_old + callee_params + saved_regs + locals)
313     // %ebp_new = *(%esp_old + callee_params + saved_regs - 8)
314     // %esp_new = %esp_old + callee_params + saved_regs + locals + 4
315     program_string = "$eip .raSearchStart ^ = "
316         "$ebp $esp .cbCalleeParams + .cbSavedRegs + 8 - ^ = "
317         "$esp .raSearchStart 4 + =";
318   } else {
319     // The function corresponding to the last frame doesn't use %ebp at
320     // all.  The callee frame is located relative to %esp.
321     //
322     // The called procedure's instruction pointer and stack pointer are
323     // recovered in the same way as the case above, except that no
324     // frame pointer (%ebp) is used at all, so it is not saved anywhere
325     // in the callee's stack frame and does not need to be recovered.
326     // Because %ebp wasn't used in the callee, whatever value it has
327     // is the value that it had in the caller, so it can be carried
328     // straight through without bringing its validity into question.
329     //
330     // Because of the use of .raSearchStart, the stack will possibly be
331     // examined to locate a better return address after program string
332     // evaluation.  The stack will not be examined to locate a saved
333     // %ebp value, because these frames do not save (or use) %ebp.
334     //
335     // We also propagate %ebx through, as it is commonly unmodifed after
336     // calling simple forwarding functions in ntdll (that are this non-EBP
337     // using type). It's not clear that this is always correct, but it is
338     // important for some functions to get a correct walk.
339     //
340     // %eip_new = *(%esp_old + callee_params + saved_regs + locals)
341     // %esp_new = %esp_old + callee_params + saved_regs + locals + 4
342     // %ebp_new = %ebp_old
343     // %ebx_new = %ebx_old  // If available.
344     program_string = "$eip .raSearchStart ^ = "
345                      "$esp .raSearchStart 4 + =";
346     if (last_frame->context_validity & StackFrameX86::CONTEXT_VALID_EBX)
347       program_string += " $ebx $ebx =";
348     recover_ebp = false;
349   }
350 
351   // Check for alignment operators in the program string.  If alignment
352   // operators are found, then current %ebp must be valid and it is the only
353   // reliable data point that can be used for getting to the previous frame.
354   // E.g. the .raSearchStart calculation (above) is based on %esp and since
355   // %esp was aligned in the current frame (which is a lossy operation) the
356   // calculated value of .raSearchStart cannot be correct and should not be
357   // used.  Instead .raSearchStart must be calculated based on %ebp.
358   // The code that follows assumes that .raSearchStart is supposed to point
359   // at the saved return address (ebp + 4).
360   // For some more details on this topic, take a look at the following thread:
361   // https://groups.google.com/forum/#!topic/google-breakpad-dev/ZP1FA9B1JjM
362   if ((StackFrameX86::CONTEXT_VALID_EBP & last_frame->context_validity) != 0 &&
363       program_string.find('@') != string::npos) {
364     raSearchStart = last_frame->context.ebp + 4;
365   }
366 
367   // The difference between raSearch and raSearchStart is unknown,
368   // but making them the same seems to work well in practice.
369   dictionary[".raSearchStart"] = raSearchStart;
370   dictionary[".raSearch"] = raSearchStart;
371 
372   // Now crank it out, making sure that the program string set at least the
373   // two required variables.
374   PostfixEvaluator<uint32_t> evaluator =
375       PostfixEvaluator<uint32_t>(&dictionary, memory_);
376   PostfixEvaluator<uint32_t>::DictionaryValidityType dictionary_validity;
377   if (!evaluator.Evaluate(program_string, &dictionary_validity) ||
378       dictionary_validity.find("$eip") == dictionary_validity.end() ||
379       dictionary_validity.find("$esp") == dictionary_validity.end()) {
380     // Program string evaluation failed. It may be that %eip is not somewhere
381     // with stack frame info, and %ebp is pointing to non-stack memory, so
382     // our evaluation couldn't succeed. We'll scan the stack for a return
383     // address. This can happen if the stack is in a module for which
384     // we don't have symbols, and that module is compiled without a
385     // frame pointer.
386     uint32_t location_start = last_frame->context.esp;
387     uint32_t location, eip;
388     if (!stack_scan_allowed
389         || !ScanForReturnAddress(location_start, &location, &eip,
390                                  frames.size() == 1 /* is_context_frame */)) {
391       // if we can't find an instruction pointer even with stack scanning,
392       // give up.
393       return NULL;
394     }
395 
396     // This seems like a reasonable return address. Since program string
397     // evaluation failed, use it and set %esp to the location above the
398     // one where the return address was found.
399     dictionary["$eip"] = eip;
400     dictionary["$esp"] = location + 4;
401     trust = StackFrame::FRAME_TRUST_SCAN;
402   }
403 
404   // Since this stack frame did not use %ebp in a traditional way,
405   // locating the return address isn't entirely deterministic. In that
406   // case, the stack can be scanned to locate the return address.
407   //
408   // However, if program string evaluation resulted in both %eip and
409   // %ebp values of 0, trust that the end of the stack has been
410   // reached and don't scan for anything else.
411   if (dictionary["$eip"] != 0 || dictionary["$ebp"] != 0) {
412     int offset = 0;
413 
414     // This scan can only be done if a CodeModules object is available, to
415     // check that candidate return addresses are in fact inside a module.
416     //
417     // TODO(mmentovai): This ignores dynamically-generated code.  One possible
418     // solution is to check the minidump's memory map to see if the candidate
419     // %eip value comes from a mapped executable page, although this would
420     // require dumps that contain MINIDUMP_MEMORY_INFO, which the Breakpad
421     // client doesn't currently write (it would need to call MiniDumpWriteDump
422     // with the MiniDumpWithFullMemoryInfo type bit set).  Even given this
423     // ability, older OSes (pre-XP SP2) and CPUs (pre-P4) don't enforce
424     // an independent execute privilege on memory pages.
425 
426     uint32_t eip = dictionary["$eip"];
427     if (modules_ && !modules_->GetModuleForAddress(eip)) {
428       // The instruction pointer at .raSearchStart was invalid, so start
429       // looking one 32-bit word above that location.
430       uint32_t location_start = dictionary[".raSearchStart"] + 4;
431       uint32_t location;
432       if (stack_scan_allowed
433           && ScanForReturnAddress(location_start, &location, &eip,
434                                   frames.size() == 1 /* is_context_frame */)) {
435         // This is a better return address that what program string
436         // evaluation found.  Use it, and set %esp to the location above the
437         // one where the return address was found.
438         dictionary["$eip"] = eip;
439         dictionary["$esp"] = location + 4;
440         offset = location - location_start;
441         trust = StackFrame::FRAME_TRUST_CFI_SCAN;
442       }
443     }
444 
445     if (recover_ebp) {
446       // When trying to recover the previous value of the frame pointer (%ebp),
447       // start looking at the lowest possible address in the saved-register
448       // area, and look at the entire saved register area, increased by the
449       // size of |offset| to account for additional data that may be on the
450       // stack.  The scan is performed from the highest possible address to
451       // the lowest, because the expectation is that the function's prolog
452       // would have saved %ebp early.
453       uint32_t ebp = dictionary["$ebp"];
454 
455       // When a scan for return address is used, it is possible to skip one or
456       // more frames (when return address is not in a known module).  One
457       // indication for skipped frames is when the value of %ebp is lower than
458       // the location of the return address on the stack
459       bool has_skipped_frames =
460         (trust != StackFrame::FRAME_TRUST_CFI && ebp <= raSearchStart + offset);
461 
462       uint32_t value;  // throwaway variable to check pointer validity
463       if (has_skipped_frames || !memory_->GetMemoryAtAddress(ebp, &value)) {
464         int fp_search_bytes = last_frame_info->saved_register_size + offset;
465         uint32_t location_end = last_frame->context.esp +
466                                  last_frame_callee_parameter_size;
467 
468         for (uint32_t location = location_end + fp_search_bytes;
469              location >= location_end;
470              location -= 4) {
471           if (!memory_->GetMemoryAtAddress(location, &ebp))
472             break;
473 
474           if (memory_->GetMemoryAtAddress(ebp, &value)) {
475             // The candidate value is a pointer to the same memory region
476             // (the stack).  Prefer it as a recovered %ebp result.
477             dictionary["$ebp"] = ebp;
478             break;
479           }
480         }
481       }
482     }
483   }
484 
485   // Create a new stack frame (ownership will be transferred to the caller)
486   // and fill it in.
487   StackFrameX86* frame = new StackFrameX86();
488 
489   frame->trust = trust;
490   frame->context = last_frame->context;
491   frame->context.eip = dictionary["$eip"];
492   frame->context.esp = dictionary["$esp"];
493   frame->context.ebp = dictionary["$ebp"];
494   frame->context_validity = StackFrameX86::CONTEXT_VALID_EIP |
495                                 StackFrameX86::CONTEXT_VALID_ESP |
496                                 StackFrameX86::CONTEXT_VALID_EBP;
497 
498   // These are nonvolatile (callee-save) registers, and the program string
499   // may have filled them in.
500   if (dictionary_validity.find("$ebx") != dictionary_validity.end()) {
501     frame->context.ebx = dictionary["$ebx"];
502     frame->context_validity |= StackFrameX86::CONTEXT_VALID_EBX;
503   }
504   if (dictionary_validity.find("$esi") != dictionary_validity.end()) {
505     frame->context.esi = dictionary["$esi"];
506     frame->context_validity |= StackFrameX86::CONTEXT_VALID_ESI;
507   }
508   if (dictionary_validity.find("$edi") != dictionary_validity.end()) {
509     frame->context.edi = dictionary["$edi"];
510     frame->context_validity |= StackFrameX86::CONTEXT_VALID_EDI;
511   }
512 
513   return frame;
514 }
515 
GetCallerByCFIFrameInfo(const vector<StackFrame * > & frames,CFIFrameInfo * cfi_frame_info)516 StackFrameX86* StackwalkerX86::GetCallerByCFIFrameInfo(
517     const vector<StackFrame*> &frames,
518     CFIFrameInfo* cfi_frame_info) {
519   StackFrameX86* last_frame = static_cast<StackFrameX86*>(frames.back());
520   last_frame->cfi_frame_info = cfi_frame_info;
521 
522   scoped_ptr<StackFrameX86> frame(new StackFrameX86());
523   if (!cfi_walker_
524       .FindCallerRegisters(*memory_, *cfi_frame_info,
525                            last_frame->context, last_frame->context_validity,
526                            &frame->context, &frame->context_validity))
527     return NULL;
528 
529   // Make sure we recovered all the essentials.
530   static const int essentials = (StackFrameX86::CONTEXT_VALID_EIP
531                                  | StackFrameX86::CONTEXT_VALID_ESP
532                                  | StackFrameX86::CONTEXT_VALID_EBP);
533   if ((frame->context_validity & essentials) != essentials)
534     return NULL;
535 
536   frame->trust = StackFrame::FRAME_TRUST_CFI;
537 
538   return frame.release();
539 }
540 
GetCallerByEBPAtBase(const vector<StackFrame * > & frames,bool stack_scan_allowed)541 StackFrameX86* StackwalkerX86::GetCallerByEBPAtBase(
542     const vector<StackFrame*> &frames,
543     bool stack_scan_allowed) {
544   StackFrame::FrameTrust trust;
545   StackFrameX86* last_frame = static_cast<StackFrameX86*>(frames.back());
546   uint32_t last_esp = last_frame->context.esp;
547   uint32_t last_ebp = last_frame->context.ebp;
548 
549   // Assume that the standard %ebp-using x86 calling convention is in
550   // use.
551   //
552   // The typical x86 calling convention, when frame pointers are present,
553   // is for the calling procedure to use CALL, which pushes the return
554   // address onto the stack and sets the instruction pointer (%eip) to
555   // the entry point of the called routine.  The called routine then
556   // PUSHes the calling routine's frame pointer (%ebp) onto the stack
557   // before copying the stack pointer (%esp) to the frame pointer (%ebp).
558   // Therefore, the calling procedure's frame pointer is always available
559   // by dereferencing the called procedure's frame pointer, and the return
560   // address is always available at the memory location immediately above
561   // the address pointed to by the called procedure's frame pointer.  The
562   // calling procedure's stack pointer (%esp) is 8 higher than the value
563   // of the called procedure's frame pointer at the time the calling
564   // procedure made the CALL: 4 bytes for the return address pushed by the
565   // CALL itself, and 4 bytes for the callee's PUSH of the caller's frame
566   // pointer.
567   //
568   // %eip_new = *(%ebp_old + 4)
569   // %esp_new = %ebp_old + 8
570   // %ebp_new = *(%ebp_old)
571 
572   uint32_t caller_eip, caller_esp, caller_ebp;
573 
574   if (memory_->GetMemoryAtAddress(last_ebp + 4, &caller_eip) &&
575       memory_->GetMemoryAtAddress(last_ebp, &caller_ebp)) {
576     caller_esp = last_ebp + 8;
577     trust = StackFrame::FRAME_TRUST_FP;
578   } else {
579     // We couldn't read the memory %ebp refers to. It may be that %ebp
580     // is pointing to non-stack memory. We'll scan the stack for a
581     // return address. This can happen if last_frame is executing code
582     // for a module for which we don't have symbols, and that module
583     // is compiled without a frame pointer.
584     if (!stack_scan_allowed
585         || !ScanForReturnAddress(last_esp, &caller_esp, &caller_eip,
586                                  frames.size() == 1 /* is_context_frame */)) {
587       // if we can't find an instruction pointer even with stack scanning,
588       // give up.
589       return NULL;
590     }
591 
592     // ScanForReturnAddress found a reasonable return address. Advance %esp to
593     // the location immediately above the one where the return address was
594     // found.
595     caller_esp += 4;
596     // Try to restore the %ebp chain.  The caller %ebp should be stored at a
597     // location immediately below the one where the return address was found.
598     // A valid caller %ebp must be greater than the address where it is stored
599     // and the gap between the two adjacent frames should be reasonable.
600     uint32_t restored_ebp_chain = caller_esp - 8;
601     if (!memory_->GetMemoryAtAddress(restored_ebp_chain, &caller_ebp) ||
602         caller_ebp <= restored_ebp_chain ||
603         caller_ebp - restored_ebp_chain > kMaxReasonableGapBetweenFrames) {
604       // The restored %ebp chain doesn't appear to be valid.
605       // Assume that %ebp is unchanged.
606       caller_ebp = last_ebp;
607     }
608 
609     trust = StackFrame::FRAME_TRUST_SCAN;
610   }
611 
612   // Create a new stack frame (ownership will be transferred to the caller)
613   // and fill it in.
614   StackFrameX86* frame = new StackFrameX86();
615 
616   frame->trust = trust;
617   frame->context = last_frame->context;
618   frame->context.eip = caller_eip;
619   frame->context.esp = caller_esp;
620   frame->context.ebp = caller_ebp;
621   frame->context_validity = StackFrameX86::CONTEXT_VALID_EIP |
622                             StackFrameX86::CONTEXT_VALID_ESP |
623                             StackFrameX86::CONTEXT_VALID_EBP;
624 
625   return frame;
626 }
627 
GetCallerFrame(const CallStack * stack,bool stack_scan_allowed)628 StackFrame* StackwalkerX86::GetCallerFrame(const CallStack* stack,
629                                            bool stack_scan_allowed) {
630   if (!memory_ || !stack) {
631     BPLOG(ERROR) << "Can't get caller frame without memory or stack";
632     return NULL;
633   }
634 
635   const vector<StackFrame*> &frames = *stack->frames();
636   StackFrameX86* last_frame = static_cast<StackFrameX86*>(frames.back());
637   scoped_ptr<StackFrameX86> new_frame;
638 
639   // If the resolver has Windows stack walking information, use that.
640   WindowsFrameInfo* windows_frame_info
641       = frame_symbolizer_->FindWindowsFrameInfo(last_frame);
642   if (windows_frame_info)
643     new_frame.reset(GetCallerByWindowsFrameInfo(frames, windows_frame_info,
644                                                 stack_scan_allowed));
645 
646   // If the resolver has DWARF CFI information, use that.
647   if (!new_frame.get()) {
648     CFIFrameInfo* cfi_frame_info =
649         frame_symbolizer_->FindCFIFrameInfo(last_frame);
650     if (cfi_frame_info)
651       new_frame.reset(GetCallerByCFIFrameInfo(frames, cfi_frame_info));
652   }
653 
654   // Otherwise, hope that the program was using a traditional frame structure.
655   if (!new_frame.get())
656     new_frame.reset(GetCallerByEBPAtBase(frames, stack_scan_allowed));
657 
658   // If nothing worked, tell the caller.
659   if (!new_frame.get())
660     return NULL;
661 
662   // Should we terminate the stack walk? (end-of-stack or broken invariant)
663   if (TerminateWalk(new_frame->context.eip,
664                     new_frame->context.esp,
665                     last_frame->context.esp,
666                     frames.size() == 1)) {
667     return NULL;
668   }
669 
670   // new_frame->context.eip is the return address, which is the instruction
671   // after the CALL that caused us to arrive at the callee. Set
672   // new_frame->instruction to one less than that, so it points within the
673   // CALL instruction. See StackFrame::instruction for details, and
674   // StackFrameAMD64::ReturnAddress.
675   new_frame->instruction = new_frame->context.eip - 1;
676 
677   return new_frame.release();
678 }
679 
680 }  // namespace google_breakpad
681