1 
2 /*--------------------------------------------------------------------*/
3 /*--- Take snapshots of client stacks.              m_stacktrace.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2017 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #include "pub_core_basics.h"
32 #include "pub_core_vki.h"
33 #include "pub_core_threadstate.h"
34 #include "pub_core_debuginfo.h"     // XXX: circular dependency
35 #include "pub_core_aspacemgr.h"     // For VG_(is_addressable)()
36 #include "pub_core_libcbase.h"
37 #include "pub_core_libcassert.h"
38 #include "pub_core_libcprint.h"
39 #include "pub_core_machine.h"
40 #include "pub_core_options.h"
41 #include "pub_core_stacks.h"        // VG_(stack_limits)
42 #include "pub_core_stacktrace.h"
43 #include "pub_core_syswrap.h"       // VG_(is_in_syscall)
44 #include "pub_core_xarray.h"
45 #include "pub_core_clientstate.h"   // VG_(client__dl_sysinfo_int80)
46 #include "pub_core_trampoline.h"
47 
48 
49 /*------------------------------------------------------------*/
50 /*---                                                      ---*/
51 /*--- BEGIN platform-dependent unwinder worker functions   ---*/
52 /*---                                                      ---*/
53 /*------------------------------------------------------------*/
54 
55 /* Take a snapshot of the client's stack, putting up to 'max_n_ips'
56    IPs into 'ips'.  In order to be thread-safe, we pass in the
57    thread's IP SP, FP if that's meaningful, and LR if that's
58    meaningful.  Returns number of IPs put in 'ips'.
59 
60    If you know what the thread ID for this stack is, send that as the
61    first parameter, else send zero.  This helps generate better stack
62    traces on ppc64-linux and has no effect on other platforms.
63 */
64 
/* Do frame merging in the _i frames in _ips array of recursive cycles
   of up to _nframes.  The merge is done during stack unwinding
   (i.e. in platform specific unwinders) to collect as many
   "interesting" stack traces as possible. */
/* Mechanics: after a new frame has been pushed at _ips[_i-1], scan up to
   _nframes entries back; if an identical IP is found at distance 'dist',
   rewind _i by 'dist', collapsing one cycle of the recursion into a
   single occurrence.  NOTE: _i is evaluated several times and modified,
   so callers must pass a plain lvalue (all call sites here pass 'i'). */
#define RECURSIVE_MERGE(_nframes,_ips,_i) if (UNLIKELY(_nframes > 0)) \
do {                                                                  \
   Int dist;                                                          \
   for (dist = 1; dist <= _nframes && dist < (Int)_i; dist++) {       \
      if (_ips[_i-1] == _ips[_i-1-dist]) {                            \
         _i = _i - dist;                                              \
         break;                                                       \
      }                                                               \
   }                                                                  \
} while (0)
79 
80 /* Note about calculation of fp_min : fp_min is the lowest address
81    which can be accessed during unwinding. This is SP - VG_STACK_REDZONE_SZB.
82    On most platforms, this will be equal to SP (as VG_STACK_REDZONE_SZB
83    is 0). However, on some platforms (e.g. amd64), there is an accessible
84    redzone below the SP. Some CFI unwind info are generated, taking this
85    into account. As an example, the following is a CFI unwind info on
86    amd64 found for a 'retq' instruction:
87 [0x400f7e .. 0x400f7e]: let cfa=oldSP+8 in RA=*(cfa+-8) SP=cfa+0 BP=*(cfa+-16)
88   0x400f7e: retq
89   As you can see, the previous BP is found 16 bytes below the cfa, which
90   is the oldSP+8. So, effectively, the BP is found 8 bytes below the SP.
91   The fp_min must take this into account, otherwise, VG_(use_CF_info) will
92   not unwind the BP. */
93 
94 /* ------------------------ x86 ------------------------- */
95 
#if defined(VGP_x86_linux) || defined(VGP_x86_darwin) \
    || defined(VGP_x86_solaris) || defined(VGP_x86_dragonfly)

#define N_FP_CF_VERIF 1021
// prime number so that size of fp_CF_verif is just below 4K or 8K
// Note that this prime nr differs from the one chosen in
// m_debuginfo/debuginfo.c for the cfsi cache : in case we have
// a collision here between two IPs, we expect to not (often) have the
// same collision in the cfsi cache (and vice-versa).

// Cache-entry tags, XOR-ed into the stored IP (see big comment below):
// unwinding with fp chain is ok:
#define FPUNWIND 0
// there is no CFI info for this IP:
#define NOINFO   1
// Unwind with FP is not ok, must use CF unwind:
#define CFUNWIND 2

// Direct-mapped cache, indexed by IP % N_FP_CF_VERIF; a zero entry
// means "not cached" (cleared wholesale on debuginfo generation change).
static Addr fp_CF_verif_cache [N_FP_CF_VERIF];

/* An unwind done by following the fp chain technique can be incorrect
   as not all frames are respecting the standard bp/sp ABI.
   The CF information is now generated by default by gcc
   (as part of the dwarf info). However, unwinding using CF information
   is significantly slower : a slowdown of 20% has been observed
   on an helgrind test case.
   So, by default, the unwinding will be done using the fp chain.
   But before accepting to unwind an IP with fp_chain, the result
   of the unwind will be checked with the CF information.
   This check can give 3 results:
     FPUNWIND (0): there is CF info, and it gives the same result as fp unwind.
       => it is assumed that future unwind for this IP can be done
          with the fast fp chain, without further CF checking
     NOINFO   (1): there is no CF info (so, fp unwind is the only do-able thing)
     CFUNWIND (2): there is CF info, but unwind result differs.
       => it is assumed that future unwind for this IP must be done
       with the CF info.
   Of course, if each fp unwind implies a check done with a CF unwind,
   it would just be slower => we cache the check result in an
   array of checked Addr.
   The check for an IP will be stored at
    fp_CF_verif_cache[IP % N_FP_CF_VERIF] as one of:
                     IP ^ FPUNWIND
                     IP ^ NOINFO
                     IP ^ CFUNWIND

   Note: we can re-use the last (ROUNDDOWN (log (N_FP_CF_VERIF))) bits
   to store the check result, as they are guaranteed to be non significant
   in the comparison between 2 IPs stored in fp_CF_verif_cache).
   In other words, if two IPs are only differing on the last 2 bits,
   then they will not land in the same cache bucket.
*/

/* cached result of VG_(FPO_info_present)(). Refreshed each time
   the fp_CF_verif_generation is different of the current debuginfo
   generation. */
static Bool FPO_info_present = False;

static UInt fp_CF_verif_generation = 0;
// Our cache has to be maintained in sync with the CFI cache.
// Each time the debuginfo is changed, its generation will be incremented.
// We will clear our cache when our saved generation differs from
// the debuginfo generation.
158 
VG_(get_StackTrace_wrk)159 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
160                                /*OUT*/Addr* ips, UInt max_n_ips,
161                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
162                                const UnwindStartRegs* startRegs,
163                                Addr fp_max_orig )
164 {
165    const Bool do_stats = False; // compute and output some stats regularly.
166    static struct {
167       UInt nr; // nr of stacktraces computed
168       UInt nf; // nr of frames computed
169       UInt Ca; // unwind for which cache indicates CFUnwind must be used.
170       UInt FF; // unwind for which cache indicates FPUnwind can be used.
171       UInt Cf; // unwind at end of stack+store CFUNWIND (xip not end of stack).
172       UInt Fw; // unwind at end of stack+store FPUNWIND
173       UInt FO; // unwind + store FPUNWIND
174       UInt CF; // unwind + store CFUNWIND. Details below.
175       UInt xi; UInt xs; UInt xb; // register(s) which caused a 'store CFUNWIND'.
176       UInt Ck; // unwind fp invalid+store FPUNWIND
177       UInt MS; // microsoft unwind
178    } stats;
179 
180    const Bool   debug = False;
181    //                 = VG_(debugLog_getLevel) () > 3;
182    //                 = True;
183    //                 = stats.nr >= 123456;
184    const HChar* unwind_case; // used when debug is True.
185    // Debugging this function is not straightforward.
186    // Here is the easiest way I have found:
187    // 1. Change the above to True.
188    // 2. Start your program under Valgrind with --tool=none --vgdb-error=0
189    // 3. Use GDB/vgdb to put a breakpoint where you want to debug the stacktrace
190    // 4. Continue till breakpoint is encountered
191    // 5. From GDB, use 'monitor v.info scheduler' and examine the unwind traces.
192    //    You might have to do twice 'monitor v.info scheduler' to see
193    //    the effect of caching the results of the verification.
194    //    You can also modify the debug dynamically using by using
195    //    'monitor v.set debuglog 4.
196 
197    Int   i;
198    Addr  fp_max;
199    UInt  n_found = 0;
200    const Int cmrf = VG_(clo_merge_recursive_frames);
201 
202    vg_assert(sizeof(Addr) == sizeof(UWord));
203    vg_assert(sizeof(Addr) == sizeof(void*));
204 
205    D3UnwindRegs fpverif_uregs; // result of CF unwind for a check reason.
206    Addr xip_verified = 0; // xip for which we have calculated fpverif_uregs
207    // 0 assigned to silence false positive -Wuninitialized warning
208    // This is a false positive as xip_verified is assigned when
209    // xip_verif > CFUNWIND and only used if xip_verif > CFUNWIND.
210 
211    D3UnwindRegs uregs;
212    uregs.xip = (Addr)startRegs->r_pc;
213    uregs.xsp = (Addr)startRegs->r_sp;
214    uregs.xbp = startRegs->misc.X86.r_ebp;
215    Addr fp_min = uregs.xsp - VG_STACK_REDZONE_SZB;
216 
217    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
218       stopping when the trail goes cold, which we guess to be
219       when FP is not a reasonable stack location. */
220 
221    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
222    // current page, at least.  Dunno if it helps.
223    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
224    fp_max = VG_PGROUNDUP(fp_max_orig);
225    if (fp_max >= sizeof(Addr))
226       fp_max -= sizeof(Addr);
227 
228    if (debug)
229       VG_(printf)("max_n_ips=%u fp_min=0x%08lx fp_max_orig=0x08%lx, "
230                   "fp_max=0x%08lx ip=0x%08lx fp=0x%08lx\n",
231                   max_n_ips, fp_min, fp_max_orig, fp_max,
232                   uregs.xip, uregs.xbp);
233 
234    /* Assertion broken before main() is reached in pthreaded programs;  the
235     * offending stack traces only have one item.  --njn, 2002-aug-16 */
236    /* vg_assert(fp_min <= fp_max);*/
237    // On Darwin, this kicks in for pthread-related stack traces, so they're
238    // only 1 entry long which is wrong.
239 #  if defined(VGO_linux)
240    if (fp_min + 512 >= fp_max) {
241       /* If the stack limits look bogus, don't poke around ... but
242          don't bomb out either. */
243 #  elif defined(VGO_solaris)
244    if (fp_max == 0) {
245       /* VG_(get_StackTrace)() can be called by tools very early when
246          various tracing options are enabled. Don't proceed further
247          if the stack limits look bogus.
248        */
249 #  endif
250 #  if defined(VGO_linux) || defined(VGO_solaris)
251       if (sps) sps[0] = uregs.xsp;
252       if (fps) fps[0] = uregs.xbp;
253       ips[0] = uregs.xip;
254       return 1;
255    }
256 #  endif
257 
258    if (UNLIKELY (fp_CF_verif_generation != VG_(debuginfo_generation)())) {
259       fp_CF_verif_generation = VG_(debuginfo_generation)();
260       VG_(memset)(&fp_CF_verif_cache, 0, sizeof(fp_CF_verif_cache));
261       FPO_info_present = VG_(FPO_info_present)();
262    }
263 
264 
265    /* Loop unwinding the stack. Note that the IP value we get on
266     * each pass (whether from CFI info or a stack frame) is a
267     * return address so is actually after the calling instruction
268     * in the calling function.
269     *
270     * Because of this we subtract one from the IP after each pass
271     * of the loop so that we find the right CFI block on the next
272     * pass - otherwise we can find the wrong CFI info if it happens
273     * to change after the calling instruction and that will mean
274     * that we will fail to unwind the next step.
275     *
276     * This most frequently happens at the end of a function when
277     * a tail call occurs and we wind up using the CFI info for the
278     * next function which is completely wrong.
279     */
280    if (sps) sps[0] = uregs.xsp;
281    if (fps) fps[0] = uregs.xbp;
282    ips[0] = uregs.xip;
283    i = 1;
284    if (do_stats) stats.nr++;
285 
286    while (True) {
287 
288       if (i >= max_n_ips)
289          break;
290 
291       UWord hash = uregs.xip % N_FP_CF_VERIF;
292       Addr xip_verif = uregs.xip ^ fp_CF_verif_cache [hash];
293       if (debug)
294          VG_(printf)("     uregs.xip 0x%08lx xip_verif[0x%08lx]"
295                      " xbp 0x%08lx xsp 0x%08lx\n",
296                      uregs.xip, xip_verif,
297                      uregs.xbp, uregs.xsp);
298       // If xip is in cache, then xip_verif will be <= CFUNWIND.
299       // Otherwise, if not in cache, xip_verif will be > CFUNWIND.
300 
301       /* Try to derive a new (ip,sp,fp) triple from the current set. */
302 
303       /* Do we have to do CFI unwinding ?
304          We do CFI unwinding if one of the following condition holds:
305          a. fp_CF_verif_cache contains xip but indicates CFUNWIND must
306             be done (i.e. fp unwind check failed when we did the first
307             unwind for this IP).
308          b. fp_CF_verif_cache does not contain xip.
309             We will try CFI unwinding in fpverif_uregs and compare with
310             FP unwind result to insert xip in the cache with the correct
311             indicator. */
312       if (UNLIKELY(xip_verif >= CFUNWIND)) {
313          if (xip_verif == CFUNWIND) {
314             /* case a : do "real" cfi unwind */
315             if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
316                if (debug) unwind_case = "Ca";
317                if (do_stats) stats.Ca++;
318                goto unwind_done;
319             }
320             /* ??? cache indicates we have to do CFI unwind (so, we
321              previously found CFI info, and failed the fp unwind
322              check). Now, we just failed with CFI.  So, once we
323              succeed, once we fail.  No idea what is going on =>
324              cleanup the cache entry and fallover to fp unwind (this
325              time). */
326             fp_CF_verif_cache [hash] = 0;
327             if (debug) VG_(printf)("     cache reset as CFI ok then nok\n");
328             //??? stats
329             xip_verif = NOINFO;
330          } else {
331             /* case b : do "verif" cfi unwind in fpverif_uregs */
332             fpverif_uregs = uregs;
333             xip_verified = uregs.xip;
334             if ( !VG_(use_CF_info)( &fpverif_uregs, fp_min, fp_max ) ) {
335                fp_CF_verif_cache [hash] = uregs.xip ^ NOINFO;
336                if (debug) VG_(printf)("     cache NOINFO fpverif_uregs\n");
337                xip_verif = NOINFO;
338             }
339          }
340       }
341 
342       /* On x86, try the old-fashioned method of following the
343          %ebp-chain.  This can be done if the fp_CF_verif_cache for xip
344          indicate fp unwind is ok. This must be done if the cache indicates
345          there is no info. This is also done to confirm what to put in the cache
346          if xip was not in the cache. */
347       /* This deals with frames resulting from functions which begin "pushl%
348          ebp ; movl %esp, %ebp" which is the ABI-mandated preamble. */
349       if (fp_min <= uregs.xbp &&
350           uregs.xbp <= fp_max - 1 * sizeof(UWord)/*see comment below*/ &&
351           VG_IS_4_ALIGNED(uregs.xbp))
352       {
353          Addr old_xsp;
354 
355          /* fp looks sane, so use it. */
356          uregs.xip = (((UWord*)uregs.xbp)[1]);
357          // We stop if we hit a zero (the traditional end-of-stack
358          // marker) or a one -- these correspond to recorded IPs of 0 or -1.
359          // The latter because r8818 (in this file) changes the meaning of
360          // entries [1] and above in a stack trace, by subtracting 1 from
361          // them.  Hence stacks that used to end with a zero value now end in
362          // -1 and so we must detect that too.
363          if (0 == uregs.xip || 1 == uregs.xip) {
364             if (xip_verif > CFUNWIND) {
365                // Check if we obtain the same result with fp unwind.
366                // If same result, then mark xip as fp unwindable
367                if (uregs.xip == fpverif_uregs.xip) {
368                   fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
369                   if (debug) VG_(printf)("     cache FPUNWIND 0\n");
370                   unwind_case = "Fw";
371                   if (do_stats) stats.Fw++;
372                   break;
373                } else {
374                   fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
375                   uregs = fpverif_uregs;
376                   if (debug) VG_(printf)("     cache CFUNWIND 0\n");
377                   unwind_case = "Cf";
378                   if (do_stats) stats.Cf++;
379                   goto unwind_done;
380                }
381             } else {
382                // end of stack => out of the loop.
383                break;
384             }
385          }
386 
387          old_xsp = uregs.xsp;
388          uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %ebp*/
389                                + sizeof(Addr) /*ra*/;
390          uregs.xbp = (((UWord*)uregs.xbp)[0]);
391          if (xip_verif > CFUNWIND) {
392             if (uregs.xip == fpverif_uregs.xip
393                 && uregs.xsp == fpverif_uregs.xsp
394                 && uregs.xbp == fpverif_uregs.xbp) {
395                fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
396                if (debug) VG_(printf)("     cache FPUNWIND >2\n");
397                if (debug) unwind_case = "FO";
398                if (do_stats) stats.FO++;
399                if (old_xsp >= uregs.xsp) {
400                   if (debug)
401                     VG_(printf) ("     FO end of stack old_xsp %p >= xsp %p\n",
402                                  (void*)old_xsp, (void*)uregs.xsp);
403                   break;
404                }
405             } else {
406                fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
407                if (debug) VG_(printf)("     cache CFUNWIND >2\n");
408                if (do_stats && uregs.xip != fpverif_uregs.xip) stats.xi++;
409                if (do_stats && uregs.xsp != fpverif_uregs.xsp) stats.xs++;
410                if (do_stats && uregs.xbp != fpverif_uregs.xbp) stats.xb++;
411                uregs = fpverif_uregs;
412                if (debug) unwind_case = "CF";
413                if (do_stats) stats.CF++;
414             }
415          } else {
416             if (debug) unwind_case = "FF";
417             if (do_stats) stats.FF++;
418             if (old_xsp >= uregs.xsp) {
419                if (debug)
420                   VG_(printf) ("     FF end of stack old_xsp %p >= xsp %p\n",
421                                (void*)old_xsp, (void*)uregs.xsp);
422                break;
423             }
424          }
425          goto unwind_done;
426       } else {
427          // fp unwind has failed.
428          // If we were checking the validity of the cfi unwinding,
429          // we mark in the cache that the fp unwind cannot be done, and that
430          // cfi unwind is desired.
431          if (xip_verif > CFUNWIND) {
432             // We know that fpverif_uregs contains valid information,
433             // as a failed cf unwind would have put NOINFO in xip_verif.
434             fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
435             if (debug) VG_(printf)("     cache CFUNWIND as fp failed\n");
436             uregs = fpverif_uregs;
437             if (debug) unwind_case = "Ck";
438             if (do_stats) stats.Ck++;
439             goto unwind_done;
440          }
441          // xip_verif is FPUNWIND or NOINFO.
442          // We failed the cfi unwind and/or the fp unwind.
443          // => fallback to FPO info.
444       }
445 
446       /* And, similarly, try for MSVC FPO unwind info. */
447       if (FPO_info_present
448           && VG_(use_FPO_info)( &uregs.xip, &uregs.xsp, &uregs.xbp,
449                                 VG_(current_DiEpoch)(),
450                                 fp_min, fp_max ) ) {
451          if (debug) unwind_case = "MS";
452          if (do_stats) stats.MS++;
453          goto unwind_done;
454       }
455 
456       /* No luck.  We have to give up. */
457       break;
458 
459    unwind_done:
460       /* Add a frame in ips/sps/fps */
461       /* fp is %ebp.  sp is %esp.  ip is %eip. */
462       if (0 == uregs.xip || 1 == uregs.xip) break;
463       if (sps) sps[i] = uregs.xsp;
464       if (fps) fps[i] = uregs.xbp;
465       ips[i++] = uregs.xip - 1;
466       /* -1: refer to calling insn, not the RA */
467       if (debug)
468          VG_(printf)("     ips%s[%d]=0x%08lx\n", unwind_case, i-1, ips[i-1]);
469       uregs.xip = uregs.xip - 1;
470       /* as per comment at the head of this loop */
471       RECURSIVE_MERGE(cmrf,ips,i);
472    }
473 
474    if (do_stats) stats.nf += i;
475    if (do_stats && stats.nr % 10000 == 0) {
476      VG_(printf)("nr %u nf %u "
477                  "Ca %u FF %u "
478                  "Cf %u "
479                  "Fw %u FO %u "
480                  "CF %u (xi %u xs %u xb %u) "
481                  "Ck %u MS %u\n",
482                  stats.nr, stats.nf,
483                  stats.Ca, stats.FF,
484                  stats.Cf,
485                  stats.Fw, stats.FO,
486                  stats.CF, stats.xi, stats.xs, stats.xb,
487                  stats.Ck, stats.MS);
488    }
489    n_found = i;
490    return n_found;
491 }
492 
493 #undef N_FP_CF_VERIF
494 #undef FPUNWIND
495 #undef NOINFO
496 #undef CFUNWIND
497 
498 #endif
499 
500 /* ----------------------- amd64 ------------------------ */
501 
502 #if defined(VGP_amd64_linux) || defined(VGP_amd64_darwin) \
503     || defined(VGP_amd64_solaris) || defined(VGP_amd64_dragonfly)
504 
/* amd64 unwinder.  Starting from the register state in 'startRegs',
   walks the client stack using, in order of preference per frame:
   (1) CFI info via VG_(use_CF_info), (2) the %rbp chain, and
   (3) a last-ditch "treat *sp as a return address" hack.
   Writes up to max_n_ips entries into ips[] (and, when non-NULL,
   sps[]/fps[]) and returns the number of entries written. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   const Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   uregs.xip = startRegs->r_pc;
   uregs.xsp = startRegs->r_sp;
   uregs.xbp = startRegs->misc.AMD64.r_rbp;
   /* fp_min is SP minus the redzone: the amd64 ABI allows accesses below
      SP, and CFI info may be expressed relative to such addresses (see
      the note about fp_min at the top of this file). */
   Addr fp_min = uregs.xsp - VG_STACK_REDZONE_SZB;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.xip, uregs.xbp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   // On Darwin, this kicks in for pthread-related stack traces, so they're
   // only 1 entry long which is wrong.
#  if defined(VGO_linux)
   if (fp_min + 256 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
#  elif defined(VGO_solaris)
   if (fp_max == 0) {
      /* VG_(get_StackTrace)() can be called by tools very early when
         various tracing options are enabled. Don't proceed further
         if the stack limits look bogus.
       */
#  endif
#  if defined(VGO_linux) || defined(VGO_solaris)

      /* Degenerate case: record just the starting IP and give up. */
      if (sps) sps[0] = uregs.xsp;
      if (fps) fps[0] = uregs.xbp;
      ips[0] = uregs.xip;
      return 1;
   }
#  endif

   /* fp is %rbp.  sp is %rsp.  ip is %rip. */

   ips[0] = uregs.xip;
   if (sps) sps[0] = uregs.xsp;
   if (fps) fps[0] = uregs.xbp;
   i = 1;
   if (debug)
      VG_(printf)("     ipsS[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
                  i-1, ips[i-1], uregs.xbp, uregs.xsp);

#  if defined(VGO_darwin)
   if (VG_(is_valid_tid)(tid_if_known) &&
      VG_(is_in_syscall)(tid_if_known) &&
      i < max_n_ips) {
      /* On Darwin, all the system call stubs have no function
       * prolog.  So instead of top of the stack being a new
       * frame comprising a saved BP and a return address, we
       * just have the return address in the caller's frame.
       * Adjust for this by recording the return address.
       */
      ips[i] = *(Addr *)uregs.xsp - 1;
      if (sps) sps[i] = uregs.xsp;
      if (fps) fps[i] = uregs.xbp;
      i++;
   }
#  endif

   /* Loop unwinding the stack. Note that the IP value we get on
    * each pass (whether from CFI info or a stack frame) is a
    * return address so is actually after the calling instruction
    * in the calling function.
    *
    * Because of this we subtract one from the IP after each pass
    * of the loop so that we find the right CFI block on the next
    * pass - otherwise we can find the wrong CFI info if it happens
    * to change after the calling instruction and that will mean
    * that we will fail to unwind the next step.
    *
    * This most frequently happens at the end of a function when
    * a tail call occurs and we wind up using the CFI info for the
    * next function which is completely wrong.
    */
   while (True) {
      Addr old_xsp;

      if (i >= max_n_ips)
         break;

      old_xsp = uregs.xsp;

      /* Try to derive a new (ip,sp,fp) triple from the current set. */

      /* First off, see if there is any CFI info to hand which can
         be used. */
      if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
         if (0 == uregs.xip || 1 == uregs.xip) break;
         /* Require the stack pointer to strictly increase; otherwise we
            could loop forever on bogus CFI. */
         if (old_xsp >= uregs.xsp) {
            if (debug)
               VG_(printf) ("     CF end of stack old_xsp %p >= xsp %p\n",
                            (void*)old_xsp, (void*)uregs.xsp);
            break;
         }
         if (sps) sps[i] = uregs.xsp;
         if (fps) fps[i] = uregs.xbp;
         ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
         if (debug)
            VG_(printf)("     ipsC[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
                        i-1, ips[i-1], uregs.xbp, uregs.xsp);
         uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

#if defined(VGO_dragonfly)
      /* DragonFly-specific: if the IP lies inside libc.so, assume a
         frameless leaf-style frame and pop a bare return address. */
      const NSegment *seg;
      const HChar *filename = NULL;
      int match = 0;

      seg = VG_(am_find_nsegment)(uregs.xip);
      if (seg != NULL) {
         filename = VG_(am_get_filename)(seg);
      }
      if (filename != NULL && VG_(strstr)(filename, "/libc.so")) {
         match = 1;
      }
      if (match == 1 && fp_min <= uregs.xsp &&
	  uregs.xsp <= fp_max - 1 * sizeof(UWord)) {
         /* fp looks sane, so use it. */
         uregs.xip = (((UWord*)uregs.xsp)[0]);
         if (0 == uregs.xip || 1 == uregs.xip) break;
         if (fps) fps[i] = uregs.xsp;
         uregs.xsp = uregs.xsp + sizeof(Addr) /*ra*/;
         if (sps) sps[i] = uregs.xsp;
         ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
         if (debug)
            VG_(printf)("     ipsFF[%d]=%#08lx\n", i-1, ips[i-1]);
         uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
         continue;
      }
#endif

      /* If VG_(use_CF_info) fails, it won't modify ip/sp/fp, so
         we can safely try the old-fashioned method. */
      /* This bit is supposed to deal with frames resulting from
         functions which begin "pushq %rbp ; movq %rsp, %rbp".
         Unfortunately, since we can't (easily) look at the insns at
         the start of the fn, like GDB does, there's no reliable way
         to tell.  Hence the hack of first trying out CFI, and if that
         fails, then use this as a fallback. */
      /* Note: re "- 1 * sizeof(UWord)", need to take account of the
         fact that we are prodding at & ((UWord*)fp)[1] and so need to
         adjust the limit check accordingly.  Omitting this has been
         observed to cause segfaults on rare occasions. */
      if (fp_min <= uregs.xbp && uregs.xbp <= fp_max - 1 * sizeof(UWord)) {
         /* fp looks sane, so use it. */
         uregs.xip = (((UWord*)uregs.xbp)[1]);
         if (0 == uregs.xip || 1 == uregs.xip) break;
         uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %rbp*/
                               + sizeof(Addr) /*ra*/;
         if (old_xsp >= uregs.xsp) {
            if (debug)
               VG_(printf) ("     FF end of stack old_xsp %p >= xsp %p\n",
                            (void*)old_xsp, (void*)uregs.xsp);
            break;
         }
         uregs.xbp = (((UWord*)uregs.xbp)[0]);
         if (sps) sps[i] = uregs.xsp;
         if (fps) fps[i] = uregs.xbp;
         ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
         if (debug)
            VG_(printf)("     ipsF[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
                        i-1, ips[i-1], uregs.xbp, uregs.xsp);
         uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* Last-ditch hack (evidently GDB does something similar).  We
         are in the middle of nowhere and we have a nonsense value for
         the frame pointer.  If the stack pointer is still valid,
         assume that what it points at is a return address.  Yes,
         desperate measures.  Could do better here:
         - check that the supposed return address is in
           an executable page
         - check that the supposed return address is just after a call insn
         - given those two checks, don't just consider *sp as the return
           address; instead scan a likely section of stack (eg sp .. sp+256)
           and use suitable values found there.
      */
      if (fp_min <= uregs.xsp && uregs.xsp < fp_max) {
         uregs.xip = ((UWord*)uregs.xsp)[0];
         if (0 == uregs.xip || 1 == uregs.xip) break;
         if (sps) sps[i] = uregs.xsp;
         if (fps) fps[i] = uregs.xbp;
         /* NOTE(review): the xip==0 arm below is unreachable, since
            xip==0 already broke out of the loop just above; kept as-is. */
         ips[i++] = uregs.xip == 0
                    ? 0 /* sp[0] == 0 ==> stuck at the bottom of a
                           thread stack */
                    : uregs.xip - 1;
                        /* -1: refer to calling insn, not the RA */
         if (debug)
            VG_(printf)("     ipsH[%d]=%#08lx\n", i-1, ips[i-1]);
         uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
         uregs.xsp += 8;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* No luck at all.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
742 
743 #endif
744 
745 /* -----------------------ppc32/64 ---------------------- */
746 
747 #if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux) \
748     || defined(VGP_ppc64le_linux)
749 
/* ppc32/ppc64 unwinder.  On ppc, r1 serves as both the stack pointer
   and the frame pointer, and frames are linked through a back-chain
   word stored at the base of each frame.  The saved LR lives at a
   fixed ABI-dependent offset from the back-chain pointer, so once we
   have decided whether the live LR or the stacked slot holds the
   innermost return address, unwinding is a simple chain walk.
   Results go to ips[0 .. max_n_ips-1] (and optionally sps/fps);
   returns the number of frames found. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  lr_is_first_RA = False;
#  if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
   Word redir_stack_size = 0;
   Word redirs_used      = 0;
#  endif
   const Int cmrf = VG_(clo_merge_recursive_frames);
   const DiEpoch cur_ep = VG_(current_DiEpoch)();

   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   Addr ip = (Addr)startRegs->r_pc;
   Addr sp = (Addr)startRegs->r_sp;
   Addr fp = sp;
#  if defined(VGP_ppc32_linux)
   Addr lr = startRegs->misc.PPC32.r_lr;
#  elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   Addr lr = startRegs->misc.PPC64.r_lr;
#  endif
   Addr fp_min = sp - VG_STACK_REDZONE_SZB;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
		  max_n_ips, fp_min, fp_max_orig, fp_max, ip, fp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = sp;
      if (fps) fps[0] = fp;
      ips[0] = ip;
      return 1;
   }

   /* fp is %r1.  ip is %cia.  Note, ppc uses r1 as both the stack and
      frame pointers. */

#  if defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   redir_stack_size = VEX_GUEST_PPC64_REDIR_STACK_SIZE;
   redirs_used      = 0;
#  endif

#  if defined(VG_PLAT_USES_PPCTOC) || defined (VGP_ppc64le_linux)
   /* Deal with bogus LR values caused by function
      interception/wrapping on ppc-TOC platforms; see comment on
      similar code a few lines further down. */
   if (lr == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
       && VG_(is_valid_tid)(tid_if_known)) {
      Word hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
      redirs_used++;
      /* Only dip into the hidden redirection stack if the index is
         within its bounds. */
      if (hsp >= 1 && hsp < redir_stack_size)
         lr = VG_(threads)[tid_if_known]
                 .arch.vex.guest_REDIR_STACK[hsp-1];
   }
#  endif

   /* We have to determine whether or not LR currently holds this fn
      (call it F)'s return address.  It might not if F has previously
      called some other function, hence overwriting LR with a pointer
      to some part of F.  Hence if LR and IP point to the same
      function then we conclude LR does not hold this function's
      return address; instead the LR at entry must have been saved in
      the stack by F's prologue and so we must get it from there
      instead.  Note all this guff only applies to the innermost
      frame. */
   lr_is_first_RA = False;
   {
      const HChar *buf_lr, *buf_ip;
      /* The following conditional looks grossly inefficient and
         surely could be majorly improved, with not much effort. */
      if (VG_(get_fnname_raw) (cur_ep, lr, &buf_lr)) {
         /* Copy the first name out: the second lookup may reuse the
            underlying buffer. */
         HChar buf_lr_copy[VG_(strlen)(buf_lr) + 1];
         VG_(strcpy)(buf_lr_copy, buf_lr);
         if (VG_(get_fnname_raw) (cur_ep, ip, &buf_ip))
            /* Nonzero strcmp: LR and IP are in different functions,
               so LR plausibly holds the innermost return address. */
            if (VG_(strcmp)(buf_lr_copy, buf_ip))
               lr_is_first_RA = True;
      }
   }

   if (sps) sps[0] = fp; /* NB. not sp */
   if (fps) fps[0] = fp;
   ips[0] = ip;
   i = 1;

   if (fp_min <= fp && fp < fp_max-VG_WORDSIZE+1) {

      /* initial FP is sane; keep going */
      fp = (((UWord*)fp)[0]);

      while (True) {

        /* On ppc64-linux (ppc64-elf, really), the lr save
           slot is 2 words back from sp, whereas on ppc32-elf(?) it's
           only one word back. */
#        if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
         const Int lr_offset = 2;
#        else
         const Int lr_offset = 1;
#        endif

         if (i >= max_n_ips)
            break;

         /* Try to derive a new (ip,fp) pair from the current set. */

         if (fp_min <= fp && fp <= fp_max - lr_offset * sizeof(UWord)) {
            /* fp looks sane, so use it. */

            /* Innermost frame: use the live LR if we decided above
               that it holds the return address; otherwise read the
               saved LR from the frame. */
            if (i == 1 && lr_is_first_RA)
               ip = lr;
            else
               ip = (((UWord*)fp)[lr_offset]);

#           if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
            /* Nasty hack to do with function replacement/wrapping on
               ppc64-linux.  If LR points to our magic return stub,
               then we are in a wrapped or intercepted function, in
               which LR has been messed with.  The original LR will
               have been pushed onto the thread's hidden REDIR stack
               one down from the top (top element is the saved R2) and
               so we should restore the value from there instead.
               Since nested redirections can and do happen, we keep
               track of the number of nested LRs used by the unwinding
               so far with 'redirs_used'. */
            if (ip == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
                && VG_(is_valid_tid)(tid_if_known)) {
               Word hsp = VG_(threads)[tid_if_known]
                             .arch.vex.guest_REDIR_SP;
               hsp -= 2 * redirs_used;
               redirs_used ++;
               if (hsp >= 1 && hsp < redir_stack_size)
                  ip = VG_(threads)[tid_if_known]
                          .arch.vex.guest_REDIR_STACK[hsp-1];
            }
#           endif

            /* 0 or 1 in the RA slot means end of the chain. */
            if (0 == ip || 1 == ip) break;
            if (sps) sps[i] = fp; /* NB. not sp */
            if (fps) fps[i] = fp;
            /* Follow the back chain before recording, so fps/sps for
               the *next* frame come from the new fp. */
            fp = (((UWord*)fp)[0]);
            ips[i++] = ip - 1; /* -1: refer to calling insn, not the RA */
            if (debug)
               VG_(printf)("     ipsF[%d]=%#08lx\n", i-1, ips[i-1]);
            ip = ip - 1; /* ip is probably dead at this point, but
                            play safe, a la x86/amd64 above.  See
                            extensive comments above. */
            RECURSIVE_MERGE(cmrf,ips,i);
            continue;
         }

         /* No luck there.  We have to give up. */
         break;
      }
   }

   n_found = i;
   return n_found;
}
934 
935 #endif
936 
937 /* ------------------------ arm ------------------------- */
938 
939 #if defined(VGP_arm_linux)
940 
941 static Bool in_same_fn ( Addr a1, Addr a2 )
942 {
943    const HChar *buf_a1, *buf_a2;
944    /* The following conditional looks grossly inefficient and
945       surely could be majorly improved, with not much effort. */
946    const DiEpoch cur_ep = VG_(current_DiEpoch)();
947    if (VG_(get_fnname_raw) (cur_ep, a1, &buf_a1)) {
948       HChar buf_a1_copy[VG_(strlen)(buf_a1) + 1];
949       VG_(strcpy)(buf_a1_copy, buf_a1);
950       if (VG_(get_fnname_raw) (cur_ep, a2, &buf_a2))
951          if (VG_(strcmp)(buf_a1_copy, buf_a2))
952             return True;
953    }
954    return False;
955 }
956 
957 static Bool in_same_page ( Addr a1, Addr a2 ) {
958    return (a1 & ~0xFFF) == (a2 & ~0xFFF);
959 }
960 
961 static Addr abs_diff ( Addr a1, Addr a2 ) {
962    return (Addr)(a1 > a2 ? a1 - a2 : a2 - a1);
963 }
964 
965 static Bool has_XT_perms ( Addr a )
966 {
967    NSegment const* seg = VG_(am_find_nsegment)(a);
968    return seg && seg->hasX && seg->hasT;
969 }
970 
971 static Bool looks_like_Thumb_call32 ( UShort w0, UShort w1 )
972 {
973    if (0)
974       VG_(printf)("isT32call %04x %04x\n", (UInt)w0, (UInt)w1);
975    // BL  simm26
976    if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
977    // BLX simm26
978    if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
979    return False;
980 }
981 
/* Heuristic: does the single halfword 'w0' look like a 16-bit Thumb
   call instruction?  Currently this never matches (16-bit calls such
   as BLX <reg> are not recognised); the stub exists so looks_like_RA
   can treat the 16-bit and 32-bit cases uniformly. */
static Bool looks_like_Thumb_call16 ( UShort w0 )
{
   return False;
}
986 
987 static Bool looks_like_ARM_call ( UInt a0 )
988 {
989    if (0)
990       VG_(printf)("isA32call %08x\n", a0);
991    // Leading E forces unconditional only -- fix
992    if ((a0 & 0xFF000000) == 0xEB000000) return True;
993    return False;
994 }
995 
996 static Bool looks_like_RA ( Addr ra )
997 {
998    /* 'ra' is a plausible return address if it points to
999        an instruction after a call insn. */
1000    Bool isT = (ra & 1);
1001    if (isT) {
1002       // returning to Thumb code
1003       ra &= ~1;
1004       ra -= 4;
1005       if (has_XT_perms(ra)) {
1006          UShort w0 = *(UShort*)ra;
1007          UShort w1 = in_same_page(ra, ra+2) ? *(UShort*)(ra+2) : 0;
1008          if (looks_like_Thumb_call16(w1) || looks_like_Thumb_call32(w0,w1))
1009             return True;
1010       }
1011    } else {
1012       // ARM
1013       ra &= ~3;
1014       ra -= 4;
1015       if (has_XT_perms(ra)) {
1016          UInt a0 = *(UInt*)ra;
1017          if (looks_like_ARM_call(a0))
1018             return True;
1019       }
1020    }
1021    return False;
1022 }
1023 
/* arm (AArch32) unwinder.  Plan A: follow Dwarf CFI via
   VG_(use_CF_info).  Plan B (only if the user enabled it via
   --unw-stack-scan-thresh): scan the stack for values that look
   like return addresses.  Results go to ips[0 .. max_n_ips-1]
   (and optionally sps/fps); returns the number of frames found. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   /* Mask off the Thumb bit: r15 holds the instruction address. */
   uregs.r15 = startRegs->r_pc & 0xFFFFFFFE;
   uregs.r14 = startRegs->misc.ARM.r14;
   uregs.r13 = startRegs->r_sp;
   uregs.r12 = startRegs->misc.ARM.r12;
   uregs.r11 = startRegs->misc.ARM.r11;
   uregs.r7  = startRegs->misc.ARM.r7;
   Addr fp_min = uregs.r13 - VG_STACK_REDZONE_SZB;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("\nmax_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx r15=0x%lx r13=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.r15, uregs.r13);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   // On Darwin, this kicks in for pthread-related stack traces, so they're
   // only 1 entry long which is wrong.
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = uregs.r13;
      if (fps) fps[0] = 0;
      ips[0] = uregs.r15;
      return 1;
   }

   /* Record the innermost frame directly from the start regs. */

   if (sps) sps[0] = uregs.r13;
   if (fps) fps[0] = 0;
   ips[0] = uregs.r15;
   i = 1;

   /* Loop unwinding the stack. */
   Bool do_stack_scan = False;

   /* First try the Official Way, using Dwarf CFI. */
   while (True) {
      if (debug) {
         VG_(printf)("i: %d, r15: 0x%lx, r13: 0x%lx\n",
                     i, uregs.r15, uregs.r13);
      }

      if (i >= max_n_ips)
         break;

      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (sps) sps[i] = uregs.r13;
         if (fps) fps[i] = 0;
         /* Record (aligned RA) - 1 so the entry refers to the call
            site rather than the return point. */
         ips[i++] = (uregs.r15 & 0xFFFFFFFE) - 1;
         if (debug)
            VG_(printf)("USING CFI: r15: 0x%lx, r13: 0x%lx\n",
                        uregs.r15, uregs.r13);
         /* Back up r15 too, so the next CFI lookup is done for the
            call insn, not the RA (same trick as the other ports). */
         uregs.r15 = (uregs.r15 & 0xFFFFFFFE) - 1;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* No luck.  We have to give up. */
      do_stack_scan = True;
      break;
   }

   /* Now try Plan B (maybe) -- stack scanning.  This often gives
      pretty bad results, so this has to be enabled explicitly by the
      user. */
   if (do_stack_scan
       && i < max_n_ips && i < (Int)VG_(clo_unw_stack_scan_thresh)) {
      Int  nByStackScan = 0;
      Addr lr = uregs.r14;
      Addr sp = uregs.r13 & ~3;
      Addr pc = uregs.r15;
      // First see if LR contains
      // something that could be a valid return address.
      if (!in_same_fn(lr, pc) && looks_like_RA(lr)) {
         // take it only if 'cand' isn't obviously a duplicate
         // of the last found IP value
         Addr cand = (lr & 0xFFFFFFFE) - 1;
         if (abs_diff(cand, ips[i-1]) > 1) {
            if (sps) sps[i] = 0;
            if (fps) fps[i] = 0;
            ips[i++] = cand;
            RECURSIVE_MERGE(cmrf,ips,i);
            nByStackScan++;
         }
      }
      /* Scan word-by-word, but never beyond the page sp started in. */
      while (in_same_page(sp, uregs.r13)) {
         if (i >= max_n_ips)
            break;
         // we're in the same page; fairly safe to keep going
         UWord w = *(UWord*)(sp & ~0x3);
         if (looks_like_RA(w)) {
            Addr cand = (w & 0xFFFFFFFE) - 1;
            // take it only if 'cand' isn't obviously a duplicate
            // of the last found IP value
            if (abs_diff(cand, ips[i-1]) > 1) {
               if (sps) sps[i] = 0;
               if (fps) fps[i] = 0;
               ips[i++] = cand;
               RECURSIVE_MERGE(cmrf,ips,i);
               /* Cap the number of scan-derived frames. */
               if (++nByStackScan >= VG_(clo_unw_stack_scan_frames)) break;
            }
         }
         sp += 4;
      }
   }

   n_found = i;
   return n_found;
}
1163 
1164 #endif
1165 
1166 /* ------------------------ arm64 ------------------------- */
1167 
1168 #if defined(VGP_arm64_linux)
1169 
1170 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
1171                                /*OUT*/Addr* ips, UInt max_n_ips,
1172                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
1173                                const UnwindStartRegs* startRegs,
1174                                Addr fp_max_orig )
1175 {
1176    Bool  debug = False;
1177    Int   i;
1178    Addr  fp_max;
1179    UInt  n_found = 0;
1180    const Int cmrf = VG_(clo_merge_recursive_frames);
1181 
1182    vg_assert(sizeof(Addr) == sizeof(UWord));
1183    vg_assert(sizeof(Addr) == sizeof(void*));
1184 
1185    D3UnwindRegs uregs;
1186    uregs.pc = startRegs->r_pc;
1187    uregs.sp = startRegs->r_sp;
1188    uregs.x30 = startRegs->misc.ARM64.x30;
1189    uregs.x29 = startRegs->misc.ARM64.x29;
1190    Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;
1191 
1192    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
1193       stopping when the trail goes cold, which we guess to be
1194       when FP is not a reasonable stack location. */
1195 
1196    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
1197    // current page, at least.  Dunno if it helps.
1198    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
1199    fp_max = VG_PGROUNDUP(fp_max_orig);
1200    if (fp_max >= sizeof(Addr))
1201       fp_max -= sizeof(Addr);
1202 
1203    if (debug)
1204       VG_(printf)("\nmax_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
1205                   "fp_max=0x%lx PC=0x%lx SP=0x%lx\n",
1206                   max_n_ips, fp_min, fp_max_orig, fp_max,
1207                   uregs.pc, uregs.sp);
1208 
1209    /* Assertion broken before main() is reached in pthreaded programs;  the
1210     * offending stack traces only have one item.  --njn, 2002-aug-16 */
1211    /* vg_assert(fp_min <= fp_max);*/
1212    // On Darwin, this kicks in for pthread-related stack traces, so they're
1213    // only 1 entry long which is wrong.
1214    if (fp_min + 512 >= fp_max) {
1215       /* If the stack limits look bogus, don't poke around ... but
1216          don't bomb out either. */
1217       if (sps) sps[0] = uregs.sp;
1218       if (fps) fps[0] = uregs.x29;
1219       ips[0] = uregs.pc;
1220       return 1;
1221    }
1222 
1223    /* */
1224 
1225    if (sps) sps[0] = uregs.sp;
1226    if (fps) fps[0] = uregs.x29;
1227    ips[0] = uregs.pc;
1228    i = 1;
1229 
1230    /* Loop unwinding the stack, using CFI. */
1231    while (True) {
1232       if (debug) {
1233          VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx\n",
1234                      i, uregs.pc, uregs.sp);
1235       }
1236 
1237       if (i >= max_n_ips)
1238          break;
1239 
1240       if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
1241          if (sps) sps[i] = uregs.sp;
1242          if (fps) fps[i] = uregs.x29;
1243          ips[i++] = uregs.pc - 1;
1244          if (debug)
1245             VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx\n",
1246                         uregs.pc, uregs.sp);
1247          uregs.pc = uregs.pc - 1;
1248          RECURSIVE_MERGE(cmrf,ips,i);
1249          continue;
1250       }
1251 
1252       /* No luck.  We have to give up. */
1253       break;
1254    }
1255 
1256    n_found = i;
1257    return n_found;
1258 }
1259 
1260 #endif
1261 
1262 /* ------------------------ s390x ------------------------- */
1263 
1264 #if defined(VGP_s390x_linux)
1265 
/* s390x unwinder.  Relies entirely on Dwarf CFI (gcc on s390x does
   not maintain a backchain by default, and gdb likewise depends on
   the CFI), with a one-shot link-register fallback for the
   innermost frame.  Results go to ips[0 .. max_n_ips-1] (and
   optionally sps/fps); returns the number of frames found. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   uregs.ia = startRegs->r_pc;
   uregs.sp = startRegs->r_sp;
   Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;
   uregs.fp = startRegs->misc.S390X.r_fp;
   uregs.lr = startRegs->misc.S390X.r_lr;

   /* Round fp_max up to the end of the current page, then back off
      one word (same hack as the other ports). */
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx IA=0x%lx SP=0x%lx FP=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.ia, uregs.sp,uregs.fp);

   /* The first frame is pretty obvious */
   ips[0] = uregs.ia;
   if (sps) sps[0] = uregs.sp;
   if (fps) fps[0] = uregs.fp;
   i = 1;

   /* for everything else we have to rely on the eh_frame. gcc defaults to
      not create a backchain and all the other  tools (like gdb) also have
      to use the CFI. */
   while (True) {
      if (i >= max_n_ips)
         break;

      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (sps) sps[i] = uregs.sp;
         if (fps) fps[i] = uregs.fp;
         /* -1: refer to the call insn, not the return address, and
            back up ia too so the next CFI lookup matches the call. */
         ips[i++] = uregs.ia - 1;
         uregs.ia = uregs.ia - 1;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }
      /* A problem on the first frame? Lets assume it was a bad jump.
         We will use the link register and the current stack and frame
         pointers and see if we can use the CFI in the next round. */
      if (i == 1) {
         /* NOTE(review): uregs.sp/fp are only reloaded when the
            corresponding output array was supplied; with sps/fps
            both NULL this branch just records lr-1. */
         if (sps) {
            sps[i] = sps[0];
            uregs.sp = sps[0];
         }
         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         uregs.ia = uregs.lr - 1;
         ips[i++] = uregs.lr - 1;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* No luck.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
1344 
1345 #endif
1346 
1347 /* ------------------------ mips 32/64 ------------------------- */
1348 #if defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
/* mips32/mips64 unwinder.  Strategy: (1) try Dwarf CFI; (2) failing
   that, scan the current function's code from its entry point for
   stack-pointer adjustment instructions (the same trick GDB's mips
   backend uses) to recover the frame size and then trust RA; (3) as
   a last resort for the innermost frame only, trust RA directly.
   Results go to ips[0 .. max_n_ips-1] (and optionally sps/fps);
   returns the number of frames found. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   uregs.pc = startRegs->r_pc;
   uregs.sp = startRegs->r_sp;
   Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;

#if defined(VGP_mips32_linux)
   uregs.fp = startRegs->misc.MIPS32.r30;
   uregs.ra = startRegs->misc.MIPS32.r31;
#elif defined(VGP_mips64_linux)
   uregs.fp = startRegs->misc.MIPS64.r30;
   uregs.ra = startRegs->misc.MIPS64.r31;
#endif

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx pc=0x%lx sp=0x%lx fp=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.pc, uregs.sp, uregs.fp);

   if (sps) sps[0] = uregs.sp;
   if (fps) fps[0] = uregs.fp;
   ips[0] = uregs.pc;
   i = 1;

   /* Loop unwinding the stack. */

   while (True) {
      if (debug) {
         VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
                     i, uregs.pc, uregs.sp, uregs.ra);
      }
      if (i >= max_n_ips)
         break;

      /* Keep a copy so we can roll back if CFI produces a junk pc. */
      D3UnwindRegs uregs_copy = uregs;
      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (debug)
            VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
                        uregs.pc, uregs.sp, uregs.ra);
         if (0 != uregs.pc && 1 != uregs.pc) {
            if (sps) sps[i] = uregs.sp;
            if (fps) fps[i] = uregs.fp;
            /* -4: refer to the call insn (insns are 4 bytes). */
            ips[i++] = uregs.pc - 4;
            uregs.pc = uregs.pc - 4;
            RECURSIVE_MERGE(cmrf,ips,i);
            continue;
         } else
            uregs = uregs_copy;
      }

      /* Fallback: scan the function prologue for sp adjustments, a
         la GDB's mips backend, to compute the frame size. */
      int seen_sp_adjust = 0;
      long frame_offset = 0;
      PtrdiffT offset;
      const DiEpoch cur_ep = VG_(current_DiEpoch)();
      if (VG_(get_inst_offset_in_function)(cur_ep, uregs.pc, &offset)) {
         Addr start_pc = uregs.pc - offset;
         Addr limit_pc = uregs.pc;
         Addr cur_pc;
         for (cur_pc = start_pc; cur_pc < limit_pc; cur_pc += 4) {
            unsigned long inst, high_word, low_word;
            unsigned long * cur_inst;
            /* Fetch the instruction.   */
            cur_inst = (unsigned long *)cur_pc;
            inst = *((UInt *) cur_inst);
            if(debug)
               VG_(printf)("cur_pc: 0x%lx, inst: 0x%lx\n", cur_pc, inst);

            /* Save some code by pre-extracting some useful fields.  */
            high_word = (inst >> 16) & 0xffff;
            low_word = inst & 0xffff;

            if (high_word == 0x27bd        /* addiu $sp,$sp,-i */
                || high_word == 0x23bd     /* addi $sp,$sp,-i */
                || high_word == 0x67bd) {  /* daddiu $sp,$sp,-i */
               if (low_word & 0x8000)	/* negative stack adjustment? */
                  frame_offset += 0x10000 - low_word;
               else
                  /* Exit loop if a positive stack adjustment is found, which
                     usually means that the stack cleanup code in the function
                     epilogue is reached.  */
               break;
            /* NOTE(review): the indentation above is misleading (it is
               inherited from GDB's mips-tdep): the 'break' is the
               else-branch of the sign test, and this assignment runs
               each time a negative adjustment has been accumulated. */
            seen_sp_adjust = 1;
            }
         }
         if(debug)
            VG_(printf)("offset: 0x%lx\n", frame_offset);
      }
      if (seen_sp_adjust) {
         if (0 == uregs.pc || 1 == uregs.pc) break;
         /* ra - 8 equal to pc means we would make no progress. */
         if (uregs.pc == uregs.ra - 8) break;
         if (sps) {
            sps[i] = uregs.sp + frame_offset;
         }
         /* Pop the frame we just measured. */
         uregs.sp = uregs.sp + frame_offset;

         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         if (0 == uregs.ra || 1 == uregs.ra) break;
         /* -8: skip back over the jal and its delay slot. */
         uregs.pc = uregs.ra - 8;
         ips[i++] = uregs.ra - 8;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* Last resort, innermost frame only: trust RA outright. */
      if (i == 1) {
         if (sps) {
            sps[i] = sps[0];
            uregs.sp = sps[0];
         }
         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         if (0 == uregs.ra || 1 == uregs.ra) break;
         uregs.pc = uregs.ra - 8;
         ips[i++] = uregs.ra - 8;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }
      /* No luck.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
1500 
1501 #endif
1502 
1503 /*------------------------------------------------------------*/
1504 /*---                                                      ---*/
1505 /*--- END platform-dependent unwinder worker functions     ---*/
1506 /*---                                                      ---*/
1507 /*------------------------------------------------------------*/
1508 
1509 /*------------------------------------------------------------*/
1510 /*--- Exported functions.                                  ---*/
1511 /*------------------------------------------------------------*/
1512 
/* Capture a stack trace for 'tid' into ips[0 .. n_ips-1] (and
   optionally sps/fps), after nudging the starting pc by
   'first_ip_delta' and the starting sp by 'first_sp_delta'.
   Returns the number of frames recorded.  Note: the stack limits
   are computed from the *unadjusted* sp, i.e. before the deltas are
   applied. */
UInt VG_(get_StackTrace_with_deltas)(
         ThreadId tid,
         /*OUT*/StackTrace ips, UInt n_ips,
         /*OUT*/StackTrace sps,
         /*OUT*/StackTrace fps,
         Word first_ip_delta,
         Word first_sp_delta
      )
{
   /* Get the register values with which to start the unwind. */
   UnwindStartRegs startRegs;
   VG_(memset)( &startRegs, 0, sizeof(startRegs) );
   VG_(get_UnwindStartRegs)( &startRegs, tid );

   Addr stack_highest_byte = VG_(threads)[tid].client_stack_highest_byte;
   Addr stack_lowest_byte  = 0;

#  if defined(VGP_x86_linux)
   /* Nasty little hack to deal with syscalls - if libc is using its
      _dl_sysinfo_int80 function for syscalls (the TLS version does),
      then ip will always appear to be in that function when doing a
      syscall, not the actual libc function doing the syscall.  This
      check sees if IP is within that function, and pops the return
      address off the stack so that ip is placed within the library
      function calling the syscall.  This makes stack backtraces much
      more useful.

      The function is assumed to look like this (from glibc-2.3.6 sources):
         _dl_sysinfo_int80:
            int $0x80
            ret
      That is 3 (2+1) bytes long.  We could be more thorough and check
      the 3 bytes of the function are as expected, but I can't be
      bothered.
   */
   if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
       && startRegs.r_pc >= VG_(client__dl_sysinfo_int80)
       && startRegs.r_pc < VG_(client__dl_sysinfo_int80)+3
       && VG_(am_is_valid_for_client)(startRegs.r_pc, sizeof(Addr),
                                      VKI_PROT_READ)) {
      /* Simulate the 'ret': pop the RA into pc and bump sp. */
      startRegs.r_pc  = (ULong) *(Addr*)(UWord)startRegs.r_sp;
      startRegs.r_sp += (ULong) sizeof(Addr);
   }
#  endif

   /* See if we can get a better idea of the stack limits */
   VG_(stack_limits)( (Addr)startRegs.r_sp,
                      &stack_lowest_byte, &stack_highest_byte );

   /* Take into account the first_ip_delta and first_sp_delta. */
   startRegs.r_pc += (Long)first_ip_delta;
   startRegs.r_sp += (Long)first_sp_delta;

   if (0)
      VG_(printf)("tid %u: stack_highest=0x%08lx ip=0x%010llx "
                  "sp=0x%010llx\n",
                  tid, stack_highest_byte,
                  startRegs.r_pc, startRegs.r_sp);

   return VG_(get_StackTrace_wrk)(tid, ips, n_ips,
                                       sps, fps,
                                       &startRegs,
                                       stack_highest_byte);
}
1577 
1578 UInt VG_(get_StackTrace) ( ThreadId tid,
1579                            /*OUT*/StackTrace ips, UInt max_n_ips,
1580                            /*OUT*/StackTrace sps,
1581                            /*OUT*/StackTrace fps,
1582                            Word first_ip_delta )
1583 {
1584    return VG_(get_StackTrace_with_deltas) (tid,
1585                                            ips, max_n_ips,
1586                                            sps,
1587                                            fps,
1588                                            first_ip_delta,
1589                                            0 /* first_sp_delta */
1590                                            );
1591 }
1592 
1593 static void printIpDesc(UInt n, DiEpoch ep, Addr ip, void* uu_opaque)
1594 {
1595    InlIPCursor *iipc = VG_(new_IIPC)(ep, ip);
1596 
1597    do {
1598       const HChar *buf = VG_(describe_IP)(ep, ip, iipc);
1599       if (VG_(clo_xml)) {
1600          VG_(printf_xml)("    %s\n", buf);
1601       } else {
1602          VG_(message)(Vg_UserMsg, "   %s %s\n",
1603                       ( n == 0 ? "at" : "by" ), buf);
1604       }
1605       n++;
1606       // Increase n to show "at" for only one level.
1607    } while (VG_(next_IIPC)(iipc));
1608    VG_(delete_IIPC)(iipc);
1609 }
1610 
1611 /* Print a StackTrace. */
1612 void VG_(pp_StackTrace) ( DiEpoch ep, StackTrace ips, UInt n_ips )
1613 {
1614    vg_assert( n_ips > 0 );
1615 
1616    if (VG_(clo_xml))
1617       VG_(printf_xml)("  <stack>\n");
1618 
1619    VG_(apply_StackTrace)( printIpDesc, NULL, ep, ips, n_ips );
1620 
1621    if (VG_(clo_xml))
1622       VG_(printf_xml)("  </stack>\n");
1623 }
1624 
1625 /* Get and immediately print a StackTrace. */
1626 void VG_(get_and_pp_StackTrace) ( ThreadId tid, UInt max_n_ips )
1627 {
1628    Addr ips[max_n_ips];
1629    UInt n_ips
1630       = VG_(get_StackTrace)(tid, ips, max_n_ips,
1631                             NULL/*array to dump SP values in*/,
1632                             NULL/*array to dump FP values in*/,
1633                             0/*first_ip_delta*/);
1634    VG_(pp_StackTrace)(VG_(current_DiEpoch)(), ips, n_ips);
1635 }
1636 
1637 void VG_(apply_StackTrace)(
1638         void(*action)(UInt n, DiEpoch ep, Addr ip, void* opaque),
1639         void* opaque,
1640         DiEpoch ep, StackTrace ips, UInt n_ips
1641      )
1642 {
1643    Int i;
1644 
1645    vg_assert(n_ips > 0);
1646    if ( ! VG_(clo_show_below_main) ) {
1647       // Search (from the outer frame onwards) the appearance of "main"
1648       // or the last appearance of a below main function.
1649       // Then decrease n_ips so as to not call action for the below main
1650       for (i = n_ips - 1; i >= 0; i--) {
1651          Vg_FnNameKind kind = VG_(get_fnname_kind_from_IP)(ep, ips[i]);
1652          if (Vg_FnNameMain == kind || Vg_FnNameBelowMain == kind)
1653             n_ips = i + 1;
1654          if (Vg_FnNameMain == kind)
1655             break;
1656       }
1657    }
1658 
1659    for (i = 0; i < n_ips; i++)
1660       // Act on the ip
1661       action(i, ep, ips[i], opaque);
1662 }
1663 
1664 
1665 /*--------------------------------------------------------------------*/
1666 /*--- end                                                          ---*/
1667 /*--------------------------------------------------------------------*/
1668