1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "LulMain.h"
8 
9 #include <string.h>
10 #include <stdlib.h>
11 #include <stdio.h>
12 
13 #include <algorithm>  // std::sort
14 #include <string>
15 
16 #include "mozilla/Assertions.h"
17 #include "mozilla/ArrayUtils.h"
18 #include "mozilla/CheckedInt.h"
19 #include "mozilla/DebugOnly.h"
20 #include "mozilla/MemoryChecking.h"
21 #include "mozilla/Sprintf.h"
22 
23 #include "LulCommonExt.h"
24 #include "LulElfExt.h"
25 
26 #include "LulMainInt.h"
27 
28 #include "platform-linux-lul.h"  // for gettid()
29 
30 // Set this to 1 for verbose logging
31 #define DEBUG_MAIN 0
32 
33 namespace lul {
34 
35 using std::string;
36 using std::vector;
37 using std::pair;
38 using mozilla::CheckedInt;
39 using mozilla::DebugOnly;
40 
41 
42 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
43 //
44 // Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
45 // Any such function -- and, hence, the transitive closure of those
46 // reachable from it -- must not do any dynamic memory allocation.
47 // Doing so risks deadlock.  There is exactly one root function for
48 // the transitive closure: Lul::Unwind.
49 //
50 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
51 
52 
53 ////////////////////////////////////////////////////////////////
54 // RuleSet                                                    //
55 ////////////////////////////////////////////////////////////////
56 
57 static const char*
58 NameOf_DW_REG(int16_t aReg)
59 {
60   switch (aReg) {
61     case DW_REG_CFA:       return "cfa";
62 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
63     case DW_REG_INTEL_XBP: return "xbp";
64     case DW_REG_INTEL_XSP: return "xsp";
65     case DW_REG_INTEL_XIP: return "xip";
66 #elif defined(LUL_ARCH_arm)
67     case DW_REG_ARM_R7:    return "r7";
68     case DW_REG_ARM_R11:   return "r11";
69     case DW_REG_ARM_R12:   return "r12";
70     case DW_REG_ARM_R13:   return "r13";
71     case DW_REG_ARM_R14:   return "r14";
72     case DW_REG_ARM_R15:   return "r15";
73 #else
74 # error "Unsupported arch"
75 #endif
76     default: return "???";
77   }
78 }
79 
80 string
81 LExpr::ShowRule(const char* aNewReg) const
82 {
83   char buf[64];
84   string res = string(aNewReg) + "=";
85   switch (mHow) {
86     case UNKNOWN:
87       res += "Unknown";
88       break;
89     case NODEREF:
90       SprintfLiteral(buf, "%s+%d",
91                      NameOf_DW_REG(mReg), (int)mOffset);
92       res += buf;
93       break;
94     case DEREF:
95       SprintfLiteral(buf, "*(%s+%d)",
96                      NameOf_DW_REG(mReg), (int)mOffset);
97       res += buf;
98       break;
99     case PFXEXPR:
100       SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset);
101       res += buf;
102       break;
103     default:
104       res += "???";
105       break;
106   }
107   return res;
108 }
109 
110 void
111 RuleSet::Print(void(*aLog)(const char*)) const
112 {
113   char buf[96];
114   SprintfLiteral(buf, "[%llx .. %llx]: let ",
115                  (unsigned long long int)mAddr,
116                  (unsigned long long int)(mAddr + mLen - 1));
117   string res = string(buf);
118   res += mCfaExpr.ShowRule("cfa");
119   res += " in";
120   // For each reg we care about, print the recovery expression.
121 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
122   res += mXipExpr.ShowRule(" RA");
123   res += mXspExpr.ShowRule(" SP");
124   res += mXbpExpr.ShowRule(" BP");
125 #elif defined(LUL_ARCH_arm)
126   res += mR15expr.ShowRule(" R15");
127   res += mR7expr .ShowRule(" R7" );
128   res += mR11expr.ShowRule(" R11");
129   res += mR12expr.ShowRule(" R12");
130   res += mR13expr.ShowRule(" R13");
131   res += mR14expr.ShowRule(" R14");
132 #else
133 # error "Unsupported arch"
134 #endif
135   aLog(res.c_str());
136 }
137 
138 LExpr*
139 RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
140   switch (aRegno) {
141     case DW_REG_CFA: return &mCfaExpr;
142 #   if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
143     case DW_REG_INTEL_XIP: return &mXipExpr;
144     case DW_REG_INTEL_XSP: return &mXspExpr;
145     case DW_REG_INTEL_XBP: return &mXbpExpr;
146 #   elif defined(LUL_ARCH_arm)
147     case DW_REG_ARM_R15:   return &mR15expr;
148     case DW_REG_ARM_R14:   return &mR14expr;
149     case DW_REG_ARM_R13:   return &mR13expr;
150     case DW_REG_ARM_R12:   return &mR12expr;
151     case DW_REG_ARM_R11:   return &mR11expr;
152     case DW_REG_ARM_R7:    return &mR7expr;
153 #   else
154 #     error "Unknown arch"
155 #   endif
156     default: return nullptr;
157   }
158 }
159 
160 RuleSet::RuleSet()
161 {
162   mAddr = 0;
163   mLen  = 0;
164   // The only other fields are of type LExpr and those are initialised
165   // by LExpr::LExpr().
166 }
167 
168 
169 ////////////////////////////////////////////////////////////////
170 // SecMap                                                     //
171 ////////////////////////////////////////////////////////////////
172 
173 // See header file LulMainInt.h for comments about invariants.
174 
175 SecMap::SecMap(void(*aLog)(const char*))
176   : mSummaryMinAddr(1)
177   , mSummaryMaxAddr(0)
178   , mUsable(true)
179   , mLog(aLog)
180 {}
181 
182 SecMap::~SecMap() {
183   mRuleSets.clear();
184 }
185 
186 // RUNS IN NO-MALLOC CONTEXT
187 RuleSet*
188 SecMap::FindRuleSet(uintptr_t ia) {
189   // Binary search mRuleSets to find one that brackets |ia|.
190   // lo and hi need to be signed, else the loop termination tests
191   // don't work properly.  Note that this works correctly even when
192   // mRuleSets.size() == 0.
193 
194   // Can't do this until the array has been sorted and preened.
195   MOZ_ASSERT(mUsable);
196 
197   long int lo = 0;
198   long int hi = (long int)mRuleSets.size() - 1;
199   while (true) {
200     // current unsearched space is from lo to hi, inclusive.
201     if (lo > hi) {
202       // not found
203       return nullptr;
204     }
205     long int  mid         = lo + ((hi - lo) / 2);
206     RuleSet*  mid_ruleSet = &mRuleSets[mid];
207     uintptr_t mid_minAddr = mid_ruleSet->mAddr;
208     uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1;
209     if (ia < mid_minAddr) { hi = mid-1; continue; }
210     if (ia > mid_maxAddr) { lo = mid+1; continue; }
211     MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
212     return mid_ruleSet;
213   }
214   // NOTREACHED
215 }
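
// Illustrative walk-through (comment only, not part of the build), assuming
// three RuleSets covering [0x1000,0x1fff], [0x2000,0x2fff] and
// [0x5000,0x5fff]: FindRuleSet(0x2abc) lands on the middle entry and returns
// it, since 0x2000 <= 0x2abc <= 0x2fff, whereas FindRuleSet(0x3000) keeps
// narrowing lo/hi until lo > hi and returns nullptr, because no entry
// brackets that address.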
216 
217 // Add a RuleSet to the collection.  The rule is copied in.  Calling
218 // this makes the map non-searchable.
219 void
220 SecMap::AddRuleSet(const RuleSet* rs) {
221   mUsable = false;
222   mRuleSets.push_back(*rs);
223 }
224 
225 // Add a PfxInstr to the vector of such instrs, and return the index
226 // in the vector.  Calling this makes the map non-searchable.
227 uint32_t
228 SecMap::AddPfxInstr(PfxInstr pfxi) {
229   mUsable = false;
230   mPfxInstrs.push_back(pfxi);
231   return mPfxInstrs.size() - 1;
232 }
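
// Sketch of intended usage (hypothetical operand values, assuming the
// PfxInstr constructors declared in LulMainInt.h):
//
//   uint32_t ix = smap->AddPfxInstr(PfxInstr(PX_Start, 1));
//   smap->AddPfxInstr(PfxInstr(PX_SImm32, 8));
//   smap->AddPfxInstr(PfxInstr(PX_Add));
//   smap->AddPfxInstr(PfxInstr(PX_End));
//
// The returned index |ix| is what an LExpr with mHow == PFXEXPR stores in
// its mOffset field, so that EvaluatePfxExpr() below knows where the encoded
// expression starts within this SecMap's PfxInstr vector.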
233 
234 
235 static bool
236 CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
237   return rs1.mAddr < rs2.mAddr;
238 }
239 
240 // Prepare the map for searching.  Completely remove any RuleSets that
241 // don't fall inside the specified range [start, +len).
242 void
243 SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen)
244 {
245   if (mRuleSets.empty()) {
246     return;
247   }
248 
249   MOZ_ASSERT(aLen > 0);
250   if (aLen == 0) {
251     // This should never happen.
252     mRuleSets.clear();
253     return;
254   }
255 
256   // Sort by start addresses.
257   std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);
258 
259   // Detect any entry not completely contained within [start, +len).
260   // Set its length to zero, so that the next pass will remove it.
261   for (size_t i = 0; i < mRuleSets.size(); ++i) {
262     RuleSet* rs = &mRuleSets[i];
263     if (rs->mLen > 0 &&
264         (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
265       rs->mLen = 0;
266     }
267   }
268 
269   // Iteratively truncate any overlaps and remove any zero length
270   // entries that might result, or that may have been present
271   // initially.  Unless the input is seriously screwy, this is
272   // expected to iterate only once.
273   while (true) {
274     size_t i;
275     size_t n = mRuleSets.size();
276     size_t nZeroLen = 0;
277 
278     if (n == 0) {
279       break;
280     }
281 
282     for (i = 1; i < n; ++i) {
283       RuleSet* prev = &mRuleSets[i-1];
284       RuleSet* here = &mRuleSets[i];
285       MOZ_ASSERT(prev->mAddr <= here->mAddr);
286       if (prev->mAddr + prev->mLen > here->mAddr) {
287         prev->mLen = here->mAddr - prev->mAddr;
288       }
289       if (prev->mLen == 0)
290         nZeroLen++;
291     }
292 
293     if (mRuleSets[n-1].mLen == 0) {
294       nZeroLen++;
295     }
296 
297     // At this point, the entries are in-order and non-overlapping.
298     // If none of them are zero-length, we are done.
299     if (nZeroLen == 0) {
300       break;
301     }
302 
303     // Slide back the entries to remove the zero length ones.
304     size_t j = 0;  // The write-point.
305     for (i = 0; i < n; ++i) {
306       if (mRuleSets[i].mLen == 0) {
307         continue;
308       }
309       if (j != i) mRuleSets[j] = mRuleSets[i];
310       ++j;
311     }
312     MOZ_ASSERT(i == n);
313     MOZ_ASSERT(nZeroLen <= n);
314     MOZ_ASSERT(j == n - nZeroLen);
315     while (nZeroLen > 0) {
316       mRuleSets.pop_back();
317       nZeroLen--;
318     }
319 
320     MOZ_ASSERT(mRuleSets.size() == j);
321   }
322 
323   size_t n = mRuleSets.size();
324 
325 #ifdef DEBUG
326   // Do a final check on the rules: their address ranges must be
327   // ascending, non overlapping, non zero sized.
328   if (n > 0) {
329     MOZ_ASSERT(mRuleSets[0].mLen > 0);
330     for (size_t i = 1; i < n; ++i) {
331       RuleSet* prev = &mRuleSets[i-1];
332       RuleSet* here = &mRuleSets[i];
333       MOZ_ASSERT(prev->mAddr < here->mAddr);
334       MOZ_ASSERT(here->mLen > 0);
335       MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
336     }
337   }
338 #endif
339 
340   // Set the summary min and max address values.
341   if (n == 0) {
342     // Use the values defined in comments in the class declaration.
343     mSummaryMinAddr = 1;
344     mSummaryMaxAddr = 0;
345   } else {
346     mSummaryMinAddr = mRuleSets[0].mAddr;
347     mSummaryMaxAddr = mRuleSets[n-1].mAddr + mRuleSets[n-1].mLen - 1;
348   }
349   char buf[150];
350   SprintfLiteral(buf,
351                  "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
352                  (int)n, (unsigned long long int)mSummaryMinAddr,
353                          (unsigned long long int)mSummaryMaxAddr);
354   buf[sizeof(buf)-1] = 0;
355   mLog(buf);
356 
357   // Is now usable for binary search.
358   mUsable = true;
359 
360   if (0) {
361     mLog("\nRulesets after preening\n");
362     for (size_t i = 0; i < mRuleSets.size(); ++i) {
363       mRuleSets[i].Print(mLog);
364       mLog("\n");
365     }
366     mLog("\n");
367   }
368 }
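
// Worked example (illustrative only): with aStart = 0x1000, aLen = 0x1000
// and unsorted input ranges [0x1800,+0x100), [0x1000,+0x900), [0x3000,+0x10),
// the sort orders them by start address, the containment pass zeroes the
// length of [0x3000,+0x10) because it lies outside [0x1000,0x2000), the
// overlap pass truncates [0x1000,+0x900) to [0x1000,+0x800) so that it ends
// where [0x1800,+0x100) begins, and the compaction pass drops the zero-length
// entry, leaving two sorted, non-overlapping RuleSets.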
369 
370 bool SecMap::IsEmpty() {
371   return mRuleSets.empty();
372 }
373 
374 
375 ////////////////////////////////////////////////////////////////
376 // SegArray                                                   //
377 ////////////////////////////////////////////////////////////////
378 
379 // A SegArray holds a set of address ranges that together exactly
380 // cover an address range, with no overlaps or holes.  Each range has
381 // an associated value, which in this case has been specialised to be
382 // a simple boolean.  The representation is kept to minimal canonical
383 // form in which adjacent ranges with the same associated value are
384 // merged together.  Each range is represented by a |struct Seg|.
385 //
386 // SegArrays are used to keep track of which parts of the address
387 // space are known to contain instructions.
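//
// Example of the canonical form (illustrative only): starting from the
// single segment [0, UINTPTR_MAX]=false, add(0x1000, 0x1fff, true) splits
// that segment and marks the middle piece true; a subsequent
// add(0x2000, 0x2fff, true) is then merged by preen() with its true-valued
// neighbour, leaving
//   [0, 0xfff]=false  [0x1000, 0x2fff]=true  [0x3000, UINTPTR_MAX]=false.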
388 class SegArray {
389 
390  public:
391   void add(uintptr_t lo, uintptr_t hi, bool val) {
392     if (lo > hi) {
393       return;
394     }
395     split_at(lo);
396     if (hi < UINTPTR_MAX) {
397       split_at(hi+1);
398     }
399     std::vector<Seg>::size_type iLo, iHi, i;
400     iLo = find(lo);
401     iHi = find(hi);
402     for (i = iLo; i <= iHi; ++i) {
403       mSegs[i].val = val;
404     }
405     preen();
406   }
407 
408   // RUNS IN NO-MALLOC CONTEXT
409   bool getBoundingCodeSegment(/*OUT*/uintptr_t* rx_min,
410                               /*OUT*/uintptr_t* rx_max, uintptr_t addr) {
411     std::vector<Seg>::size_type i = find(addr);
412     if (!mSegs[i].val) {
413       return false;
414     }
415     *rx_min = mSegs[i].lo;
416     *rx_max = mSegs[i].hi;
417     return true;
418   }
419 
420   SegArray() {
421     Seg s(0, UINTPTR_MAX, false);
422     mSegs.push_back(s);
423   }
424 
425  private:
426   struct Seg {
427     Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
428     uintptr_t lo;
429     uintptr_t hi;
430     bool val;
431   };
432 
433   void preen() {
434     for (std::vector<Seg>::iterator iter = mSegs.begin();
435          iter < mSegs.end()-1;
436          ++iter) {
437       if (iter[0].val != iter[1].val) {
438         continue;
439       }
440       iter[0].hi = iter[1].hi;
441       mSegs.erase(iter+1);
442       // Back up one, so as not to miss an opportunity to merge
443       // with the entry after this one.
444       --iter;
445     }
446   }
447 
448   // RUNS IN NO-MALLOC CONTEXT
449   std::vector<Seg>::size_type find(uintptr_t a) {
450     long int lo = 0;
451     long int hi = (long int)mSegs.size();
452     while (true) {
453       // The unsearched space is lo .. hi inclusive.
454       if (lo > hi) {
455         // Not found.  This can't happen.
456         return (std::vector<Seg>::size_type)(-1);
457       }
458       long int  mid    = lo + ((hi - lo) / 2);
459       uintptr_t mid_lo = mSegs[mid].lo;
460       uintptr_t mid_hi = mSegs[mid].hi;
461       if (a < mid_lo) { hi = mid-1; continue; }
462       if (a > mid_hi) { lo = mid+1; continue; }
463       return (std::vector<Seg>::size_type)mid;
464     }
465   }
466 
467   void split_at(uintptr_t a) {
468     std::vector<Seg>::size_type i = find(a);
469     if (mSegs[i].lo == a) {
470       return;
471     }
472     mSegs.insert( mSegs.begin()+i+1, mSegs[i] );
473     mSegs[i].hi = a-1;
474     mSegs[i+1].lo = a;
475   }
476 
477   void show() {
478     printf("<< %d entries:\n", (int)mSegs.size());
479     for (std::vector<Seg>::iterator iter = mSegs.begin();
480          iter < mSegs.end();
481          ++iter) {
482       printf("  %016llx  %016llx  %s\n",
483              (unsigned long long int)(*iter).lo,
484              (unsigned long long int)(*iter).hi,
485              (*iter).val ? "true" : "false");
486     }
487     printf(">>\n");
488   }
489 
490   std::vector<Seg> mSegs;
491 };
492 
493 
494 ////////////////////////////////////////////////////////////////
495 // PriMap                                                     //
496 ////////////////////////////////////////////////////////////////
497 
498 class PriMap {
499  public:
500   explicit PriMap(void (*aLog)(const char*))
501     : mLog(aLog)
502   {}
503 
504   ~PriMap() {
505     for (std::vector<SecMap*>::iterator iter = mSecMaps.begin();
506          iter != mSecMaps.end();
507          ++iter) {
508       delete *iter;
509     }
510     mSecMaps.clear();
511   }
512 
513   // RUNS IN NO-MALLOC CONTEXT
514   pair<const RuleSet*, const vector<PfxInstr>*>
515   Lookup(uintptr_t ia)
516   {
517     SecMap* sm = FindSecMap(ia);
518     return pair<const RuleSet*, const vector<PfxInstr>*>
519              (sm ? sm->FindRuleSet(ia) : nullptr,
520               sm ? sm->GetPfxInstrs() : nullptr);
521   }
522 
523   // Add a secondary map.  No overlaps allowed w.r.t. existing
524   // secondary maps.
525   void AddSecMap(SecMap* aSecMap) {
526     // We can't add an empty SecMap to the PriMap.  But that's OK
527     // since we'd never be able to find anything in it anyway.
528     if (aSecMap->IsEmpty()) {
529       return;
530     }
531 
532     // Iterate through the SecMaps and find the right place for this
533     // one.  At the same time, ensure that the in-order
534     // non-overlapping invariant is preserved (and, generally, holds).
535     // FIXME: this gives a cost that is O(N^2) in the total number of
536     // shared objects in the system.  ToDo: better.
537     MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);
538 
539     size_t num_secMaps = mSecMaps.size();
540     uintptr_t i;
541     for (i = 0; i < num_secMaps; ++i) {
542       SecMap* sm_i = mSecMaps[i];
543       MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr);
544       if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) {
545         // |aSecMap| needs to be inserted immediately before mSecMaps[i].
546         break;
547       }
548     }
549     MOZ_ASSERT(i <= num_secMaps);
550     if (i == num_secMaps) {
551       // It goes at the end.
552       mSecMaps.push_back(aSecMap);
553     } else {
554       std::vector<SecMap*>::iterator iter = mSecMaps.begin() + i;
555       mSecMaps.insert(iter, aSecMap);
556     }
557     char buf[100];
558     SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
559                    (int)mSecMaps.size());
560     buf[sizeof(buf)-1] = 0;
561     mLog(buf);
562   }
563 
564   // Remove and delete any SecMaps in the mapping that intersect
565   // with the specified address range.
566   void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
567     MOZ_ASSERT(avma_min <= avma_max);
568     size_t num_secMaps = mSecMaps.size();
569     if (num_secMaps > 0) {
570       intptr_t i;
571       // Iterate from end to start over the vector, so as to ensure
572       // that the special case where |avma_min| and |avma_max| denote
573       // the entire address space, can be completed in time proportional
574       // to the number of elements in the map.
575       for (i = (intptr_t)num_secMaps-1; i >= 0; i--) {
576         SecMap* sm_i = mSecMaps[i];
577         if (sm_i->mSummaryMaxAddr < avma_min ||
578             avma_max < sm_i->mSummaryMinAddr) {
579           // There's no overlap.  Move on.
580           continue;
581         }
582         // We need to remove mSecMaps[i] and slide all those above it
583         // downwards to cover the hole.
584         mSecMaps.erase(mSecMaps.begin() + i);
585         delete sm_i;
586       }
587     }
588   }
589 
590   // Return the number of currently contained SecMaps.
591   size_t CountSecMaps() {
592     return mSecMaps.size();
593   }
594 
595   // Assess heuristically whether the given address is an instruction
596   // immediately following a call instruction.
597   // RUNS IN NO-MALLOC CONTEXT
598   bool MaybeIsReturnPoint(TaggedUWord aInstrAddr, SegArray* aSegArray) {
599     if (!aInstrAddr.Valid()) {
600       return false;
601     }
602 
603     uintptr_t ia = aInstrAddr.Value();
604 
605     // Assume that nobody would be crazy enough to put code in the
606     // first or last page.
607     if (ia < 4096 || ((uintptr_t)(-ia)) < 4096) {
608       return false;
609     }
610 
611     // See if it falls inside a known r-x mapped area.  Poking around
612     // outside such places risks segfaulting.
613     uintptr_t insns_min, insns_max;
614     bool b = aSegArray->getBoundingCodeSegment(&insns_min, &insns_max, ia);
615     if (!b) {
616       // no code (that we know about) at this address
617       return false;
618     }
619 
620     // |ia| falls within an r-x range.  So we can
621     // safely poke around in [insns_min, insns_max].
622 
623 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
624     // Is the previous instruction recognisably a CALL?  This is
625     // common for the 32- and 64-bit versions, except for the
626     // simm32(%rip) case, which is 64-bit only.
627     //
628     // For all other cases, the 64 bit versions are either identical
629     // to the 32 bit versions, or have an optional extra leading REX.B
630     // byte (0x41).  Since the extra 0x41 is optional we have to
631     // ignore it, with the convenient result that the same matching
632     // logic works for both 32- and 64-bit cases.
633 
634     uint8_t* p = (uint8_t*)ia;
635 #   if defined(LUL_ARCH_x64)
636     // CALL simm32(%rip)  == FF15 simm32
637     if (ia - 6 >= insns_min && p[-6] == 0xFF && p[-5] == 0x15) {
638       return true;
639     }
640 #   endif
641     // CALL rel32  == E8 rel32  (both 32- and 64-bit)
642     if (ia - 5 >= insns_min && p[-5] == 0xE8) {
643       return true;
644     }
645     // CALL *%eax .. CALL *%edi  ==   FFD0 ..   FFD7  (32-bit)
646     // CALL *%rax .. CALL *%rdi  ==   FFD0 ..   FFD7  (64-bit)
647     // CALL *%r8  .. CALL *%r15  == 41FFD0 .. 41FFD7  (64-bit)
648     if (ia - 2 >= insns_min &&
649         p[-2] == 0xFF && p[-1] >= 0xD0 && p[-1] <= 0xD7) {
650       return true;
651     }
652     // Almost all of the remaining cases that occur in practice are
653     // of the form CALL *simm8(reg) or CALL *simm32(reg).
654     //
655     // 64 bit cases:
656     //
657     // call  *simm8(%rax)         FF50   simm8
658     // call  *simm8(%rcx)         FF51   simm8
659     // call  *simm8(%rdx)         FF52   simm8
660     // call  *simm8(%rbx)         FF53   simm8
661     // call  *simm8(%rsp)         FF5424 simm8
662     // call  *simm8(%rbp)         FF55   simm8
663     // call  *simm8(%rsi)         FF56   simm8
664     // call  *simm8(%rdi)         FF57   simm8
665     //
666     // call  *simm8(%r8)        41FF50   simm8
667     // call  *simm8(%r9)        41FF51   simm8
668     // call  *simm8(%r10)       41FF52   simm8
669     // call  *simm8(%r11)       41FF53   simm8
670     // call  *simm8(%r12)       41FF5424 simm8
671     // call  *simm8(%r13)       41FF55   simm8
672     // call  *simm8(%r14)       41FF56   simm8
673     // call  *simm8(%r15)       41FF57   simm8
674     //
675     // call  *simm32(%rax)        FF90   simm32
676     // call  *simm32(%rcx)        FF91   simm32
677     // call  *simm32(%rdx)        FF92   simm32
678     // call  *simm32(%rbx)        FF93   simm32
679     // call  *simm32(%rsp)        FF9424 simm32
680     // call  *simm32(%rbp)        FF95   simm32
681     // call  *simm32(%rsi)        FF96   simm32
682     // call  *simm32(%rdi)        FF97   simm32
683     //
684     // call  *simm32(%r8)       41FF90   simm32
685     // call  *simm32(%r9)       41FF91   simm32
686     // call  *simm32(%r10)      41FF92   simm32
687     // call  *simm32(%r11)      41FF93   simm32
688     // call  *simm32(%r12)      41FF9424 simm32
689     // call  *simm32(%r13)      41FF95   simm32
690     // call  *simm32(%r14)      41FF96   simm32
691     // call  *simm32(%r15)      41FF97   simm32
692     //
693     // 32 bit cases:
694     //
695     // call  *simm8(%eax)         FF50   simm8
696     // call  *simm8(%ecx)         FF51   simm8
697     // call  *simm8(%edx)         FF52   simm8
698     // call  *simm8(%ebx)         FF53   simm8
699     // call  *simm8(%esp)         FF5424 simm8
700     // call  *simm8(%ebp)         FF55   simm8
701     // call  *simm8(%esi)         FF56   simm8
702     // call  *simm8(%edi)         FF57   simm8
703     //
704     // call  *simm32(%eax)        FF90   simm32
705     // call  *simm32(%ecx)        FF91   simm32
706     // call  *simm32(%edx)        FF92   simm32
707     // call  *simm32(%ebx)        FF93   simm32
708     // call  *simm32(%esp)        FF9424 simm32
709     // call  *simm32(%ebp)        FF95   simm32
710     // call  *simm32(%esi)        FF96   simm32
711     // call  *simm32(%edi)        FF97   simm32
712     if (ia - 3 >= insns_min &&
713         p[-3] == 0xFF &&
714         (p[-2] >= 0x50 && p[-2] <= 0x57 && p[-2] != 0x54)) {
715       // imm8 case, not including %esp/%rsp
716       return true;
717     }
718     if (ia - 4 >= insns_min &&
719         p[-4] == 0xFF && p[-3] == 0x54 && p[-2] == 0x24) {
720       // imm8 case for %esp/%rsp
721       return true;
722     }
723     if (ia - 6 >= insns_min &&
724         p[-6] == 0xFF &&
725         (p[-5] >= 0x90 && p[-5] <= 0x97 && p[-5] != 0x94)) {
726       // imm32 case, not including %esp/%rsp
727       return true;
728     }
729     if (ia - 7 >= insns_min &&
730         p[-7] == 0xFF && p[-6] == 0x94 && p[-5] == 0x24) {
731       // imm32 case for %esp/%rsp
732       return true;
733     }
734 
735 #elif defined(LUL_ARCH_arm)
736     if (ia & 1) {
737       uint16_t w0 = 0, w1 = 0;
738       // The return address has its lowest bit set, indicating a return
739       // to Thumb code.
740       ia &= ~(uintptr_t)1;
741       if (ia - 2 >= insns_min && ia - 1 <= insns_max) {
742         w1 = *(uint16_t*)(ia - 2);
743       }
744       if (ia - 4 >= insns_min && ia - 1 <= insns_max) {
745         w0 = *(uint16_t*)(ia - 4);
746       }
747       // Is it a 32-bit Thumb call insn?
748       // BL  simm26 (Encoding T1)
749       if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) {
750         return true;
751       }
752       // BLX simm26 (Encoding T2); the coarse mask makes this the same test as BL above.
753       if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) {
754         return true;
755       }
756       // Other possible cases:
757       // (BLX Rm, Encoding T1).
758       // BLX Rm (encoding T1, 16 bit, inspect w1 and ignore w0.)
759       // 0100 0111 1 Rm 000
760     } else {
761       // Returning to ARM code.
762       uint32_t a0 = 0;
763       if ((ia & 3) == 0 && ia - 4 >= insns_min && ia - 1 <= insns_max) {
764         a0 = *(uint32_t*)(ia - 4);
765       }
766       // Leading E forces unconditional only -- fix.  It could be
767       // anything except F, which is the deprecated NV code.
768       // BL simm26 (Encoding A1)
769       if ((a0 & 0xFF000000) == 0xEB000000) {
770         return true;
771       }
772       // Other possible cases:
773       // BLX simm26 (Encoding A2)
774       //if ((a0 & 0xFE000000) == 0xFA000000)
775       //  return true;
776       // BLX (register) (A1): BLX <c> <Rm>
777       // cond 0001 0010 1111 1111 1111 0011 Rm
778       // again, cond can be anything except NV (0xF)
779     }
780 
781 #else
782 # error "Unsupported arch"
783 #endif
784 
785     // Not an insn we recognise.
786     return false;
787   }
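
  // Illustrative example (comment only): on x86-64, if the bytes immediately
  // before |ia| are 41 FF 94 24 <imm32>, that is, CALL *simm32(%r12), then
  // p[-7]==0xFF, p[-6]==0x94 and p[-5]==0x24, so the imm32-with-SIB case
  // above fires and |ia| is accepted as a plausible return point.  An
  // address preceded only by NOPs (0x90) matches none of the patterns and
  // is rejected.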
788 
789  private:
790   // RUNS IN NO-MALLOC CONTEXT
791   SecMap* FindSecMap(uintptr_t ia) {
792     // Binary search mSecMaps to find one that brackets |ia|.
793     // lo and hi need to be signed, else the loop termination tests
794     // don't work properly.
795     long int lo = 0;
796     long int hi = (long int)mSecMaps.size() - 1;
797     while (true) {
798       // current unsearched space is from lo to hi, inclusive.
799       if (lo > hi) {
800         // not found
801         return nullptr;
802       }
803       long int  mid         = lo + ((hi - lo) / 2);
804       SecMap*   mid_secMap  = mSecMaps[mid];
805       uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr;
806       uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr;
807       if (ia < mid_minAddr) { hi = mid-1; continue; }
808       if (ia > mid_maxAddr) { lo = mid+1; continue; }
809       MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
810       return mid_secMap;
811     }
812     // NOTREACHED
813   }
814 
815  private:
816   // sorted array of per-object ranges, non overlapping, non empty
817   std::vector<SecMap*> mSecMaps;
818 
819   // a logging sink, for debugging.
820   void (*mLog)(const char*);
821 };
822 
823 
824 ////////////////////////////////////////////////////////////////
825 // LUL                                                        //
826 ////////////////////////////////////////////////////////////////
827 
828 #define LUL_LOG(_str) \
829   do { \
830     char buf[200]; \
831     SprintfLiteral(buf, \
832                    "LUL: pid %d tid %d lul-obj %p: %s", \
833                    getpid(), gettid(), this, (_str));   \
834     buf[sizeof(buf)-1] = 0; \
835     mLog(buf); \
836   } while (0)
837 
838 LUL::LUL(void (*aLog)(const char*))
839   : mLog(aLog)
840   , mAdminMode(true)
841   , mAdminThreadId(gettid())
842   , mPriMap(new PriMap(aLog))
843   , mSegArray(new SegArray())
844   , mUSU(new UniqueStringUniverse())
845 {
846   LUL_LOG("LUL::LUL: Created object");
847 }
848 
849 
850 LUL::~LUL()
851 {
852   LUL_LOG("LUL::~LUL: Destroyed object");
853   delete mPriMap;
854   delete mSegArray;
855   mLog = nullptr;
856   delete mUSU;
857 }
858 
859 
860 void
861 LUL::MaybeShowStats()
862 {
863   // This is racy in the sense that it can't guarantee that
864   //   n_new == n_new_Context + n_new_CFI + n_new_Scanned
865   // if it should happen that mStats is updated by some other thread
866   // in between computation of n_new and n_new_{Context,CFI,Scanned}.
867   // But it's just stats printing, so we don't really care.
868   uint32_t n_new = mStats - mStatsPrevious;
869   if (n_new >= 5000) {
870     uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
871     uint32_t n_new_CFI     = mStats.mCFI     - mStatsPrevious.mCFI;
872     uint32_t n_new_Scanned = mStats.mScanned - mStatsPrevious.mScanned;
873     mStatsPrevious = mStats;
874     char buf[200];
875     SprintfLiteral(buf,
876                    "LUL frame stats: TOTAL %5u"
877                    "    CTX %4u    CFI %4u    SCAN %4u",
878                    n_new, n_new_Context, n_new_CFI, n_new_Scanned);
879     buf[sizeof(buf)-1] = 0;
880     mLog(buf);
881   }
882 }
883 
884 
885 void
886 LUL::EnableUnwinding()
887 {
888   LUL_LOG("LUL::EnableUnwinding");
889   // Don't assert for Admin mode here.  That is, tolerate a call here
890   // if we are already in Unwinding mode.
891   MOZ_ASSERT(gettid() == mAdminThreadId);
892 
893   mAdminMode = false;
894 }
895 
896 
897 void
898 LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize,
899                     const char* aFileName, const void* aMappedImage)
900 {
901   MOZ_ASSERT(mAdminMode);
902   MOZ_ASSERT(gettid() == mAdminThreadId);
903 
904   mLog(":\n");
905   char buf[200];
906   SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
907                  (unsigned long long int)aRXavma, (unsigned long long int)aSize,
908                  aFileName);
909   buf[sizeof(buf)-1] = 0;
910   mLog(buf);
911 
912   // Ignore obviously-stupid notifications.
913   if (aSize > 0) {
914 
915     // Here's a new mapping, for this object.
916     SecMap* smap = new SecMap(mLog);
917 
918     // Read CFI or EXIDX unwind data into |smap|.
919     if (!aMappedImage) {
920       (void)lul::ReadSymbolData(
921               string(aFileName), std::vector<string>(), smap,
922               (void*)aRXavma, aSize, mUSU, mLog);
923     } else {
924       (void)lul::ReadSymbolDataInternal(
925               (const uint8_t*)aMappedImage,
926               string(aFileName), std::vector<string>(), smap,
927               (void*)aRXavma, aSize, mUSU, mLog);
928     }
929 
930     mLog("NotifyMap .. preparing entries\n");
931 
932     smap->PrepareRuleSets(aRXavma, aSize);
933 
934     SprintfLiteral(buf,
935                    "NotifyMap got %lld entries\n", (long long int)smap->Size());
936     buf[sizeof(buf)-1] = 0;
937     mLog(buf);
938 
939     // Add it to the primary map (the top level set of mapped objects).
940     mPriMap->AddSecMap(smap);
941 
942     // Tell the segment array about the mapping, so that the stack
943     // scan and __kernel_syscall mechanisms know where valid code is.
944     mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
945   }
946 }
947 
948 
949 void
950 LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize)
951 {
952   MOZ_ASSERT(mAdminMode);
953   MOZ_ASSERT(gettid() == mAdminThreadId);
954 
955   mLog(":\n");
956   char buf[200];
957   SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n",
958                    (unsigned long long int)aRXavma, (unsigned long long int)aSize);
959   buf[sizeof(buf)-1] = 0;
960   mLog(buf);
961 
962   // Ignore obviously-stupid notifications.
963   if (aSize > 0) {
964     // Tell the segment array about the mapping, so that the stack
965     // scan and __kernel_syscall mechanisms know where valid code is.
966     mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
967   }
968 }
969 
970 
971 void
972 LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax)
973 {
974   MOZ_ASSERT(mAdminMode);
975   MOZ_ASSERT(gettid() == mAdminThreadId);
976 
977   mLog(":\n");
978   char buf[100];
979   SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",
980                  (unsigned long long int)aRXavmaMin,
981                  (unsigned long long int)aRXavmaMax);
982   buf[sizeof(buf)-1] = 0;
983   mLog(buf);
984 
985   MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);
986 
987   // Remove from the primary map, any secondary maps that intersect
988   // with the address range.  Also delete the secondary maps.
989   mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);
990 
991   // Tell the segment array that the address range no longer
992   // contains valid code.
993   mSegArray->add(aRXavmaMin, aRXavmaMax, false);
994 
995   SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",
996                  (int)mPriMap->CountSecMaps());
997   buf[sizeof(buf)-1] = 0;
998   mLog(buf);
999 }
1000 
1001 
1002 size_t
1003 LUL::CountMappings()
1004 {
1005   MOZ_ASSERT(mAdminMode);
1006   MOZ_ASSERT(gettid() == mAdminThreadId);
1007 
1008   return mPriMap->CountSecMaps();
1009 }
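
// Typical admin-then-unwind sequence (a sketch only; the buffer sizes and
// the |regs|/|stackImg| setup are hypothetical and depend on the caller):
//
//   LUL* lul = new LUL(logging_sink);
//   lul->NotifyAfterMap(rx_avma, rx_size, "/path/to/libfoo.so", nullptr);
//   ...                       // more NotifyAfterMap / NotifyExecutableArea
//   lul->EnableUnwinding();   // leave admin mode
//   ...
//   uintptr_t framePCs[256], frameSPs[256];
//   size_t nFrames = 0, nScanned = 0;
//   lul->Unwind(framePCs, frameSPs, &nFrames, &nScanned,
//               256, /*aScannedFramesAllowed*/0, &regs, &stackImg);
//
// All the Notify* calls must be made on the thread that created the LUL
// object; Unwind() itself runs in the no-malloc (sampling) context.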
1010 
1011 
1012 // RUNS IN NO-MALLOC CONTEXT
1013 static
1014 TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg)
1015 {
1016   if (!aAddr.Valid()) {
1017     return TaggedUWord();
1018   }
1019 
1020   // Lower limit check.  |aAddr.Value()| is the lowest requested address
1021   // and |aStackImg->mStartAvma| is the lowest address we actually have,
1022   // so the comparison is straightforward.
1023   if (aAddr.Value() < aStackImg->mStartAvma) {
1024     return TaggedUWord();
1025   }
1026 
1027   // Upper limit check.  We must compute the highest requested address
1028   // and the highest address we actually have, but being careful to
1029   // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
1030   // 3 (on 32-bit) or 7 (on 64-bit) values below that, then we will get overflow.  See bug #1245477.
1031   typedef CheckedInt<uintptr_t> CheckedUWord;
1032   CheckedUWord highest_requested_plus_one
1033     = CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
1034   CheckedUWord highest_available_plus_one
1035     = CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
1036   if (!highest_requested_plus_one.isValid()     // overflow?
1037       || !highest_available_plus_one.isValid()  // overflow?
1038       || (highest_requested_plus_one.value()
1039           > highest_available_plus_one.value())) { // in range?
1040     return TaggedUWord();
1041   }
1042 
1043   return TaggedUWord(*(uintptr_t*)(aStackImg->mContents + aAddr.Value()
1044                                    - aStackImg->mStartAvma));
1045 }
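
// Example of the overflow concern (illustrative only, 64-bit): if
// aAddr.Value() is 0xFFFFFFFFFFFFFFF8, adding sizeof(uintptr_t) with plain
// arithmetic would wrap to zero and incorrectly pass an upper-limit check;
// CheckedInt instead flags the addition as invalid, so the dereference is
// refused and an invalid TaggedUWord is returned.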
1046 
1047 // RUNS IN NO-MALLOC CONTEXT
1048 static
1049 TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
1050                         TaggedUWord aCFA)
1051 {
1052   switch (aReg) {
1053     case DW_REG_CFA:       return aCFA;
1054 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1055     case DW_REG_INTEL_XBP: return aOldRegs->xbp;
1056     case DW_REG_INTEL_XSP: return aOldRegs->xsp;
1057     case DW_REG_INTEL_XIP: return aOldRegs->xip;
1058 #elif defined(LUL_ARCH_arm)
1059     case DW_REG_ARM_R7:    return aOldRegs->r7;
1060     case DW_REG_ARM_R11:   return aOldRegs->r11;
1061     case DW_REG_ARM_R12:   return aOldRegs->r12;
1062     case DW_REG_ARM_R13:   return aOldRegs->r13;
1063     case DW_REG_ARM_R14:   return aOldRegs->r14;
1064     case DW_REG_ARM_R15:   return aOldRegs->r15;
1065 #else
1066 # error "Unsupported arch"
1067 #endif
1068     default: MOZ_ASSERT(0); return TaggedUWord();
1069   }
1070 }
1071 
1072 // RUNS IN NO-MALLOC CONTEXT
1073 // See prototype for comment.
1074 TaggedUWord EvaluatePfxExpr(int32_t start,
1075                             const UnwindRegs* aOldRegs,
1076                             TaggedUWord aCFA, const StackImage* aStackImg,
1077                             const vector<PfxInstr>& aPfxInstrs)
1078 {
1079   // A small evaluation stack, and a stack pointer, which points to
1080   // the highest numbered in-use element.
1081   const int N_STACK = 10;
1082   TaggedUWord stack[N_STACK];
1083   int stackPointer = -1;
1084   for (int i = 0; i < N_STACK; i++)
1085     stack[i] = TaggedUWord();
1086 
1087 # define PUSH(_tuw) \
1088     do { \
1089       if (stackPointer >= N_STACK-1) goto fail; /* overflow */ \
1090       stack[++stackPointer] = (_tuw); \
1091     } while (0)
1092 
1093 # define POP(_lval) \
1094     do { \
1095       if (stackPointer < 0) goto fail; /* underflow */ \
1096      _lval = stack[stackPointer--]; \
1097    } while (0)
1098 
1099   // Cursor in the instruction sequence.
1100   size_t curr = start + 1;
1101 
1102   // Check the start point is sane.
1103   size_t nInstrs = aPfxInstrs.size();
1104   if (start < 0 || (size_t)start >= nInstrs)
1105     goto fail;
1106 
1107   {
1108     // The instruction sequence must start with PX_Start.  If not,
1109     // something is seriously wrong.
1110     PfxInstr first = aPfxInstrs[start];
1111     if (first.mOpcode != PX_Start)
1112       goto fail;
1113 
1114     // Push the CFA on the stack to start with (or not), as required by
1115     // the original DW_OP_*expression* CFI.
1116     if (first.mOperand != 0)
1117       PUSH(aCFA);
1118   }
1119 
1120   while (true) {
1121     if (curr >= nInstrs)
1122       goto fail; // ran off the end of the sequence
1123 
1124     PfxInstr pfxi = aPfxInstrs[curr++];
1125     if (pfxi.mOpcode == PX_End)
1126       break; // we're done
1127 
1128     switch (pfxi.mOpcode) {
1129       case PX_Start:
1130         // This should appear only at the start of the sequence.
1131         goto fail;
1132       case PX_End:
1133         // We just took care of that, so we shouldn't see it again.
1134         MOZ_ASSERT(0);
1135         goto fail;
1136       case PX_SImm32:
1137         PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
1138         break;
1139       case PX_DwReg: {
1140         DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
1141         MOZ_ASSERT(reg != DW_REG_CFA);
1142         PUSH(EvaluateReg(reg, aOldRegs, aCFA));
1143         break;
1144       }
1145       case PX_Deref: {
1146         TaggedUWord addr;
1147         POP(addr);
1148         PUSH(DerefTUW(addr, aStackImg));
1149         break;
1150       }
1151       case PX_Add: {
1152         TaggedUWord x, y;
1153         POP(x); POP(y); PUSH(y + x);
1154         break;
1155       }
1156       case PX_Sub: {
1157         TaggedUWord x, y;
1158         POP(x); POP(y); PUSH(y - x);
1159         break;
1160       }
1161       case PX_And: {
1162         TaggedUWord x, y;
1163         POP(x); POP(y); PUSH(y & x);
1164         break;
1165       }
1166       case PX_Or: {
1167         TaggedUWord x, y;
1168         POP(x); POP(y); PUSH(y | x);
1169         break;
1170       }
1171       case PX_CmpGES: {
1172         TaggedUWord x, y;
1173         POP(x); POP(y); PUSH(y.CmpGEs(x));
1174         break;
1175       }
1176       case PX_Shl: {
1177         TaggedUWord x, y;
1178         POP(x); POP(y); PUSH(y << x);
1179         break;
1180       }
1181       default:
1182         MOZ_ASSERT(0);
1183         goto fail;
1184     }
1185   } // while (true)
1186 
1187   // Evaluation finished.  The top value on the stack is the result.
1188   if (stackPointer >= 0) {
1189     return stack[stackPointer];
1190   }
1191   // Else fall through
1192 
1193  fail:
1194   return TaggedUWord();
1195 
1196 # undef PUSH
1197 # undef POP
1198 }
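
// Worked example (comment only, hypothetical instruction sequence): the
// sequence
//
//   PX_Start(1), PX_SImm32(8), PX_Add, PX_Deref, PX_End
//
// first pushes the CFA (because the PX_Start operand is nonzero), then
// pushes 8, adds to give CFA+8, dereferences that address in the stack
// image, and returns the loaded word -- the equivalent of a DEREF rule
// "*(cfa+8)", but expressed as a general prefix expression.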
1199 
1200 // RUNS IN NO-MALLOC CONTEXT
1201 TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs,
1202                                 TaggedUWord aCFA, const StackImage* aStackImg,
1203                                 const vector<PfxInstr>* aPfxInstrs) const
1204 {
1205   switch (mHow) {
1206     case UNKNOWN:
1207       return TaggedUWord();
1208     case NODEREF: {
1209       TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1210       tuw = tuw + TaggedUWord((intptr_t)mOffset);
1211       return tuw;
1212     }
1213     case DEREF: {
1214       TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1215       tuw = tuw + TaggedUWord((intptr_t)mOffset);
1216       return DerefTUW(tuw, aStackImg);
1217     }
1218     case PFXEXPR: {
1219       MOZ_ASSERT(aPfxInstrs);
1220       if (!aPfxInstrs) {
1221         return TaggedUWord();
1222       }
1223       return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs);
1224     }
1225     default:
1226       MOZ_ASSERT(0);
1227       return TaggedUWord();
1228   }
1229 }
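
// Quick reference (illustrative): with mReg == DW_REG_CFA and mOffset == -8,
// a NODEREF rule yields the value CFA-8 itself, a DEREF rule yields the word
// loaded from address CFA-8 in the captured stack image, and a PFXEXPR rule
// instead treats mOffset as an index into the PfxInstr vector and hands
// evaluation over to EvaluatePfxExpr() above.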
1230 
1231 // RUNS IN NO-MALLOC CONTEXT
1232 static
1233 void UseRuleSet(/*MOD*/UnwindRegs* aRegs,
1234                 const StackImage* aStackImg, const RuleSet* aRS,
1235                 const vector<PfxInstr>* aPfxInstrs)
1236 {
1237   // Take a copy of regs, since we'll need to refer to the old values
1238   // whilst computing the new ones.
1239   UnwindRegs old_regs = *aRegs;
1240 
1241   // Mark all the current register values as invalid, so that the
1242   // caller can see, on our return, which ones have been computed
1243   // anew.  If we don't even manage to compute a new PC value, then
1244   // the caller will have to abandon the unwind.
1245   // FIXME: Create and use instead: aRegs->SetAllInvalid();
1246 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1247   aRegs->xbp = TaggedUWord();
1248   aRegs->xsp = TaggedUWord();
1249   aRegs->xip = TaggedUWord();
1250 #elif defined(LUL_ARCH_arm)
1251   aRegs->r7  = TaggedUWord();
1252   aRegs->r11 = TaggedUWord();
1253   aRegs->r12 = TaggedUWord();
1254   aRegs->r13 = TaggedUWord();
1255   aRegs->r14 = TaggedUWord();
1256   aRegs->r15 = TaggedUWord();
1257 #else
1258 #  error "Unsupported arch"
1259 #endif
1260 
1261   // This is generally useful.
1262   const TaggedUWord inval = TaggedUWord();
1263 
1264   // First, compute the CFA.
1265   TaggedUWord cfa
1266     = aRS->mCfaExpr.EvaluateExpr(&old_regs,
1267                                  inval/*old cfa*/, aStackImg, aPfxInstrs);
1268 
1269   // If we didn't manage to compute the CFA, well .. that's ungood,
1270   // but keep going anyway.  It'll be OK provided none of the register
1271   // value rules mention the CFA.  In any case, compute the new values
1272   // for each register that we're tracking.
1273 
1274 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1275   aRegs->xbp
1276     = aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1277   aRegs->xsp
1278     = aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1279   aRegs->xip
1280     = aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1281 #elif defined(LUL_ARCH_arm)
1282   aRegs->r7
1283     = aRS->mR7expr .EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1284   aRegs->r11
1285     = aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1286   aRegs->r12
1287     = aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1288   aRegs->r13
1289     = aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1290   aRegs->r14
1291     = aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1292   aRegs->r15
1293     = aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1294 #else
1295 # error "Unsupported arch"
1296 #endif
1297 
1298   // We're done.  Any regs for which we didn't manage to compute a
1299   // new value will now be marked as invalid.
1300 }
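
// Illustrative example (comment only): for a frame whose RuleSet prints as
// "cfa=xsp+16  RA=*(cfa+-8)  SP=cfa+0  BP=*(cfa+-16)", UseRuleSet first
// computes the CFA from the old xsp, then recovers the return address and
// the saved frame pointer by dereferencing cfa-8 and cfa-16 in the stack
// image, and sets the new xsp to the CFA itself.  Any rule that cannot be
// evaluated (for example, a load falling outside the stack image) leaves
// the corresponding register marked invalid.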
1301 
1302 // RUNS IN NO-MALLOC CONTEXT
1303 void
1304 LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
1305             /*OUT*/uintptr_t* aFrameSPs,
1306             /*OUT*/size_t* aFramesUsed,
1307             /*OUT*/size_t* aScannedFramesAcquired,
1308             size_t aFramesAvail,
1309             size_t aScannedFramesAllowed,
1310             UnwindRegs* aStartRegs, StackImage* aStackImg)
1311 {
1312   MOZ_ASSERT(!mAdminMode);
1313 
1314   /////////////////////////////////////////////////////////
1315   // BEGIN UNWIND
1316 
1317   *aFramesUsed = 0;
1318 
1319   UnwindRegs  regs          = *aStartRegs;
1320   TaggedUWord last_valid_sp = TaggedUWord();
1321 
1322   // Stack-scan control
1323   unsigned int n_scanned_frames      = 0;  // # s-s frames recovered so far
1324   static const int NUM_SCANNED_WORDS = 50; // max allowed scan length
1325 
1326   while (true) {
1327 
1328     if (DEBUG_MAIN) {
1329       char buf[300];
1330       mLog("\n");
1331 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1332       SprintfLiteral(buf,
1333                      "LoopTop: rip %d/%llx  rsp %d/%llx  rbp %d/%llx\n",
1334                      (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
1335                      (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
1336                      (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
1337       buf[sizeof(buf)-1] = 0;
1338       mLog(buf);
1339 #elif defined(LUL_ARCH_arm)
1340       SprintfLiteral(buf,
1341                      "LoopTop: r15 %d/%llx  r7 %d/%llx  r11 %d/%llx"
1342                      "  r12 %d/%llx  r13 %d/%llx  r14 %d/%llx\n",
1343                      (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
1344                      (int)regs.r7.Valid(),  (unsigned long long int)regs.r7.Value(),
1345                      (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
1346                      (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
1347                      (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
1348                      (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
1349       buf[sizeof(buf)-1] = 0;
1350       mLog(buf);
1351 #else
1352 # error "Unsupported arch"
1353 #endif
1354     }
1355 
1356 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1357     TaggedUWord ia = regs.xip;
1358     TaggedUWord sp = regs.xsp;
1359 #elif defined(LUL_ARCH_arm)
1360     TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
1361     TaggedUWord sp = regs.r13;
1362 #else
1363 # error "Unsupported arch"
1364 #endif
1365 
1366     if (*aFramesUsed >= aFramesAvail) {
1367       break;
1368     }
1369 
1370     // If we don't have a valid value for the PC, give up.
1371     if (!ia.Valid()) {
1372       break;
1373     }
1374 
1375     // If this is the innermost frame, record the SP value, which
1376     // presumably is valid.  If this isn't the innermost frame, and we
1377     // have a valid SP value, check that its SP value isn't less than
1378     // the one we've seen so far, so as to catch potential SP value
1379     // cycles.
1380     if (*aFramesUsed == 0) {
1381       last_valid_sp = sp;
1382     } else {
1383       MOZ_ASSERT(last_valid_sp.Valid());
1384       if (sp.Valid()) {
1385         if (sp.Value() < last_valid_sp.Value()) {
1386           // Hmm, SP going in the wrong direction.  Let's stop.
1387           break;
1388         }
1389         // Remember where we got to.
1390         last_valid_sp = sp;
1391       }
1392     }
1393 
1394     // For the innermost frame, the IA value is what we need.  For all
1395     // other frames, it's actually the return address, so back up one
1396     // byte so as to get it into the calling instruction.
1397     aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
1398     aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
1399     (*aFramesUsed)++;
1400 
1401     // Find the RuleSet for the current IA, if any.  This will also
1402     // query the backing (secondary) maps if it isn't found in the
1403     // thread-local cache.
1404 
1405     // If this isn't the innermost frame, back up into the calling insn.
1406     if (*aFramesUsed > 1) {
1407       ia = ia + TaggedUWord((uintptr_t)(-1));
1408     }
1409 
1410     pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs
1411       = mPriMap->Lookup(ia.Value());
1412     const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
1413     const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;
1414 
1415     if (DEBUG_MAIN) {
1416       char buf[100];
1417       SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
1418                      (unsigned long long int)ia.Value(), ruleset);
1419       buf[sizeof(buf)-1] = 0;
1420       mLog(buf);
1421     }
1422 
1423     /////////////////////////////////////////////
1424     ////
1425     // On 32 bit x86-linux, syscalls are often done via the VDSO
1426     // function __kernel_vsyscall, which doesn't have a corresponding
1427     // object that we can read debuginfo from.  That effectively kills
1428     // off all stack traces for threads blocked in syscalls.  Hence
1429     // special-case by looking at the code surrounding the program
1430     // counter.
1431     //
1432     // 0xf7757420 <__kernel_vsyscall+0>:	push   %ecx
1433     // 0xf7757421 <__kernel_vsyscall+1>:	push   %edx
1434     // 0xf7757422 <__kernel_vsyscall+2>:	push   %ebp
1435     // 0xf7757423 <__kernel_vsyscall+3>:	mov    %esp,%ebp
1436     // 0xf7757425 <__kernel_vsyscall+5>:	sysenter
1437     // 0xf7757427 <__kernel_vsyscall+7>:	nop
1438     // 0xf7757428 <__kernel_vsyscall+8>:	nop
1439     // 0xf7757429 <__kernel_vsyscall+9>:	nop
1440     // 0xf775742a <__kernel_vsyscall+10>:	nop
1441     // 0xf775742b <__kernel_vsyscall+11>:	nop
1442     // 0xf775742c <__kernel_vsyscall+12>:	nop
1443     // 0xf775742d <__kernel_vsyscall+13>:	nop
1444     // 0xf775742e <__kernel_vsyscall+14>:	int    $0x80
1445     // 0xf7757430 <__kernel_vsyscall+16>:	pop    %ebp
1446     // 0xf7757431 <__kernel_vsyscall+17>:	pop    %edx
1447     // 0xf7757432 <__kernel_vsyscall+18>:	pop    %ecx
1448     // 0xf7757433 <__kernel_vsyscall+19>:	ret
1449     //
1450     // In cases where the sampled thread is blocked in a syscall, its
1451     // program counter will point at "pop %ebp".  Hence we look for
1452     // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
1453     // the corresponding register-recovery actions are:
1454     //    new_ebp = *(old_esp + 0)
1455     //    new eip = *(old_esp + 12)
1456     //    new_esp = old_esp + 16
1457     //
1458     // It may also be the case that the program counter points two
1459     // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
1460     // the case where the syscall has been restarted but the thread
1461     // hasn't been rescheduled.  The code below doesn't handle that;
1462     // it could easily be made to.
1463     //
1464 #if defined(LUL_PLAT_x86_android) || defined(LUL_PLAT_x86_linux)
1465     if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
1466       uintptr_t insns_min, insns_max;
1467       uintptr_t eip = ia.Value();
1468       bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
1469       if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
1470         uint8_t* eipC = (uint8_t*)eip;
1471         if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
1472             eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
1473           TaggedUWord sp_plus_0  = sp;
1474           TaggedUWord sp_plus_12 = sp;
1475           TaggedUWord sp_plus_16 = sp;
1476           sp_plus_12 = sp_plus_12 + TaggedUWord(12);
1477           sp_plus_16 = sp_plus_16 + TaggedUWord(16);
1478           TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
1479           TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
1480           TaggedUWord new_esp = sp_plus_16;
1481           if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
1482             regs.xbp = new_ebp;
1483             regs.xip = new_eip;
1484             regs.xsp = new_esp;
1485             continue;
1486           }
1487         }
1488       }
1489     }
1490 #endif
1491     ////
1492     /////////////////////////////////////////////
1493 
1494     // So, do we have a ruleset for this address?  If so, use it now.
1495     if (ruleset) {
1496 
1497       if (DEBUG_MAIN) {
1498         ruleset->Print(mLog); mLog("\n");
1499       }
1500       // Use the RuleSet to compute the registers for the previous
1501       // frame.  |regs| is modified in-place.
1502       UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
1503 
1504     } else {
1505 
1506       // There's no RuleSet for the specified address, so see if
1507       // it's possible to get anywhere by stack-scanning.
1508 
1509       // Use stack scanning frugally.
1510       if (n_scanned_frames++ >= aScannedFramesAllowed) {
1511         break;
1512       }
1513 
1514       // We can't scan the stack without a valid, aligned stack pointer.
1515       if (!sp.IsAligned()) {
1516         break;
1517       }
1518 
1519       bool scan_succeeded = false;
1520       for (int i = 0; i < NUM_SCANNED_WORDS; ++i) {
1521         TaggedUWord aWord = DerefTUW(sp, aStackImg);
1522         // aWord is something we fished off the stack.  It should be
1523         // valid, unless we overran the stack bounds.
1524         if (!aWord.Valid()) {
1525           break;
1526         }
1527 
1528         // Now, does aWord point inside a text section and immediately
1529         // after something that looks like a call instruction?
1530         if (mPriMap->MaybeIsReturnPoint(aWord, mSegArray)) {
1531           // Yes it does.  Update the unwound registers heuristically,
1532           // using the same schemes as Breakpad does.
1533           scan_succeeded = true;
1534           (*aScannedFramesAcquired)++;
1535 
1536 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1537           // The same logic applies for the 32- and 64-bit cases.
1538           // Register names of the form xsp etc refer to (eg) esp in
1539           // the 32-bit case and rsp in the 64-bit case.
1540 #         if defined(LUL_ARCH_x64)
1541           const int wordSize = 8;
1542 #         else
1543           const int wordSize = 4;
1544 #         endif
1545           // The return address -- at XSP -- will have been pushed by
1546           // the CALL instruction.  So the caller's XSP value
1547           // immediately before and after that CALL instruction is the
1548           // word above XSP.
1549           regs.xsp = sp + TaggedUWord(wordSize);
1550 
1551           // aWord points at the return point, so back up one byte
1552           // to put it in the calling instruction.
1553           regs.xip = aWord + TaggedUWord((uintptr_t)(-1));
1554 
1555           // Computing a new value for the frame pointer is trickier.
1556           if (regs.xbp.Valid() &&
1557               sp.Valid() && regs.xbp.Value() == sp.Value() - wordSize) {
1558             // One possibility is that the callee begins with the standard
1559             // prologue "push %xbp; mov %xsp, %xbp".  In that case,
1560             // (1) the caller's XBP value will be at the word below XSP, and
1561             // (2) the current (callee's) XBP will point at that word:
1562             regs.xbp = DerefTUW(regs.xbp, aStackImg);
1563           } else if (regs.xbp.Valid() &&
1564                      sp.Valid() && regs.xbp.Value() >= sp.Value() + wordSize) {
1565             // If that didn't work out, maybe the callee didn't change
1566             // XBP, so it still holds the caller's value.  For that to
1567             // be plausible, XBP needs to be at least one word higher
1568             // than XSP, since the word at XSP holds the purported
1569             // return address.  In that case do nothing, since XBP
1570             // already holds the "right" value.
1571           } else {
1572             // Mark XBP as invalid, so that subsequent unwind iterations
1573             // don't assume it holds valid data.
1574             regs.xbp = TaggedUWord();
1575           }
1576 
1577           // Move on to the next word up the stack
1578           sp = sp + TaggedUWord(wordSize);
1579 
1580 #elif defined(LUL_ARCH_arm)
1581           // Set all registers to be undefined, except for SP(R13) and
1582           // PC(R15).
1583 
1584           // aWord points either at the return point, if returning to
1585           // ARM code, or one insn past the return point if returning
1586           // to Thumb code.  In both cases, aWord-2 is guaranteed to
1587           // fall within the calling instruction.
1588           regs.r15 = aWord + TaggedUWord((uintptr_t)(-2));
1589 
1590           // Make SP be the word above the location where the return
1591           // address was found.
1592           regs.r13 = sp + TaggedUWord(4);
1593 
1594           // All other regs are undefined.
1595           regs.r7 = regs.r11 = regs.r12 = regs.r14 = TaggedUWord();
1596 
1597           // Move on to the next word up the stack
1598           sp = sp + TaggedUWord(4);
1599 
1600 #else
1601 # error "Unknown plat"
1602 #endif
1603 
1604           break;
1605         }
1606 
1607       } // for (int i = 0; i < NUM_SCANNED_WORDS; i++)
1608 
1609       // We tried to make progress by scanning the stack, but failed.
1610       // So give up -- fall out of the top level unwind loop.
1611       if (!scan_succeeded) {
1612         break;
1613       }
1614     }
1615 
1616   } // top level unwind loop
1617 
1618   // END UNWIND
1619   /////////////////////////////////////////////////////////
1620 }
1621 
1622 
1623 ////////////////////////////////////////////////////////////////
1624 // LUL Unit Testing                                           //
1625 ////////////////////////////////////////////////////////////////
1626 
1627 static const int LUL_UNIT_TEST_STACK_SIZE = 16384;
1628 
1629 // This function is innermost in the test call sequence.  It uses LUL
1630 // to unwind, and compares the result with the sequence specified in
1631 // the director string.  These need to agree in order for the test to
1632 // pass.  In order not to screw up the results, this function needs
1633 // to have a fairly small stack frame, since we're only presenting
1634 // the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
1635 // that chunk unavoidably includes the frame for this function.
1636 //
1637 // This function must not be inlined into its callers.  Doing so will
1638 // cause the expected-vs-actual backtrace consistency checking to
1639 // fail.  Prints summary results to |aLUL|'s logging sink and also
1640 // returns a boolean indicating whether or not the test passed.
1641 static __attribute__((noinline))
1642 bool GetAndCheckStackTrace(LUL* aLUL, const char* dstring)
1643 {
1644   // Get hold of the current unwind-start registers.
1645   UnwindRegs startRegs;
1646   memset(&startRegs, 0, sizeof(startRegs));
1647 #if defined(LUL_PLAT_x64_linux)
1648   volatile uintptr_t block[3];
1649   MOZ_ASSERT(sizeof(block) == 24);
1650   __asm__ __volatile__(
1651     "leaq 0(%%rip), %%r15"   "\n\t"
1652     "movq %%r15, 0(%0)"      "\n\t"
1653     "movq %%rsp, 8(%0)"      "\n\t"
1654     "movq %%rbp, 16(%0)"     "\n"
1655     : : "r"(&block[0]) : "memory", "r15"
1656   );
1657   startRegs.xip = TaggedUWord(block[0]);
1658   startRegs.xsp = TaggedUWord(block[1]);
1659   startRegs.xbp = TaggedUWord(block[2]);
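       // The x86-64 SysV ABI reserves a 128-byte red zone below RSP which leaf
       // code may use without adjusting RSP, so the captured image is started
       // that far below the stack pointer, presumably so any data the
       // innermost frame keeps in the red zone is included.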
1660   const uintptr_t REDZONE_SIZE = 128;
1661   uintptr_t start = block[1] - REDZONE_SIZE;
1662 #elif defined(LUL_PLAT_x86_linux) || defined(LUL_PLAT_x86_android)
1663   volatile uintptr_t block[3];
1664   MOZ_ASSERT(sizeof(block) == 12);
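       // The "call next insn" / "popl" pair below is the usual 32-bit trick
       // for reading EIP: the call pushes the address of the following
       // instruction, which the pop then retrieves into %edi.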
1665   __asm__ __volatile__(
1666     ".byte 0xE8,0x00,0x00,0x00,0x00"/*call next insn*/  "\n\t"
1667     "popl %%edi"             "\n\t"
1668     "movl %%edi, 0(%0)"      "\n\t"
1669     "movl %%esp, 4(%0)"      "\n\t"
1670     "movl %%ebp, 8(%0)"      "\n"
1671     : : "r"(&block[0]) : "memory", "edi"
1672   );
1673   startRegs.xip = TaggedUWord(block[0]);
1674   startRegs.xsp = TaggedUWord(block[1]);
1675   startRegs.xbp = TaggedUWord(block[2]);
1676   const uintptr_t REDZONE_SIZE = 0;
1677   uintptr_t start = block[1] - REDZONE_SIZE;
1678 #elif defined(LUL_PLAT_arm_android)
1679   volatile uintptr_t block[6];
1680   MOZ_ASSERT(sizeof(block) == 24);
1681   __asm__ __volatile__(
1682     "mov r0, r15"            "\n\t"
1683     "str r0,  [%0, #0]"      "\n\t"
1684     "str r14, [%0, #4]"      "\n\t"
1685     "str r13, [%0, #8]"      "\n\t"
1686     "str r12, [%0, #12]"     "\n\t"
1687     "str r11, [%0, #16]"     "\n\t"
1688     "str r7,  [%0, #20]"     "\n"
1689     : : "r"(&block[0]) : "memory", "r0"
1690   );
1691   startRegs.r15 = TaggedUWord(block[0]);
1692   startRegs.r14 = TaggedUWord(block[1]);
1693   startRegs.r13 = TaggedUWord(block[2]);
1694   startRegs.r12 = TaggedUWord(block[3]);
1695   startRegs.r11 = TaggedUWord(block[4]);
1696   startRegs.r7  = TaggedUWord(block[5]);
1697   const uintptr_t REDZONE_SIZE = 0;
1698   uintptr_t start = block[2] - REDZONE_SIZE;  // block[2] holds r13, the SP
1699 #else
1700 # error "Unsupported platform"
1701 #endif
1702 
1703   // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
1704   // stack.
1705   uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
1706   uintptr_t ws  = sizeof(void*);
1707   start &= ~(ws-1);
1708   end   &= ~(ws-1);
1709   uintptr_t nToCopy = end - start;
1710   if (nToCopy > lul::N_STACK_BYTES) {
1711     nToCopy = lul::N_STACK_BYTES;
1712   }
1713   MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
1714   StackImage* stackImg = new StackImage();
1715   stackImg->mLen       = nToCopy;
1716   stackImg->mStartAvma = start;
1717   if (nToCopy > 0) {
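         // MOZ_MAKE_MEM_DEFINED placates memory-checking tools (Valgrind/MSan):
         // parts of the live stack being copied may be uninitialised as far as
         // they are concerned, and we are going to read them anyway.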
1718     MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
1719     memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
1720   }
1721 
1722   // Unwind it.
1723   const int MAX_TEST_FRAMES = 64;
1724   uintptr_t framePCs[MAX_TEST_FRAMES];
1725   uintptr_t frameSPs[MAX_TEST_FRAMES];
1726   size_t framesAvail = mozilla::ArrayLength(framePCs);
1727   size_t framesUsed  = 0;
1728   size_t scannedFramesAllowed = 0;
1729   size_t scannedFramesAcquired = 0;
1730   aLUL->Unwind( &framePCs[0], &frameSPs[0],
1731                 &framesUsed, &scannedFramesAcquired,
1732                 framesAvail, scannedFramesAllowed,
1733                 &startRegs, stackImg );
1734 
1735   delete stackImg;
1736 
1737   //if (0) {
1738   //  // Show what we have.
1739   //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
1740   //  for (size_t i = 0; i < framesUsed; i++) {
1741   //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
1742   //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
1743   //  }
1744   //  fprintf(stderr, "\n");
1745   //}
1746 
1747   // Check to see if there's a consistent binding between digits in
1748   // the director string ('1' .. '8') and the PC values acquired by
1749   // the unwind.  If there isn't, the unwinding has failed somehow.
1750   uintptr_t binding[8];  // binding for '1' .. binding for '8'
1751   memset((void*)binding, 0, sizeof(binding));
1752 
1753   // The general plan is to work backwards along the director string
1754   // and forwards along the framePCs array.  Doing so corresponds to
1755   // working outwards from the innermost frame of the recursive test set.
1756   const char* cursor = dstring;
1757 
1758   // Find the end.  This leaves |cursor| two bytes past the first
1759   // character we want to look at -- see comment below.
1760   while (*cursor) cursor++;
1761 
1762   // Counts the number of consistent frames.
1763   size_t nConsistent = 0;
1764 
1765   // Iterate back to the start of the director string.  The starting
1766   // points are a bit complex.  We can't use framePCs[0] because that
1767   // contains the PC in this frame (above).  We can't use framePCs[1]
1768   // because that will contain the PC at return point in the recursive
1769   // test group (TestFn[1-8]) for their call "out" to this function,
1770   // GetAndCheckStackTrace.  Although LUL will compute a correct
1771   // return address, that will not be the same return address as for a
1772   // recursive call out of the function to another function in the
1773   // group.  Hence we can only start consistency checking at
1774   // framePCs[2].
1775   //
1776   // To be consistent, then, we must ignore the last element in the
1777   // director string as that corresponds to framePCs[1].  Hence the
1778   // start points are: framePCs[2] and the director string 2 bytes
1779   // before the terminating zero.
1780   //
1781   // Also as a result of this, the number of consistent frames counted
1782   // will always be one less than the length of the director string
1783   // (not including its terminating zero).
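       // As a worked sketch (illustrative only): for dstring "1212" the test
       // harness creates the chain
       //   TestFn1 -> TestFn1 -> TestFn2 -> TestFn1 -> TestFn2
       //           -> GetAndCheckStackTrace
       // so framePCs[2] is a return point in TestFn1, which establishes the
       // binding for '1' (the character two before the terminating zero);
       // framePCs[3] binds '2'; and framePCs[4], again the TestFn1 return
       // point, must then match the existing binding for '1'.  That gives 3
       // consistent frames for a 4-character string, as expected.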
1784   size_t frameIx;
1785   for (cursor = cursor-2, frameIx = 2;
1786        cursor >= dstring && frameIx < framesUsed;
1787        cursor--, frameIx++) {
1788     char      c  = *cursor;
1789     uintptr_t pc = framePCs[frameIx];
1790     // If this doesn't hold, the director string is ill-formed.
1791     MOZ_ASSERT(c >= '1' && c <= '8');
1792     int n = ((int)c) - ((int)'1');
1793     if (binding[n] == 0) {
1794       // There's no binding for |c| yet, so install |pc| and carry on.
1795       binding[n] = pc;
1796       nConsistent++;
1797       continue;
1798     }
1799     // There's a pre-existing binding for |c|.  Check it's consistent.
1800     if (binding[n] != pc) {
1801       // Not consistent.  Give up now.
1802       break;
1803     }
1804     // Consistent.  Keep going.
1805     nConsistent++;
1806   }
1807 
1808   // So, did we succeed?
1809   bool passed = nConsistent+1 == strlen(dstring);
1810 
1811   // Show the results.
1812   char buf[200];
1813   SprintfLiteral(buf, "LULUnitTest:   dstring = %s\n", dstring);
1814   buf[sizeof(buf)-1] = 0;
1815   aLUL->mLog(buf);
1816   SprintfLiteral(buf,
1817                  "LULUnitTest:     %d consistent, %d in dstring: %s\n",
1818                  (int)nConsistent, (int)strlen(dstring),
1819                  passed ? "PASS" : "FAIL");
1820   buf[sizeof(buf)-1] = 0;
1821   aLUL->mLog(buf);
1822 
1823   return passed;
1824 }
1825 
1826 
1827 // Macro magic to create a set of 8 mutually recursive functions with
1828 // varying frame sizes.  These will recurse amongst themselves as
1829 // specified by |strP|, the director string, and call
1830 // GetAndCheckStackTrace when the string becomes empty, passing it the
1831 // original value of the string.  This checks the result, printing
1832 // results on |aLUL|'s logging sink, and also returns a boolean
1833 // indicating whether or not the results are acceptable (correct).
1834 
1835 #define DECL_TEST_FN(NAME) \
1836   bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
1837 
1838 #define GEN_TEST_FN(NAME, FRAMESIZE) \
1839   bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \
1840     volatile char space[FRAMESIZE]; \
1841     memset((char*)&space[0], 0, sizeof(space)); \
1842     if (*strP == '\0') { \
1843       /* We've come to the end of the director string. */ \
1844       /* Take a stack snapshot. */ \
1845       return GetAndCheckStackTrace(aLUL, strPorig); \
1846     } else { \
1847       /* Recurse onwards.  This is a bit subtle.  The obvious */ \
1848       /* thing to do here is call onwards directly, from within the */ \
1849       /* arms of the case statement.  That gives a problem in that */ \
1850       /* there will be multiple return points inside each function when */ \
1851       /* unwinding, so it will be difficult to check for consistency */ \
1852       /* against the director string.  Instead, we make an indirect */ \
1853       /* call, so as to guarantee that there is only one call site */ \
1854       /* within each function.  This does assume that the compiler */ \
1855       /* won't transform it back to the simple direct-call form. */ \
1856       /* To discourage it from doing so, the call is bracketed with */ \
1857       /* __asm__ __volatile__ sections so as to make it not-movable. */ \
1858       bool (*nextFn)(LUL*, const char*, const char*) = NULL; \
1859       switch (*strP) { \
1860         case '1': nextFn = TestFn1; break; \
1861         case '2': nextFn = TestFn2; break; \
1862         case '3': nextFn = TestFn3; break; \
1863         case '4': nextFn = TestFn4; break; \
1864         case '5': nextFn = TestFn5; break; \
1865         case '6': nextFn = TestFn6; break; \
1866         case '7': nextFn = TestFn7; break; \
1867         case '8': nextFn = TestFn8; break; \
1868         default:  nextFn = TestFn8; break; \
1869       } \
1870       __asm__ __volatile__("":::"cc","memory"); \
1871       bool passed = nextFn(aLUL, strPorig, strP+1); \
1872       __asm__ __volatile__("":::"cc","memory"); \
1873       return passed; \
1874     } \
1875   }
1876 
1877 // The test functions are mutually recursive, so it is necessary to
1878 // declare them before defining them.
1879 DECL_TEST_FN(TestFn1)
1880 DECL_TEST_FN(TestFn2)
1881 DECL_TEST_FN(TestFn3)
1882 DECL_TEST_FN(TestFn4)
1883 DECL_TEST_FN(TestFn5)
1884 DECL_TEST_FN(TestFn6)
1885 DECL_TEST_FN(TestFn7)
1886 DECL_TEST_FN(TestFn8)
1887 
1888 GEN_TEST_FN(TestFn1, 123)
1889 GEN_TEST_FN(TestFn2, 456)
1890 GEN_TEST_FN(TestFn3, 789)
1891 GEN_TEST_FN(TestFn4, 23)
1892 GEN_TEST_FN(TestFn5, 47)
1893 GEN_TEST_FN(TestFn6, 117)
1894 GEN_TEST_FN(TestFn7, 1)
1895 GEN_TEST_FN(TestFn8, 99)
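
     // For instance (an illustrative sketch, not one of the strings used
     // below): driving TestFn1 with the director string "1232" produces the
     // call chain
     //
     //    TestFn1 -> TestFn1 -> TestFn2 -> TestFn3 -> TestFn2
     //            -> GetAndCheckStackTrace
     //
     // since the entry call always goes to TestFn1, each subsequent character
     // selects the next callee, and GetAndCheckStackTrace runs once the
     // string is exhausted.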
1896 
1897 
1898 // This starts the test sequence going.  Call here to generate a
1899 // sequence of calls as directed by the string |dstring|.  The call
1900 // sequence will, from its innermost frame, finish by calling
1901 // GetAndCheckStackTrace() and passing it |dstring|.
1902 // GetAndCheckStackTrace() will unwind the stack, check consistency
1903 // of those results against |dstring|, and print a pass/fail message
1904 // to aLUL's logging sink.  It also updates the counters in *aNTests
1905 // and aNTestsPassed.
1906 __attribute__((noinline)) void
1907 TestUnw(/*OUT*/int* aNTests, /*OUT*/int* aNTestsPassed,
1908         LUL* aLUL, const char* dstring)
1909 {
1910   // Ensure that the stack has at least this much space on it.  This
1911   // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
1912   // and hand it to LUL.  Safe in the sense that no segfault can
1913   // happen because the stack is at least this big.  This is all
1914   // somewhat dubious in the sense that a sufficiently clever compiler
1915   // (clang, for one) can figure out that space[] is unused and delete
1916   // it from the frame.  Hence the somewhat elaborate hoop jumping to
1917   // fill it up before the call and to at least appear to use the
1918   // value afterwards.
1919   int i;
1920   volatile char space[LUL_UNIT_TEST_STACK_SIZE];
1921   for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
1922     space[i] = (char)(i & 0x7F);
1923   }
1924 
1925   // Really run the test.
1926   bool passed = TestFn1(aLUL, dstring, dstring);
1927 
1928   // Appear to use space[], by visiting the value to compute some kind
1929   // of checksum, and then (apparently) using the checksum.
1930   int sum = 0;
1931   for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
1932     // If this doesn't fool LLVM, I don't know what will.
1933     sum += space[i] - 3*i;
1934   }
1935   __asm__ __volatile__("" : : "r"(sum));
1936 
1937   // Update the counters.
1938   (*aNTests)++;
1939   if (passed) {
1940     (*aNTestsPassed)++;
1941   }
1942 }
1943 
1944 
1945 void
1946 RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int* aNTestsPassed, LUL* aLUL)
1947 {
1948   aLUL->mLog(":\n");
1949   aLUL->mLog("LULUnitTest: BEGIN\n");
1950   *aNTests = *aNTestsPassed = 0;
1951   TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
1952   TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
1953   TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
1954   TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
1955   TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
1956   TestUnw(aNTests, aNTestsPassed, aLUL,
1957           "123456781122334455667788777777777777777777777");
1958   aLUL->mLog("LULUnitTest: END\n");
1959   aLUL->mLog(":\n");
1960 }
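
     // A minimal usage sketch (|lul| is a hypothetical, suitably initialised
     // LUL instance with a logging sink attached):
     //
     //   int nTests = 0, nTestsPassed = 0;
     //   RunLulUnitTests(&nTests, &nTestsPassed, lul);
     //   // With the director strings above, nTests ends up as 6 and, if all
     //   // unwinds are consistent, nTestsPassed equals nTests.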
1961 
1962 
1963 } // namespace lul
1964