1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "LulMain.h"
8 
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <unistd.h>  // write(), only for testing LUL
13 
14 #include <algorithm>  // std::sort
15 #include <string>
16 #include <utility>
17 
18 #include "GeckoProfiler.h"  // for profiler_current_thread_id()
19 #include "LulCommonExt.h"
20 #include "LulElfExt.h"
21 #include "LulMainInt.h"
22 #include "mozilla/ArrayUtils.h"
23 #include "mozilla/Assertions.h"
24 #include "mozilla/CheckedInt.h"
25 #include "mozilla/DebugOnly.h"
26 #include "mozilla/MemoryChecking.h"
27 #include "mozilla/Sprintf.h"
28 #include "mozilla/UniquePtr.h"
29 #include "mozilla/Unused.h"
30 
31 // Set this to 1 for verbose logging
32 #define DEBUG_MAIN 0
33 
34 namespace lul {
35 
36 using mozilla::CheckedInt;
37 using mozilla::DebugOnly;
38 using mozilla::MallocSizeOf;
39 using mozilla::Unused;
40 using std::pair;
41 using std::string;
42 using std::vector;
43 
44 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
45 //
46 // Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
47 // Any such function -- and, hence, the transitive closure of those
48 // reachable from it -- must not do any dynamic memory allocation.
49 // Doing so risks deadlock.  There is exactly one root function for
50 // the transitive closure: Lul::Unwind.
51 //
52 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
53 
54 ////////////////////////////////////////////////////////////////
55 // RuleSet                                                    //
56 ////////////////////////////////////////////////////////////////
57 
// Return a short printable name for the Dwarf register number |aReg|,
// for use in debug output only.  Register numbers not known to this
// target's table map to "???".
static const char* NameOf_DW_REG(int16_t aReg) {
  switch (aReg) {
    case DW_REG_CFA:
      return "cfa";
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return "xbp";
    case DW_REG_INTEL_XSP:
      return "xsp";
    case DW_REG_INTEL_XIP:
      return "xip";
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return "r7";
    case DW_REG_ARM_R11:
      return "r11";
    case DW_REG_ARM_R12:
      return "r12";
    case DW_REG_ARM_R13:
      return "r13";
    case DW_REG_ARM_R14:
      return "r14";
    case DW_REG_ARM_R15:
      return "r15";
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return "x29";
    case DW_REG_AARCH64_X30:
      return "x30";
    case DW_REG_AARCH64_SP:
      return "sp";
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return "sp";
    case DW_REG_MIPS_FP:
      return "fp";
    case DW_REG_MIPS_PC:
      return "pc";
#else
#  error "Unsupported arch"
#endif
    default:
      return "???";
  }
}
103 
ShowRule(const char * aNewReg) const104 string LExpr::ShowRule(const char* aNewReg) const {
105   char buf[64];
106   string res = string(aNewReg) + "=";
107   switch (mHow) {
108     case UNKNOWN:
109       res += "Unknown";
110       break;
111     case NODEREF:
112       SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
113       res += buf;
114       break;
115     case DEREF:
116       SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
117       res += buf;
118       break;
119     case PFXEXPR:
120       SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset);
121       res += buf;
122       break;
123     default:
124       res += "???";
125       break;
126   }
127   return res;
128 }
129 
// Print, via |aLog|, a human-readable rendition of this RuleSet as it
// applies to the address range [avma, avma + len - 1].  Debug aid only.
void RuleSet::Print(uintptr_t avma, uintptr_t len,
                    void (*aLog)(const char*)) const {
  char buf[96];
  SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)avma,
                 (unsigned long long int)(avma + len - 1));
  string res = string(buf);
  res += mCfaExpr.ShowRule("cfa");
  res += " in";
  // For each reg we care about, print the recovery expression.  The
  // leading space in each name acts as the separator between rules.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  res += mXipExpr.ShowRule(" RA");
  res += mXspExpr.ShowRule(" SP");
  res += mXbpExpr.ShowRule(" BP");
#elif defined(GP_ARCH_arm)
  res += mR15expr.ShowRule(" R15");
  res += mR7expr.ShowRule(" R7");
  res += mR11expr.ShowRule(" R11");
  res += mR12expr.ShowRule(" R12");
  res += mR13expr.ShowRule(" R13");
  res += mR14expr.ShowRule(" R14");
#elif defined(GP_ARCH_arm64)
  res += mX29expr.ShowRule(" X29");
  res += mX30expr.ShowRule(" X30");
  res += mSPexpr.ShowRule(" SP");
#elif defined(GP_ARCH_mips64)
  res += mPCexpr.ShowRule(" PC");
  res += mSPexpr.ShowRule(" SP");
  res += mFPexpr.ShowRule(" FP");
#else
#  error "Unsupported arch"
#endif
  aLog(res.c_str());
}
163 
ExprForRegno(DW_REG_NUMBER aRegno)164 LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
165   switch (aRegno) {
166     case DW_REG_CFA:
167       return &mCfaExpr;
168 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
169     case DW_REG_INTEL_XIP:
170       return &mXipExpr;
171     case DW_REG_INTEL_XSP:
172       return &mXspExpr;
173     case DW_REG_INTEL_XBP:
174       return &mXbpExpr;
175 #elif defined(GP_ARCH_arm)
176     case DW_REG_ARM_R15:
177       return &mR15expr;
178     case DW_REG_ARM_R14:
179       return &mR14expr;
180     case DW_REG_ARM_R13:
181       return &mR13expr;
182     case DW_REG_ARM_R12:
183       return &mR12expr;
184     case DW_REG_ARM_R11:
185       return &mR11expr;
186     case DW_REG_ARM_R7:
187       return &mR7expr;
188 #elif defined(GP_ARCH_arm64)
189     case DW_REG_AARCH64_X29:
190       return &mX29expr;
191     case DW_REG_AARCH64_X30:
192       return &mX30expr;
193     case DW_REG_AARCH64_SP:
194       return &mSPexpr;
195 #elif defined(GP_ARCH_mips64)
196     case DW_REG_MIPS_SP:
197       return &mSPexpr;
198     case DW_REG_MIPS_FP:
199       return &mFPexpr;
200     case DW_REG_MIPS_PC:
201       return &mPCexpr;
202 #else
203 #  error "Unknown arch"
204 #endif
205     default:
206       return nullptr;
207   }
208 }
209 
// Default-construct a RuleSet.  Every per-register rule starts out in the
// state produced by LExpr's default constructor.
RuleSet::RuleSet() {
  // All fields are of type LExpr and so are initialised by LExpr::LExpr().
}
213 
214 ////////////////////////////////////////////////////////////////
215 // SecMap                                                     //
216 ////////////////////////////////////////////////////////////////
217 
218 // See header file LulMainInt.h for comments about invariants.
219 
SecMap(uintptr_t mapStartAVMA,uint32_t mapLen,void (* aLog)(const char *))220 SecMap::SecMap(uintptr_t mapStartAVMA, uint32_t mapLen,
221                void (*aLog)(const char*))
222     : mUsable(false),
223       mUniqifier(new mozilla::HashMap<RuleSet, uint32_t, RuleSet,
224                                       InfallibleAllocPolicy>),
225       mLog(aLog) {
226   if (mapLen == 0) {
227     // Degenerate case.
228     mMapMinAVMA = 1;
229     mMapMaxAVMA = 0;
230   } else {
231     mMapMinAVMA = mapStartAVMA;
232     mMapMaxAVMA = mapStartAVMA + (uintptr_t)mapLen - 1;
233   }
234 }
235 
~SecMap()236 SecMap::~SecMap() {
237   mExtents.clear();
238   mDictionary.clear();
239   if (mUniqifier) {
240     mUniqifier->clear();
241     mUniqifier = nullptr;
242   }
243 }
244 
245 // RUNS IN NO-MALLOC CONTEXT
FindRuleSet(uintptr_t ia)246 RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
247   // Binary search mExtents to find one that brackets |ia|.
248   // lo and hi need to be signed, else the loop termination tests
249   // don't work properly.  Note that this works correctly even when
250   // mExtents.size() == 0.
251 
252   // Can't do this until the array has been sorted and preened.
253   MOZ_ASSERT(mUsable);
254 
255   long int lo = 0;
256   long int hi = (long int)mExtents.size() - 1;
257   while (true) {
258     // current unsearched space is from lo to hi, inclusive.
259     if (lo > hi) {
260       // not found
261       return nullptr;
262     }
263     long int mid = lo + ((hi - lo) / 2);
264     Extent* mid_extent = &mExtents[mid];
265     uintptr_t mid_offset = mid_extent->offset();
266     uintptr_t mid_len = mid_extent->len();
267     uintptr_t mid_minAddr = mMapMinAVMA + mid_offset;
268     uintptr_t mid_maxAddr = mid_minAddr + mid_len - 1;
269     if (ia < mid_minAddr) {
270       hi = mid - 1;
271       continue;
272     }
273     if (ia > mid_maxAddr) {
274       lo = mid + 1;
275       continue;
276     }
277     MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
278     uint32_t mid_extent_dictIx = mid_extent->dictIx();
279     MOZ_RELEASE_ASSERT(mid_extent_dictIx < mExtents.size());
280     return &mDictionary[mid_extent_dictIx];
281   }
282   // NOTREACHED
283 }
284 
285 // Add a RuleSet to the collection.  The rule is copied in.  Calling
286 // this makes the map non-searchable.
AddRuleSet(const RuleSet * rs,uintptr_t avma,uintptr_t len)287 void SecMap::AddRuleSet(const RuleSet* rs, uintptr_t avma, uintptr_t len) {
288   mUsable = false;
289 
290   // Zero length RuleSet?  Meaningless, but ignore it anyway.
291   if (len == 0) {
292     return;
293   }
294 
295   // Ignore attempts to add RuleSets whose address range doesn't fall within
296   // the declared address range for the SecMap.  Maybe we should print some
297   // kind of error message rather than silently ignoring them.
298   if (!(avma >= mMapMinAVMA && avma + len - 1 <= mMapMaxAVMA)) {
299     return;
300   }
301 
302   // Because `mMapStartAVMA` .. `mMapEndAVMA` can specify at most a 2^32-1 byte
303   // chunk of address space, the following must now hold.
304   MOZ_RELEASE_ASSERT(len <= (uintptr_t)0xFFFFFFFF);
305 
306   // See if `mUniqifier` already has `rs`.  If so set `dictIx` to the assigned
307   // dictionary index; if not, add `rs` to `mUniqifier` and assign a new
308   // dictionary index.  This is the core of the RuleSet-de-duplication process.
309   uint32_t dictIx = 0;
310   mozilla::HashMap<RuleSet, uint32_t, RuleSet, InfallibleAllocPolicy>::AddPtr
311       p = mUniqifier->lookupForAdd(*rs);
312   if (!p) {
313     dictIx = mUniqifier->count();
314     // If this ever fails, Extents::dictIx will need to be changed to be a
315     // type wider than the current uint16_t.
316     MOZ_RELEASE_ASSERT(dictIx < (1 << 16));
317     // This returns `false` on OOM.  We ignore the return value since we asked
318     // for it to use the InfallibleAllocPolicy.
319     DebugOnly<bool> addedOK = mUniqifier->add(p, *rs, dictIx);
320     MOZ_ASSERT(addedOK);
321   } else {
322     dictIx = p->value();
323   }
324 
325   uint32_t offset = (uint32_t)(avma - mMapMinAVMA);
326   while (len > 0) {
327     // Because Extents::len is a uint16_t, we have to add multiple `mExtents`
328     // entries to cover the case where `len` is equal to or greater than 2^16.
329     // This happens only exceedingly rarely.  In order to get more test
330     // coverage on what would otherwise be a very low probability (less than
331     // 0.0002%) corner case, we do this in steps of 4095.  On libxul.so as of
332     // Sept 2020, this increases the number of `mExtents` entries by about
333     // 0.05%, hence has no meaningful effect on space use, but increases the
334     // use of this corner case, and hence its test coverage, by a factor of 250.
335     uint32_t this_step_len = (len > 4095) ? 4095 : len;
336     mExtents.emplace_back(offset, this_step_len, dictIx);
337     offset += this_step_len;
338     len -= this_step_len;
339   }
340 }
341 
342 // Add a PfxInstr to the vector of such instrs, and return the index
343 // in the vector.  Calling this makes the map non-searchable.
AddPfxInstr(PfxInstr pfxi)344 uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
345   mUsable = false;
346   mPfxInstrs.push_back(pfxi);
347   return mPfxInstrs.size() - 1;
348 }
349 
// Comparator used to sort Extents by start offset.  NOTE(review): despite
// the "LE" in the name this is a strictly-less-than comparison, which is
// what std::sort's strict-weak-ordering requirement demands.
static bool CmpExtentsByOffsetLE(const Extent& ext1, const Extent& ext2) {
  return ext1.offset() < ext2.offset();
}
353 
// Prepare the map for searching, by sorting it, de-overlapping entries and
// removing any resulting zero-length entries.  At the start of this routine,
// all Extents should fall within [mMapMinAVMA, mMapMaxAVMA] and not have zero
// length, as a result of the checks in AddRuleSet().
void SecMap::PrepareRuleSets() {
  // At this point, the de-duped RuleSets are in `mUniqifier`, and
  // `mDictionary` is empty.  This method will, amongst other things, copy
  // them into `mDictionary` in order of their assigned dictionary-index
  // values, as established by `SecMap::AddRuleSet`, and free `mUniqifier`;
  // after this method, it has no further use.
  MOZ_RELEASE_ASSERT(mUniqifier);
  MOZ_RELEASE_ASSERT(mDictionary.empty());

  // No extents means there is nothing to search; discard the uniqifier and
  // leave the map empty.
  if (mExtents.empty()) {
    mUniqifier->clear();
    mUniqifier = nullptr;
    return;
  }

  // [1, 0] is the canonical empty interval set by the constructor for a
  // zero-length mapping.
  if (mMapMinAVMA == 1 && mMapMaxAVMA == 0) {
    // The map is empty.  This should never happen.
    mExtents.clear();
    mUniqifier->clear();
    mUniqifier = nullptr;
    return;
  }
  MOZ_RELEASE_ASSERT(mMapMinAVMA <= mMapMaxAVMA);

  // We must have at least one Extent, and as a consequence there must be at
  // least one entry in the uniqifier.
  MOZ_RELEASE_ASSERT(!mExtents.empty() && !mUniqifier->empty());

#ifdef DEBUG
  // Check invariants on incoming Extents.
  for (size_t i = 0; i < mExtents.size(); ++i) {
    Extent* ext = &mExtents[i];
    uint32_t len = ext->len();
    MOZ_ASSERT(len > 0);
    MOZ_ASSERT(len <= 4095 /* per '4095' in AddRuleSet() */);
    uint32_t offset = ext->offset();
    uintptr_t avma = mMapMinAVMA + (uintptr_t)offset;
    // Upper bounds test.  There's no lower bounds test because `offset` is a
    // positive displacement from `mMapMinAVMA`, so a small underrun will
    // manifest as `len` being close to 2^32.
    MOZ_ASSERT(avma + (uintptr_t)len - 1 <= mMapMaxAVMA);
  }
#endif

  // Sort by start addresses.
  std::sort(mExtents.begin(), mExtents.end(), CmpExtentsByOffsetLE);

  // Iteratively truncate any overlaps and remove any zero length
  // entries that might result, or that may have been present
  // initially.  Unless the input is seriously screwy, this is
  // expected to iterate only once.
  while (true) {
    size_t i;
    size_t n = mExtents.size();
    size_t nZeroLen = 0;

    if (n == 0) {
      break;
    }

    // Truncate any entry that overlaps its successor, and count the
    // zero-length entries that result (or that were present to begin with).
    for (i = 1; i < n; ++i) {
      Extent* prev = &mExtents[i - 1];
      Extent* here = &mExtents[i];
      MOZ_ASSERT(prev->offset() <= here->offset());
      if (prev->offset() + prev->len() > here->offset()) {
        prev->setLen(here->offset() - prev->offset());
      }
      if (prev->len() == 0) {
        nZeroLen++;
      }
    }

    // The loop above never examined the final entry's length; do so here.
    if (mExtents[n - 1].len() == 0) {
      nZeroLen++;
    }

    // At this point, the entries are in-order and non-overlapping.
    // If none of them are zero-length, we are done.
    if (nZeroLen == 0) {
      break;
    }

    // Slide back the entries to remove the zero length ones.
    // This is a standard in-place compaction.
    size_t j = 0;  // The write-point.
    for (i = 0; i < n; ++i) {
      if (mExtents[i].len() == 0) {
        continue;
      }
      if (j != i) {
        mExtents[j] = mExtents[i];
      }
      ++j;
    }
    MOZ_ASSERT(i == n);
    MOZ_ASSERT(nZeroLen <= n);
    MOZ_ASSERT(j == n - nZeroLen);
    while (nZeroLen > 0) {
      mExtents.pop_back();
      nZeroLen--;
    }

    MOZ_ASSERT(mExtents.size() == j);
  }

  size_t nExtents = mExtents.size();

#ifdef DEBUG
  // Do a final check on the extents: their address ranges must be
  // ascending, non overlapping, non zero sized.
  if (nExtents > 0) {
    MOZ_ASSERT(mExtents[0].len() > 0);
    for (size_t i = 1; i < nExtents; ++i) {
      const Extent* prev = &mExtents[i - 1];
      const Extent* here = &mExtents[i];
      MOZ_ASSERT(prev->offset() < here->offset());
      MOZ_ASSERT(here->len() > 0);
      MOZ_ASSERT(prev->offset() + prev->len() <= here->offset());
    }
  }
#endif

  // Create the final dictionary by enumerating the uniqifier.
  size_t nUniques = mUniqifier->count();

  RuleSet dummy;
  mozilla::PodZero(&dummy);

  // Pre-size the dictionary with zeroed placeholder entries ...
  mDictionary.reserve(nUniques);
  for (size_t i = 0; i < nUniques; i++) {
    mDictionary.push_back(dummy);
  }

  // ... then overwrite each placeholder with the RuleSet that was assigned
  // that dictionary index by AddRuleSet().
  for (auto iter = mUniqifier->iter(); !iter.done(); iter.next()) {
    MOZ_RELEASE_ASSERT(iter.get().value() < nUniques);
    mDictionary[iter.get().value()] = iter.get().key();
  }

  // The uniqifier has served its purpose; free it.
  mUniqifier = nullptr;

  char buf[150];
  SprintfLiteral(
      buf,
      "PrepareRuleSets: %lu extents, %lu rulesets, "
      "avma min/max 0x%llx, 0x%llx\n",
      (unsigned long int)nExtents, (unsigned long int)mDictionary.size(),
      (unsigned long long int)mMapMinAVMA, (unsigned long long int)mMapMaxAVMA);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Is now usable for binary search.
  mUsable = true;

#if 0
  mLog("\nRulesets after preening\n");
  for (size_t i = 0; i < nExtents; ++i) {
    const Extent* extent = &mExtents[i];
    uintptr_t avma = mMapMinAVMA + (uintptr_t)extent->offset();
    mDictionary[extent->dictIx()].Print(avma, extent->len(), mLog);
    mLog("\n");
  }
  mLog("\n");
#endif
}
521 
IsEmpty()522 bool SecMap::IsEmpty() { return mExtents.empty(); }
523 
SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const524 size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
525   size_t n = aMallocSizeOf(this);
526 
527   // It's conceivable that these calls would be unsafe with some
528   // implementations of std::vector, but it seems to be working for now...
529   n += aMallocSizeOf(mPfxInstrs.data());
530 
531   if (mUniqifier) {
532     n += mUniqifier->shallowSizeOfIncludingThis(aMallocSizeOf);
533   }
534   n += aMallocSizeOf(mDictionary.data());
535   n += aMallocSizeOf(mExtents.data());
536 
537   return n;
538 }
539 
540 ////////////////////////////////////////////////////////////////
541 // SegArray                                                   //
542 ////////////////////////////////////////////////////////////////
543 
// A SegArray holds a set of address ranges that together exactly
// cover an address range, with no overlaps or holes.  Each range has
// an associated value, which in this case has been specialised to be
// a simple boolean.  The representation is kept to minimal canonical
// form in which adjacent ranges with the same associated value are
// merged together.  Each range is represented by a |struct Seg|.
//
// SegArrays are used to keep track of which parts of the address
// space are known to contain instructions.
class SegArray {
 public:
  // Set the value for every address in [lo, hi] (both inclusive) to |val|,
  // then restore canonical form.  An empty range (lo > hi) is ignored.
  void add(uintptr_t lo, uintptr_t hi, bool val) {
    if (lo > hi) {
      return;
    }
    // Ensure |lo| and |hi + 1| are Seg boundaries, so the target range is
    // covered by a whole number of Segs.
    split_at(lo);
    if (hi < UINTPTR_MAX) {
      split_at(hi + 1);
    }
    std::vector<Seg>::size_type iLo, iHi, i;
    iLo = find(lo);
    iHi = find(hi);
    for (i = iLo; i <= iHi; ++i) {
      mSegs[i].val = val;
    }
    preen();
  }

  // RUNS IN NO-MALLOC CONTEXT
  // If |addr| lies in a range whose value is true, write that range's
  // bounds to |*rx_min| / |*rx_max| and return true.  Otherwise return
  // false, leaving the outputs untouched.
  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
    std::vector<Seg>::size_type i = find(addr);
    if (!mSegs[i].val) {
      return false;
    }
    *rx_min = mSegs[i].lo;
    *rx_max = mSegs[i].hi;
    return true;
  }

  // Initially the entire address space carries the value false
  // ("not known to be code").
  SegArray() {
    Seg s(0, UINTPTR_MAX, false);
    mSegs.push_back(s);
  }

 private:
  // One maximal run of addresses [lo, hi] all sharing the value |val|.
  struct Seg {
    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
    uintptr_t lo;
    uintptr_t hi;
    bool val;
  };

  // Merge adjacent Segs carrying the same value, restoring minimal
  // canonical form.
  void preen() {
    for (std::vector<Seg>::iterator iter = mSegs.begin();
         iter < mSegs.end() - 1; ++iter) {
      if (iter[0].val != iter[1].val) {
        continue;
      }
      iter[0].hi = iter[1].hi;
      // Erasing iter+1 leaves |iter| itself valid, since vector erase only
      // invalidates iterators at or after the erasure point.
      mSegs.erase(iter + 1);
      // Back up one, so as not to miss an opportunity to merge
      // with the entry after this one.
      --iter;
    }
  }

  // RUNS IN NO-MALLOC CONTEXT
  // Binary-search for the index of the Seg containing |a|.  Because the
  // Segs exactly partition the address space, the search always succeeds.
  std::vector<Seg>::size_type find(uintptr_t a) {
    long int lo = 0;
    // FIX: the highest valid index is size() - 1.  This previously started
    // |hi| at size(), admitting an out-of-range index into the (inclusive)
    // search space; that was harmless only because the covering invariant
    // guarantees early termination, and it was inconsistent with the other
    // binary searches in this file (SecMap::FindRuleSet, PriMap::FindSecMap).
    long int hi = (long int)mSegs.size() - 1;
    while (true) {
      // The unsearched space is lo .. hi inclusive.
      if (lo > hi) {
        // Not found.  This can't happen.
        return (std::vector<Seg>::size_type)(-1);
      }
      long int mid = lo + ((hi - lo) / 2);
      uintptr_t mid_lo = mSegs[mid].lo;
      uintptr_t mid_hi = mSegs[mid].hi;
      if (a < mid_lo) {
        hi = mid - 1;
        continue;
      }
      if (a > mid_hi) {
        lo = mid + 1;
        continue;
      }
      return (std::vector<Seg>::size_type)mid;
    }
  }

  // Ensure |a| is the low bound of some Seg, splitting the Seg currently
  // containing it if necessary.  No-op if |a| is already a boundary.
  void split_at(uintptr_t a) {
    std::vector<Seg>::size_type i = find(a);
    if (mSegs[i].lo == a) {
      return;
    }
    mSegs.insert(mSegs.begin() + i + 1, mSegs[i]);
    mSegs[i].hi = a - 1;
    mSegs[i + 1].lo = a;
  }

  // Debug aid: dump the entire array to stdout.
  void show() {
    printf("<< %d entries:\n", (int)mSegs.size());
    for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();
         ++iter) {
      printf("  %016llx  %016llx  %s\n", (unsigned long long int)(*iter).lo,
             (unsigned long long int)(*iter).hi,
             (*iter).val ? "true" : "false");
    }
    printf(">>\n");
  }

  std::vector<Seg> mSegs;
};
659 
660 ////////////////////////////////////////////////////////////////
661 // PriMap                                                     //
662 ////////////////////////////////////////////////////////////////
663 
// The top-level mapping: a sorted collection of non-overlapping SecMaps
// (one per mapped object), supporting lookup of the RuleSet that governs
// a given code address.
class PriMap {
 public:
  explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}

  // RUNS IN NO-MALLOC CONTEXT
  // Return the RuleSet covering |ia| together with the PfxInstr vector of
  // the SecMap it belongs to (needed to evaluate PFXEXPR rules).  Either
  // or both components are nullptr if |ia| is not covered.
  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
    SecMap* sm = FindSecMap(ia);
    return pair<const RuleSet*, const vector<PfxInstr>*>(
        sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr);
  }

  // Add a secondary map.  No overlaps allowed w.r.t. existing
  // secondary maps.  Takes ownership of |aSecMap|; empty maps are dropped.
  void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
    // We can't add an empty SecMap to the PriMap.  But that's OK
    // since we'd never be able to find anything in it anyway.
    if (aSecMap->IsEmpty()) {
      return;
    }

    // Iterate through the SecMaps and find the right place for this
    // one.  At the same time, ensure that the in-order
    // non-overlapping invariant is preserved (and, generally, holds).
    // FIXME: this gives a cost that is O(N^2) in the total number of
    // shared objects in the system.  ToDo: better.
    MOZ_ASSERT(aSecMap->mMapMinAVMA <= aSecMap->mMapMaxAVMA);

    size_t num_secMaps = mSecMaps.size();
    uintptr_t i;
    for (i = 0; i < num_secMaps; ++i) {
      mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
      MOZ_ASSERT(sm_i->mMapMinAVMA <= sm_i->mMapMaxAVMA);
      if (aSecMap->mMapMinAVMA < sm_i->mMapMaxAVMA) {
        // |aSecMap| needs to be inserted immediately before mSecMaps[i].
        break;
      }
    }
    MOZ_ASSERT(i <= num_secMaps);
    if (i == num_secMaps) {
      // It goes at the end.
      mSecMaps.push_back(std::move(aSecMap));
    } else {
      std::vector<mozilla::UniquePtr<SecMap>>::iterator iter =
          mSecMaps.begin() + i;
      mSecMaps.insert(iter, std::move(aSecMap));
    }
    char buf[100];
    SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
                   (int)mSecMaps.size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);
  }

  // Remove and delete any SecMaps in the mapping, that intersect
  // with the specified address range.
  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
    MOZ_ASSERT(avma_min <= avma_max);
    size_t num_secMaps = mSecMaps.size();
    if (num_secMaps > 0) {
      intptr_t i;
      // Iterate from end to start over the vector, so as to ensure
      // that the special case where |avma_min| and |avma_max| denote
      // the entire address space, can be completed in time proportional
      // to the number of elements in the map.
      for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) {
        mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
        if (sm_i->mMapMaxAVMA < avma_min || avma_max < sm_i->mMapMinAVMA) {
          // There's no overlap.  Move on.
          continue;
        }
        // We need to remove mSecMaps[i] and slide all those above it
        // downwards to cover the hole.
        mSecMaps.erase(mSecMaps.begin() + i);
      }
    }
  }

  // Return the number of currently contained SecMaps.
  size_t CountSecMaps() { return mSecMaps.size(); }

  // Measure this PriMap and all SecMaps it owns, for memory reporting.
  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
    size_t n = aMallocSizeOf(this);

    // It's conceivable that this call would be unsafe with some
    // implementations of std::vector, but it seems to be working for now...
    n += aMallocSizeOf(mSecMaps.data());

    for (size_t i = 0; i < mSecMaps.size(); i++) {
      n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf);
    }

    return n;
  }

 private:
  // RUNS IN NO-MALLOC CONTEXT
  // Binary-search mSecMaps for the one whose range contains |ia|, or
  // return nullptr if there is none.
  SecMap* FindSecMap(uintptr_t ia) {
    // Binary search mSecMaps to find one that brackets |ia|.
    // lo and hi need to be signed, else the loop termination tests
    // don't work properly.
    long int lo = 0;
    long int hi = (long int)mSecMaps.size() - 1;
    while (true) {
      // current unsearched space is from lo to hi, inclusive.
      if (lo > hi) {
        // not found
        return nullptr;
      }
      long int mid = lo + ((hi - lo) / 2);
      mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid];
      uintptr_t mid_minAddr = mid_secMap->mMapMinAVMA;
      uintptr_t mid_maxAddr = mid_secMap->mMapMaxAVMA;
      if (ia < mid_minAddr) {
        hi = mid - 1;
        continue;
      }
      if (ia > mid_maxAddr) {
        lo = mid + 1;
        continue;
      }
      MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
      return mid_secMap.get();
    }
    // NOTREACHED
  }

 private:
  // sorted array of per-object ranges, non overlapping, non empty
  std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;

  // a logging sink, for debugging.
  void (*mLog)(const char*);
};
797 
798 ////////////////////////////////////////////////////////////////
799 // LUL                                                        //
800 ////////////////////////////////////////////////////////////////
801 
// Logging helper for LUL methods: prefixes |_str| with the current process
// id, thread id and the LUL object's address, then sends the result to
// |mLog|.  Only usable inside LUL member functions, since it references
// |this| and |mLog|.
#define LUL_LOG(_str)                                                          \
  do {                                                                         \
    char buf[200];                                                             \
    SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \
                   uint64_t(profiler_current_process_id().ToNumber()),         \
                   uint64_t(profiler_current_thread_id().ToNumber()), this,    \
                   (_str));                                                    \
    buf[sizeof(buf) - 1] = 0;                                                  \
    mLog(buf);                                                                 \
  } while (0)
812 
// Construct a LUL unwinder in Admin mode, bound to the calling thread
// (the "admin thread").  |aLog| is the sink for all diagnostic output.
LUL::LUL(void (*aLog)(const char*))
    : mLog(aLog),
      mAdminMode(true),
      mAdminThreadId(profiler_current_thread_id()),
      mPriMap(new PriMap(aLog)),
      mSegArray(new SegArray()),
      mUSU(new UniqueStringUniverse()) {
  LUL_LOG("LUL::LUL: Created object");
}
822 
LUL::~LUL() {
  // Log first, while mLog is still valid.
  LUL_LOG("LUL::~LUL: Destroyed object");
  delete mPriMap;
  delete mSegArray;
  // NOTE(review): mLog is nulled before mUSU is deleted -- presumably so
  // that nothing can log during/after the string-universe teardown;
  // confirm this ordering is intentional.
  mLog = nullptr;
  delete mUSU;
}
830 
MaybeShowStats()831 void LUL::MaybeShowStats() {
832   // This is racey in the sense that it can't guarantee that
833   //   n_new == n_new_Context + n_new_CFI + n_new_Scanned
834   // if it should happen that mStats is updated by some other thread
835   // in between computation of n_new and n_new_{Context,CFI,FP}.
836   // But it's just stats printing, so we don't really care.
837   uint32_t n_new = mStats - mStatsPrevious;
838   if (n_new >= 5000) {
839     uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
840     uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;
841     uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;
842     mStatsPrevious = mStats;
843     char buf[200];
844     SprintfLiteral(buf,
845                    "LUL frame stats: TOTAL %5u"
846                    "    CTX %4u    CFI %4u    FP %4u",
847                    n_new, n_new_Context, n_new_CFI, n_new_FP);
848     buf[sizeof(buf) - 1] = 0;
849     mLog(buf);
850   }
851 }
852 
SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const853 size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
854   size_t n = aMallocSizeOf(this);
855   n += mPriMap->SizeOfIncludingThis(aMallocSizeOf);
856 
857   // Measurement of the following members may be added later if DMD finds it
858   // is worthwhile:
859   // - mSegArray
860   // - mUSU
861 
862   return n;
863 }
864 
// Switch this LUL from Admin mode to Unwinding mode.  May only be called
// on the admin thread (the thread that constructed this object).
void LUL::EnableUnwinding() {
  LUL_LOG("LUL::EnableUnwinding");
  // Don't assert for Admin mode here.  That is, tolerate a call here
  // if we are already in Unwinding mode.
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mAdminMode = false;
}
873 
NotifyAfterMap(uintptr_t aRXavma,size_t aSize,const char * aFileName,const void * aMappedImage)874 void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
875                          const void* aMappedImage) {
876   MOZ_RELEASE_ASSERT(mAdminMode);
877   MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
878 
879   mLog(":\n");
880   char buf[200];
881   SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
882                  (unsigned long long int)aRXavma, (unsigned long long int)aSize,
883                  aFileName);
884   buf[sizeof(buf) - 1] = 0;
885   mLog(buf);
886 
887   // We can't have a SecMap covering more than 2^32-1 bytes of address space.
888   // See the definition of SecMap for why.  Rather than crash the system, just
889   // limit the SecMap's size accordingly.  This case is never actually
890   // expected to happen.
891   if (((unsigned long long int)aSize) > 0xFFFFFFFFULL) {
892     aSize = (uintptr_t)0xFFFFFFFF;
893   }
894   MOZ_RELEASE_ASSERT(aSize <= 0xFFFFFFFF);
895 
896   // Ignore obviously-stupid notifications.
897   if (aSize > 0) {
898     // Here's a new mapping, for this object.
899     mozilla::UniquePtr<SecMap> smap =
900         mozilla::MakeUnique<SecMap>(aRXavma, (uint32_t)aSize, mLog);
901 
902     // Read CFI or EXIDX unwind data into |smap|.
903     if (!aMappedImage) {
904       (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
905                                 smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
906     } else {
907       (void)lul::ReadSymbolDataInternal(
908           (const uint8_t*)aMappedImage, string(aFileName),
909           std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
910     }
911 
912     mLog("NotifyMap .. preparing entries\n");
913 
914     smap->PrepareRuleSets();
915 
916     SprintfLiteral(buf, "NotifyMap got %lld entries\n",
917                    (long long int)smap->Size());
918     buf[sizeof(buf) - 1] = 0;
919     mLog(buf);
920 
921     // Add it to the primary map (the top level set of mapped objects).
922     mPriMap->AddSecMap(std::move(smap));
923 
924     // Tell the segment array about the mapping, so that the stack
925     // scan and __kernel_syscall mechanisms know where valid code is.
926     mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
927   }
928 }
929 
NotifyExecutableArea(uintptr_t aRXavma,size_t aSize)930 void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
931   MOZ_RELEASE_ASSERT(mAdminMode);
932   MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
933 
934   mLog(":\n");
935   char buf[200];
936   SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n",
937                  (unsigned long long int)aRXavma,
938                  (unsigned long long int)aSize);
939   buf[sizeof(buf) - 1] = 0;
940   mLog(buf);
941 
942   // Ignore obviously-stupid notifications.
943   if (aSize > 0) {
944     // Tell the segment array about the mapping, so that the stack
945     // scan and __kernel_syscall mechanisms know where valid code is.
946     mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
947   }
948 }
949 
NotifyBeforeUnmap(uintptr_t aRXavmaMin,uintptr_t aRXavmaMax)950 void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
951   MOZ_RELEASE_ASSERT(mAdminMode);
952   MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
953 
954   mLog(":\n");
955   char buf[100];
956   SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",
957                  (unsigned long long int)aRXavmaMin,
958                  (unsigned long long int)aRXavmaMax);
959   buf[sizeof(buf) - 1] = 0;
960   mLog(buf);
961 
962   MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);
963 
964   // Remove from the primary map, any secondary maps that intersect
965   // with the address range.  Also delete the secondary maps.
966   mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);
967 
968   // Tell the segment array that the address range no longer
969   // contains valid code.
970   mSegArray->add(aRXavmaMin, aRXavmaMax, false);
971 
972   SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",
973                  (int)mPriMap->CountSecMaps());
974   buf[sizeof(buf) - 1] = 0;
975   mLog(buf);
976 }
977 
// Return the number of secondary maps (mapped objects) currently held by
// the primary map.  Admin-mode / admin-thread only.
size_t LUL::CountMappings() {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  return mPriMap->CountSecMaps();
}
984 
985 // RUNS IN NO-MALLOC CONTEXT
DerefTUW(TaggedUWord aAddr,const StackImage * aStackImg)986 static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
987   if (!aAddr.Valid()) {
988     return TaggedUWord();
989   }
990 
991   // Lower limit check.  |aAddr.Value()| is the lowest requested address
992   // and |aStackImg->mStartAvma| is the lowest address we actually have,
993   // so the comparison is straightforward.
994   if (aAddr.Value() < aStackImg->mStartAvma) {
995     return TaggedUWord();
996   }
997 
998   // Upper limit check.  We must compute the highest requested address
999   // and the highest address we actually have, but being careful to
1000   // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
1001   // 3/7 values below that, then we will get overflow.  See bug #1245477.
1002   typedef CheckedInt<uintptr_t> CheckedUWord;
1003   CheckedUWord highest_requested_plus_one =
1004       CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
1005   CheckedUWord highest_available_plus_one =
1006       CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
1007   if (!highest_requested_plus_one.isValid()     // overflow?
1008       || !highest_available_plus_one.isValid()  // overflow?
1009       || (highest_requested_plus_one.value() >
1010           highest_available_plus_one.value())) {  // in range?
1011     return TaggedUWord();
1012   }
1013 
1014   return TaggedUWord(
1015       *(uintptr_t*)(&aStackImg
1016                          ->mContents[aAddr.Value() - aStackImg->mStartAvma]));
1017 }
1018 
1019 // RUNS IN NO-MALLOC CONTEXT
// Return the value of the DWARF register |aReg| as recorded in |aOldRegs|,
// or |aCFA| when the pseudo-register DW_REG_CFA is requested.  Any register
// number that LUL does not track on this architecture asserts (in debug
// builds) and yields an invalid value.
static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
                               TaggedUWord aCFA) {
  switch (aReg) {
    case DW_REG_CFA:
      return aCFA;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return aOldRegs->xbp;
    case DW_REG_INTEL_XSP:
      return aOldRegs->xsp;
    case DW_REG_INTEL_XIP:
      return aOldRegs->xip;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return aOldRegs->r7;
    case DW_REG_ARM_R11:
      return aOldRegs->r11;
    case DW_REG_ARM_R12:
      return aOldRegs->r12;
    case DW_REG_ARM_R13:
      return aOldRegs->r13;
    case DW_REG_ARM_R14:
      return aOldRegs->r14;
    case DW_REG_ARM_R15:
      return aOldRegs->r15;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return aOldRegs->x29;
    case DW_REG_AARCH64_X30:
      return aOldRegs->x30;
    case DW_REG_AARCH64_SP:
      return aOldRegs->sp;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return aOldRegs->sp;
    case DW_REG_MIPS_FP:
      return aOldRegs->fp;
    case DW_REG_MIPS_PC:
      return aOldRegs->pc;
#else
#  error "Unsupported arch"
#endif
    default:
      MOZ_ASSERT(0);
      // Unknown register: return an invalid (poisoning) value.
      return TaggedUWord();
  }
}
1067 
1068 // RUNS IN NO-MALLOC CONTEXT
1069 // See prototype for comment.
// Evaluate the prefix-instruction sequence in |aPfxInstrs| beginning at
// index |start|, using a small fixed-size stack machine.  Register values
// come from |aOldRegs| and |aCFA|; PX_Deref loads are serviced from the
// stack snapshot |aStackImg|.  Returns the top-of-stack value on success,
// or an invalid TaggedUWord on any failure: bad start index, a sequence
// that doesn't begin with PX_Start or that runs off the end, stack
// overflow/underflow, or an unknown opcode.
TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
                            TaggedUWord aCFA, const StackImage* aStackImg,
                            const vector<PfxInstr>& aPfxInstrs) {
  // A small evaluation stack, and a stack pointer, which points to
  // the highest numbered in-use element.
  const int N_STACK = 10;
  TaggedUWord stack[N_STACK];
  int stackPointer = -1;
  for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();

#define PUSH(_tuw)                                             \
  do {                                                         \
    if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \
    stack[++stackPointer] = (_tuw);                            \
  } while (0)

#define POP(_lval)                                   \
  do {                                               \
    if (stackPointer < 0) goto fail; /* underflow */ \
    _lval = stack[stackPointer--];                   \
  } while (0)

  // Cursor in the instruction sequence.
  size_t curr = start + 1;

  // Check the start point is sane.
  size_t nInstrs = aPfxInstrs.size();
  if (start < 0 || (size_t)start >= nInstrs) goto fail;

  {
    // The instruction sequence must start with PX_Start.  If not,
    // something is seriously wrong.
    PfxInstr first = aPfxInstrs[start];
    if (first.mOpcode != PX_Start) goto fail;

    // Push the CFA on the stack to start with (or not), as required by
    // the original DW_OP_*expression* CFI.
    if (first.mOperand != 0) PUSH(aCFA);
  }

  while (true) {
    if (curr >= nInstrs) goto fail;  // ran off the end of the sequence

    PfxInstr pfxi = aPfxInstrs[curr++];
    if (pfxi.mOpcode == PX_End) break;  // we're done

    switch (pfxi.mOpcode) {
      case PX_Start:
        // This should appear only at the start of the sequence.
        goto fail;
      case PX_End:
        // We just took care of that, so we shouldn't see it again.
        MOZ_ASSERT(0);
        goto fail;
      case PX_SImm32:
        // Push a sign-extended 32-bit immediate.
        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
        break;
      case PX_DwReg: {
        // Push the value of the named DWARF register.
        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
        MOZ_ASSERT(reg != DW_REG_CFA);
        PUSH(EvaluateReg(reg, aOldRegs, aCFA));
        break;
      }
      case PX_Deref: {
        // Pop an address and push the word it points at in the stack image.
        TaggedUWord addr;
        POP(addr);
        PUSH(DerefTUW(addr, aStackImg));
        break;
      }
      // The binary operators below pop |x| (the second operand) then |y|
      // (the first), and push |y OP x|.
      case PX_Add: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y + x);
        break;
      }
      case PX_Sub: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y - x);
        break;
      }
      case PX_And: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y & x);
        break;
      }
      case PX_Or: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y | x);
        break;
      }
      case PX_CmpGES: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y.CmpGEs(x));
        break;
      }
      case PX_Shl: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y << x);
        break;
      }
      default:
        MOZ_ASSERT(0);
        goto fail;
    }
  }  // while (true)

  // Evaluation finished.  The top value on the stack is the result.
  if (stackPointer >= 0) {
    return stack[stackPointer];
  }
  // Else fall through

fail:
  return TaggedUWord();

#undef PUSH
#undef POP
}
1199 
1200 // RUNS IN NO-MALLOC CONTEXT
EvaluateExpr(const UnwindRegs * aOldRegs,TaggedUWord aCFA,const StackImage * aStackImg,const vector<PfxInstr> * aPfxInstrs) const1201 TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
1202                                 const StackImage* aStackImg,
1203                                 const vector<PfxInstr>* aPfxInstrs) const {
1204   switch (mHow) {
1205     case UNKNOWN:
1206       return TaggedUWord();
1207     case NODEREF: {
1208       TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1209       tuw = tuw + TaggedUWord((intptr_t)mOffset);
1210       return tuw;
1211     }
1212     case DEREF: {
1213       TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1214       tuw = tuw + TaggedUWord((intptr_t)mOffset);
1215       return DerefTUW(tuw, aStackImg);
1216     }
1217     case PFXEXPR: {
1218       MOZ_ASSERT(aPfxInstrs);
1219       if (!aPfxInstrs) {
1220         return TaggedUWord();
1221       }
1222       return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs);
1223     }
1224     default:
1225       MOZ_ASSERT(0);
1226       return TaggedUWord();
1227   }
1228 }
1229 
1230 // RUNS IN NO-MALLOC CONTEXT
// Step |*aRegs| back by one frame, using the CFI rules in |aRS|.  All
// tracked registers are first invalidated, then recomputed from the old
// values; any register for which no value can be computed remains invalid
// on return.  In particular, an invalid PC afterwards tells the caller the
// unwind cannot proceed.
static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
                       const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
  // Take a copy of regs, since we'll need to refer to the old values
  // whilst computing the new ones.
  UnwindRegs old_regs = *aRegs;

  // Mark all the current register values as invalid, so that the
  // caller can see, on our return, which ones have been computed
  // anew.  If we don't even manage to compute a new PC value, then
  // the caller will have to abandon the unwind.
  // FIXME: Create and use instead: aRegs->SetAllInvalid();
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp = TaggedUWord();
  aRegs->xsp = TaggedUWord();
  aRegs->xip = TaggedUWord();
#elif defined(GP_ARCH_arm)
  aRegs->r7 = TaggedUWord();
  aRegs->r11 = TaggedUWord();
  aRegs->r12 = TaggedUWord();
  aRegs->r13 = TaggedUWord();
  aRegs->r14 = TaggedUWord();
  aRegs->r15 = TaggedUWord();
#elif defined(GP_ARCH_arm64)
  aRegs->x29 = TaggedUWord();
  aRegs->x30 = TaggedUWord();
  aRegs->sp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#elif defined(GP_ARCH_mips64)
  aRegs->sp = TaggedUWord();
  aRegs->fp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#else
#  error "Unsupported arch"
#endif

  // This is generally useful.
  const TaggedUWord inval = TaggedUWord();

  // First, compute the CFA.  Note there is no previous CFA to feed in, so
  // an invalid value is passed for that slot.
  TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,
                                               aStackImg, aPfxInstrs);

  // If we didn't manage to compute the CFA, well .. that's ungood,
  // but keep going anyway.  It'll be OK provided none of the register
  // value rules mention the CFA.  In any case, compute the new values
  // for each register that we're tracking.

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp =
      aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xsp =
      aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xip =
      aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm)
  aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r11 =
      aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r12 =
      aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r13 =
      aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r14 =
      aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r15 =
      aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm64)
  aRegs->x29 =
      aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->x30 =
      aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_mips64)
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#else
#  error "Unsupported arch"
#endif

  // We're done.  Any regs for which we didn't manage to compute a
  // new value will now be marked as invalid.
}
1314 
1315 // RUNS IN NO-MALLOC CONTEXT
// Unwind the stack whose snapshot is |aStackImg|, starting from the
// register state |*aStartRegs|.  Writes up to |aFramesAvail| frame PCs and
// SPs into |aFramePCs| / |aFrameSPs|, sets |*aFramesUsed| to the number of
// frames recovered, and increments |*aFramePointerFramesAcquired| once for
// each frame recovered by frame-pointer chasing rather than by CFI.  May
// only be called after EnableUnwinding() has switched this LUL out of
// admin mode.
void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,
                 /*OUT*/ uintptr_t* aFrameSPs,
                 /*OUT*/ size_t* aFramesUsed,
                 /*OUT*/ size_t* aFramePointerFramesAcquired,
                 size_t aFramesAvail, UnwindRegs* aStartRegs,
                 StackImage* aStackImg) {
  MOZ_RELEASE_ASSERT(!mAdminMode);

  /////////////////////////////////////////////////////////
  // BEGIN UNWIND

  *aFramesUsed = 0;

  // Working copy of the register state; stepped back one frame per loop
  // iteration.
  UnwindRegs regs = *aStartRegs;
  // Highest valid SP seen so far, used below to detect SP moving in the
  // wrong direction (a possible cycle).
  TaggedUWord last_valid_sp = TaggedUWord();

  while (true) {
    if (DEBUG_MAIN) {
      char buf[300];
      mLog("\n");
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
      SprintfLiteral(
          buf, "LoopTop: rip %d/%llx  rsp %d/%llx  rbp %d/%llx\n",
          (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
          (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
          (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm)
      SprintfLiteral(
          buf,
          "LoopTop: r15 %d/%llx  r7 %d/%llx  r11 %d/%llx"
          "  r12 %d/%llx  r13 %d/%llx  r14 %d/%llx\n",
          (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
          (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
          (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
          (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
          (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
          (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm64)
      SprintfLiteral(
          buf,
          "LoopTop: pc %d/%llx  x29 %d/%llx  x30 %d/%llx"
          "  sp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
          (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_mips64)
      SprintfLiteral(
          buf, "LoopTop: pc %d/%llx  sp %d/%llx  fp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
          (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#else
#  error "Unsupported arch"
#endif
    }

    // Select the instruction address (|ia|) and stack pointer (|sp|) for
    // this frame.  On arm/arm64, frames beyond the innermost take the
    // return-address register (r14 / x30) as the instruction address.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    TaggedUWord ia = regs.xip;
    TaggedUWord sp = regs.xsp;
#elif defined(GP_ARCH_arm)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
    TaggedUWord sp = regs.r13;
#elif defined(GP_ARCH_arm64)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
    TaggedUWord sp = regs.sp;
#elif defined(GP_ARCH_mips64)
    TaggedUWord ia = regs.pc;
    TaggedUWord sp = regs.sp;
#else
#  error "Unsupported arch"
#endif

    // Stop if the output arrays are full.
    if (*aFramesUsed >= aFramesAvail) {
      break;
    }

    // If we don't have a valid value for the PC, give up.
    if (!ia.Valid()) {
      break;
    }

    // If this is the innermost frame, record the SP value, which
    // presumably is valid.  If this isn't the innermost frame, and we
    // have a valid SP value, check that its SP value isn't less that
    // the one we've seen so far, so as to catch potential SP value
    // cycles.
    if (*aFramesUsed == 0) {
      last_valid_sp = sp;
    } else {
      MOZ_ASSERT(last_valid_sp.Valid());
      if (sp.Valid()) {
        if (sp.Value() < last_valid_sp.Value()) {
          // Hmm, SP going in the wrong direction.  Let's stop.
          break;
        }
        // Remember where we got to.
        last_valid_sp = sp;
      }
    }

    // Record this frame.
    aFramePCs[*aFramesUsed] = ia.Value();
    aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
    (*aFramesUsed)++;

    // Find the RuleSet for the current IA, if any.  This will also
    // query the backing (secondary) maps if it isn't found in the
    // thread-local cache.

    // If this isn't the innermost frame, back up into the calling insn.
    if (*aFramesUsed > 1) {
      ia = ia + TaggedUWord((uintptr_t)(-1));
    }

    pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
        mPriMap->Lookup(ia.Value());
    const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
    const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;

    if (DEBUG_MAIN) {
      char buf[100];
      SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
                     (unsigned long long int)ia.Value(), ruleset);
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
    }

#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
    /////////////////////////////////////////////
    ////
    // On 32 bit x86-linux, syscalls are often done via the VDSO
    // function __kernel_vsyscall, which doesn't have a corresponding
    // object that we can read debuginfo from.  That effectively kills
    // off all stack traces for threads blocked in syscalls.  Hence
    // special-case by looking at the code surrounding the program
    // counter.
    //
    // 0xf7757420 <__kernel_vsyscall+0>:	push   %ecx
    // 0xf7757421 <__kernel_vsyscall+1>:	push   %edx
    // 0xf7757422 <__kernel_vsyscall+2>:	push   %ebp
    // 0xf7757423 <__kernel_vsyscall+3>:	mov    %esp,%ebp
    // 0xf7757425 <__kernel_vsyscall+5>:	sysenter
    // 0xf7757427 <__kernel_vsyscall+7>:	nop
    // 0xf7757428 <__kernel_vsyscall+8>:	nop
    // 0xf7757429 <__kernel_vsyscall+9>:	nop
    // 0xf775742a <__kernel_vsyscall+10>:	nop
    // 0xf775742b <__kernel_vsyscall+11>:	nop
    // 0xf775742c <__kernel_vsyscall+12>:	nop
    // 0xf775742d <__kernel_vsyscall+13>:	nop
    // 0xf775742e <__kernel_vsyscall+14>:	int    $0x80
    // 0xf7757430 <__kernel_vsyscall+16>:	pop    %ebp
    // 0xf7757431 <__kernel_vsyscall+17>:	pop    %edx
    // 0xf7757432 <__kernel_vsyscall+18>:	pop    %ecx
    // 0xf7757433 <__kernel_vsyscall+19>:	ret
    //
    // In cases where the sampled thread is blocked in a syscall, its
    // program counter will point at "pop %ebp".  Hence we look for
    // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
    // the corresponding register-recovery actions are:
    //    new_ebp = *(old_esp + 0)
    //    new eip = *(old_esp + 12)
    //    new_esp = old_esp + 16
    //
    // It may also be the case that the program counter points two
    // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
    // the case where the syscall has been restarted but the thread
    // hasn't been rescheduled.  The code below doesn't handle that;
    // it could easily be made to.
    //
    if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
      uintptr_t insns_min, insns_max;
      uintptr_t eip = ia.Value();
      bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
      if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
        uint8_t* eipC = (uint8_t*)eip;
        // Match the byte sequence: int $0x80; pop %ebp; pop %edx; pop %ecx;
        // ret -- with |eip| pointing at the "pop %ebp".
        if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
            eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
          TaggedUWord sp_plus_0 = sp;
          TaggedUWord sp_plus_12 = sp;
          TaggedUWord sp_plus_16 = sp;
          sp_plus_12 = sp_plus_12 + TaggedUWord(12);
          sp_plus_16 = sp_plus_16 + TaggedUWord(16);
          TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
          TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
          TaggedUWord new_esp = sp_plus_16;
          if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
            regs.xbp = new_ebp;
            regs.xip = new_eip;
            regs.xsp = new_esp;
            continue;
          }
        }
      }
    }
    ////
    /////////////////////////////////////////////
#endif  // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)

    // So, do we have a ruleset for this address?  If so, use it now.
    if (ruleset) {
      if (DEBUG_MAIN) {
        ruleset->Print(ia.Value(), 1 /*bogus, but doesn't matter*/, mLog);
        mLog("\n");
      }
      // Use the RuleSet to compute the registers for the previous
      // frame.  |regs| is modified in-place.
      UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
      continue;
    }

#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||     \
    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
    defined(GP_PLAT_amd64_freebsd)
    // There's no RuleSet for the specified address.  On amd64/x86_linux, see if
    // it's possible to recover the caller's frame by using the frame pointer.

    // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
    // and assume the following layout:
    //
    //                 <--- new_SP
    //   +----------+
    //   |  new_IP  |  (return address)
    //   +----------+
    //   |  new_BP  |  <--- old_BP
    //   +----------+
    //   |   ....   |
    //   |   ....   |
    //   |   ....   |
    //   +----------+  <---- old_SP (arbitrary, but must be <= old_BP)

    const size_t wordSzB = sizeof(uintptr_t);
    TaggedUWord old_xsp = regs.xsp;

    // points at new_BP ?
    TaggedUWord old_xbp = regs.xbp;
    // points at new_IP ?
    TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
    // is the new_SP ?
    TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);

    if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
        old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
      // We don't need to do any range, alignment or validity checks for
      // addresses passed to DerefTUW, since that performs them itself, and
      // returns an invalid value on failure.  Any such value will poison
      // subsequent uses, and we do a final check for validity before putting
      // the computed values into |regs|.
      TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
      if (new_xbp.Valid() && new_xbp.IsAligned() &&
          old_xbp.Value() < new_xbp.Value()) {
        TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
        TaggedUWord new_xsp = old_xbp_plus2;
        if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
          regs.xbp = new_xbp;
          regs.xip = new_xip;
          regs.xsp = new_xsp;
          (*aFramePointerFramesAcquired)++;
          continue;
        }
      }
    }
#elif defined(GP_ARCH_arm64)
    // Here is an example of generated code for prologue and epilogue..
    //
    // stp     x29, x30, [sp, #-16]!
    // mov     x29, sp
    // ...
    // ldp     x29, x30, [sp], #16
    // ret
    //
    // Next is another example of generated code.
    //
    // stp     x20, x19, [sp, #-32]!
    // stp     x29, x30, [sp, #16]
    // add     x29, sp, #0x10
    // ...
    // ldp     x29, x30, [sp, #16]
    // ldp     x20, x19, [sp], #32
    // ret
    //
    // Previous x29 and x30 register are stored in the address of x29 register.
    // But since sp register value depends on local variables, we cannot compute
    // previous sp register from current sp/fp/lr register and there is no
    // regular rule for sp register in prologue. But since return address is lr
    // register, if x29 is valid, we will get return address without sp
    // register.
    //
    // So we assume the following layout that if no rule set. x29 is frame
    // pointer, so we will be able to compute x29 and x30 .
    //
    //   +----------+  <--- new_sp (cannot compute)
    //   |   ....   |
    //   +----------+
    //   |  new_lr  |  (return address)
    //   +----------+
    //   |  new_fp  |  <--- old_fp
    //   +----------+
    //   |   ....   |
    //   |   ....   |
    //   +----------+  <---- old_sp (arbitrary, but unused)

    TaggedUWord old_fp = regs.x29;
    if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
        last_valid_sp.Value() <= old_fp.Value()) {
      TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
      if (new_fp.Valid() && new_fp.IsAligned() &&
          old_fp.Value() < new_fp.Value()) {
        TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
        TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
        if (new_lr.Valid()) {
          regs.x29 = new_fp;
          regs.x30 = new_lr;
          // When using frame pointer to walk stack, we cannot compute sp
          // register since we cannot compute sp register from fp/lr/sp
          // register, and there is no regular rule to compute previous sp
          // register. So mark as invalid.
          regs.sp = TaggedUWord();
          (*aFramePointerFramesAcquired)++;
          continue;
        }
      }
    }
#endif  // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||
        // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||
        // defined(GP_PLAT_amd64_freebsd)

    // We failed to recover a frame either using CFI or FP chasing, and we
    // have no other ways to recover the frame.  So we have to give up.
    break;

  }  // top level unwind loop

  // END UNWIND
  /////////////////////////////////////////////////////////
}
1659 
1660 ////////////////////////////////////////////////////////////////
1661 // LUL Unit Testing                                           //
1662 ////////////////////////////////////////////////////////////////
1663 
// Number of innermost stack bytes presented to LUL by the unit tests, and
// the minimum amount of stack headroom TestUnw() guarantees before taking a
// snapshot.  constexpr (rather than const) makes explicit that it is a
// compile-time constant; it is used as an array bound in TestUnw().
static constexpr int LUL_UNIT_TEST_STACK_SIZE = 32768;
1665 
#if defined(GP_ARCH_mips64)
// Returns the contents of the MIPS link register $31 (ra) on entry, i.e.
// this call's return address -- an address inside the caller, just after
// the call site.  The caller uses that value as a "close enough" program
// counter.  Must not be inlined: inlining would leave $31 holding some
// unrelated earlier return address instead.
static __attribute__((noinline)) unsigned long __getpc(void) {
  unsigned long rtaddr;
  // Copy $31 (ra) into rtaddr via an output register operand.
  __asm__ volatile("move %0, $31" : "=r"(rtaddr));
  return rtaddr;
}
#endif
1673 
1674 // This function is innermost in the test call sequence.  It uses LUL
1675 // to unwind, and compares the result with the sequence specified in
1676 // the director string.  These need to agree in order for the test to
1677 // pass.  In order not to screw up the results, this function needs
1678 // to have a not-very big stack frame, since we're only presenting
1679 // the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
1680 // that chunk unavoidably includes the frame for this function.
1681 //
1682 // This function must not be inlined into its callers.  Doing so will
1683 // cause the expected-vs-actual backtrace consistency checking to
1684 // fail.  Prints summary results to |aLUL|'s logging sink and also
1685 // returns a boolean indicating whether or not the test passed.
static __attribute__((noinline)) bool GetAndCheckStackTrace(
    LUL* aLUL, const char* dstring) {
  // Get hold of the current unwind-start registers.  Each per-arch
  // fragment below uses inline assembly to capture, into |block|, the
  // registers LUL needs as an unwind starting point, and then computes
  // |start|, the lowest stack address that will be copied into the
  // stack image handed to LUL.
  UnwindRegs startRegs;
  memset(&startRegs, 0, sizeof(startRegs));
#if defined(GP_ARCH_amd64)
  // block = { rip, rsp, rbp }.  The leaq reads the current instruction
  // pointer; r15 is scratch and declared clobbered.
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 24);
  __asm__ __volatile__(
      "leaq 0(%%rip), %%r15"
      "\n\t"
      "movq %%r15, 0(%0)"
      "\n\t"
      "movq %%rsp, 8(%0)"
      "\n\t"
      "movq %%rbp, 16(%0)"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "r15");
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  // The SysV AMD64 ABI allows leaf code to use the 128 bytes below rsp
  // (the "red zone"), so include that area in the snapshot.
  const uintptr_t REDZONE_SIZE = 128;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
  // block = { eip, esp, ebp }.  x86 has no direct way to read eip; the
  // call-next-instruction / popl pair is the standard idiom for it.
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 12);
  __asm__ __volatile__(
      ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
      "\n\t"
      "popl %%edi"
      "\n\t"
      "movl %%edi, 0(%0)"
      "\n\t"
      "movl %%esp, 4(%0)"
      "\n\t"
      "movl %%ebp, 8(%0)"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "edi");
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  // No red zone in the x86 ABI.
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
  // block = { r15 (pc), r14 (lr), r13 (sp), r12, r11, r7 }.
  volatile uintptr_t block[6];
  MOZ_ASSERT(sizeof(block) == 24);
  __asm__ __volatile__(
      "mov r0, r15"
      "\n\t"
      "str r0,  [%0, #0]"
      "\n\t"
      "str r14, [%0, #4]"
      "\n\t"
      "str r13, [%0, #8]"
      "\n\t"
      "str r12, [%0, #12]"
      "\n\t"
      "str r11, [%0, #16]"
      "\n\t"
      "str r7,  [%0, #20]"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "r0");
  startRegs.r15 = TaggedUWord(block[0]);
  startRegs.r14 = TaggedUWord(block[1]);
  startRegs.r13 = TaggedUWord(block[2]);
  startRegs.r12 = TaggedUWord(block[3]);
  startRegs.r11 = TaggedUWord(block[4]);
  startRegs.r7 = TaggedUWord(block[5]);
  const uintptr_t REDZONE_SIZE = 0;
  // NOTE(review): on the other arches |start| derives from the captured
  // stack pointer, but here block[1] holds r14 (lr); the stack pointer
  // was stored in block[2] (r13).  This looks like a copy-paste slip --
  // confirm which slot was intended before relying on this path.
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_ARCH_arm64)
  // block = { pc, x29 (fp), x30 (lr), sp }.  "adr x0, ." yields the
  // address of that instruction itself.
  volatile uintptr_t block[4];
  MOZ_ASSERT(sizeof(block) == 32);
  __asm__ __volatile__(
      "adr x0, . \n\t"
      "str x0, [%0, #0] \n\t"
      "str x29, [%0, #8] \n\t"
      "str x30, [%0, #16] \n\t"
      "mov x0, sp \n\t"
      "str x0, [%0, #24] \n\t"
      :
      : "r"(&block[0])
      : "memory", "x0");
  startRegs.pc = TaggedUWord(block[0]);
  startRegs.x29 = TaggedUWord(block[1]);
  startRegs.x30 = TaggedUWord(block[2]);
  startRegs.sp = TaggedUWord(block[3]);
  const uintptr_t REDZONE_SIZE = 0;
  // NOTE(review): block[1] is x29 (frame pointer); the captured stack
  // pointer is block[3].  Using fp as the bottom of the image leaves the
  // bytes between sp and fp out of the snapshot -- verify this is
  // intended (compare amd64/x86/mips64, which use their sp slot here).
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_ARCH_mips64)
  // block = { pc, sp ($29), fp ($30) }.  The pc slot is filled after the
  // asm by calling __getpc(), which returns that call's return address.
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 24);
  __asm__ __volatile__(
      "sd $29, 8(%0)     \n"
      "sd $30, 16(%0)    \n"
      :
      : "r"(block)
      : "memory");
  block[0] = __getpc();
  startRegs.pc = TaggedUWord(block[0]);
  startRegs.sp = TaggedUWord(block[1]);
  startRegs.fp = TaggedUWord(block[2]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#else
#  error "Unsupported platform"
#endif

  // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
  // stack.  Round both ends down to a word boundary and clamp the copy
  // length to what a StackImage can hold.
  uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
  uintptr_t ws = sizeof(void*);
  start &= ~(ws - 1);
  end &= ~(ws - 1);
  uintptr_t nToCopy = end - start;
  if (nToCopy > lul::N_STACK_BYTES) {
    nToCopy = lul::N_STACK_BYTES;
  }
  MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
  // Heap-allocate: StackImage embeds the full N_STACK_BYTES buffer, which
  // is too big to put on this (deliberately small) stack frame.
  StackImage* stackImg = new StackImage();
  stackImg->mLen = nToCopy;
  stackImg->mStartAvma = start;
  if (nToCopy > 0) {
    // Tell memory checkers the region is initialized before reading it
    // wholesale, to avoid false positives on untouched stack bytes.
    MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
    memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
  }

  // Unwind it.
  const int MAX_TEST_FRAMES = 64;
  uintptr_t framePCs[MAX_TEST_FRAMES];
  uintptr_t frameSPs[MAX_TEST_FRAMES];
  size_t framesAvail = mozilla::ArrayLength(framePCs);
  size_t framesUsed = 0;
  size_t framePointerFramesAcquired = 0;
  aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,
               &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);

  delete stackImg;

  // if (0) {
  //  // Show what we have.
  //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
  //  for (size_t i = 0; i < framesUsed; i++) {
  //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
  //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
  //  }
  //  fprintf(stderr, "\n");
  //}

  // Check to see if there's a consistent binding between digits in
  // the director string ('1' .. '8') and the PC values acquired by
  // the unwind.  If there isn't, the unwinding has failed somehow.
  uintptr_t binding[8];  // binding for '1' .. binding for '8'
  memset((void*)binding, 0, sizeof(binding));

  // The general plan is to work backwards along the director string
  // and forwards along the framePCs array.  Doing so corresponds to
  // working outwards from the innermost frame of the recursive test set.
  const char* cursor = dstring;

  // Find the end.  This leaves |cursor| two bytes past the first
  // character we want to look at -- see comment below.
  while (*cursor) cursor++;

  // Counts the number of consistent frames.
  size_t nConsistent = 0;

  // Iterate back to the start of the director string.  The starting
  // points are a bit complex.  We can't use framePCs[0] because that
  // contains the PC in this frame (above).  We can't use framePCs[1]
  // because that will contain the PC at return point in the recursive
  // test group (TestFn[1-8]) for their call "out" to this function,
  // GetAndCheckStackTrace.  Although LUL will compute a correct
  // return address, that will not be the same return address as for a
  // recursive call out of the function to another function in the
  // group.  Hence we can only start consistency checking at
  // framePCs[2].
  //
  // To be consistent, then, we must ignore the last element in the
  // director string as that corresponds to framePCs[1].  Hence the
  // start points are: framePCs[2] and the director string 2 bytes
  // before the terminating zero.
  //
  // Also as a result of this, the number of consistent frames counted
  // will always be one less than the length of the director string
  // (not including its terminating zero).
  size_t frameIx;
  for (cursor = cursor - 2, frameIx = 2;
       cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) {
    char c = *cursor;
    uintptr_t pc = framePCs[frameIx];
    // If this doesn't hold, the director string is ill-formed.
    MOZ_ASSERT(c >= '1' && c <= '8');
    int n = ((int)c) - ((int)'1');
    if (binding[n] == 0) {
      // There's no binding for |c| yet, so install |pc| and carry on.
      binding[n] = pc;
      nConsistent++;
      continue;
    }
    // There's a pre-existing binding for |c|.  Check it's consistent.
    if (binding[n] != pc) {
      // Not consistent.  Give up now.
      break;
    }
    // Consistent.  Keep going.
    nConsistent++;
  }

  // So, did we succeed?  Per the comment above, a fully consistent
  // unwind yields strlen(dstring) - 1 consistent frames.
  bool passed = nConsistent + 1 == strlen(dstring);

  // Show the results.
  char buf[200];
  SprintfLiteral(buf, "LULUnitTest:   dstring = %s\n", dstring);
  buf[sizeof(buf) - 1] = 0;
  aLUL->mLog(buf);
  SprintfLiteral(buf, "LULUnitTest:     %d consistent, %d in dstring: %s\n",
                 (int)nConsistent, (int)strlen(dstring),
                 passed ? "PASS" : "FAIL");
  buf[sizeof(buf) - 1] = 0;
  aLUL->mLog(buf);

  return passed;
}
1917 
1918 // Macro magic to create a set of 8 mutually recursive functions with
1919 // varying frame sizes.  These will recurse amongst themselves as
1920 // specified by |strP|, the directory string, and call
1921 // GetAndCheckStackTrace when the string becomes empty, passing it the
1922 // original value of the string.  This checks the result, printing
1923 // results on |aLUL|'s logging sink, and also returns a boolean
1924 // indicating whether or not the results are acceptable (correct).
1925 
// Declares one test function.  All eight share this signature: |strPorig|
// is the complete director string and |strP| is the still-unconsumed
// suffix of it.
#define DECL_TEST_FN(NAME) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);

// Defines the body of one test function with a local frame of (at least)
// FRAMESIZE bytes.  Each digit '1'..'8' in the director string selects the
// next callee; any other non-NUL character falls through to TestFn8.  The
// body is deliberately convoluted (indirect call, asm barriers, dummy
// write()s) to stop the optimizer from collapsing the call structure that
// the unwinder consistency check depends on.
#define GEN_TEST_FN(NAME, FRAMESIZE)                                          \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) {              \
    /* Create a frame of size (at least) FRAMESIZE, so that the */            \
    /* 8 functions created by this macro offer some variation in frame */     \
    /* sizes.  This isn't as simple as it might seem, since a clever */       \
    /* optimizing compiler (eg, clang-5) detects that the array is unused */  \
    /* and removes it.  We try to defeat this by passing it to a function */  \
    /* in a different compilation unit, and hoping that clang does not */     \
    /* notice that the call is a no-op. */                                    \
    char space[FRAMESIZE];                                                    \
    Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
                                                                              \
    if (*strP == '\0') {                                                      \
      /* We've come to the end of the director string. */                     \
      /* Take a stack snapshot. */                                            \
      return GetAndCheckStackTrace(aLUL, strPorig);                           \
    } else {                                                                  \
      /* Recurse onwards.  This is a bit subtle.  The obvious */              \
      /* thing to do here is call onwards directly, from within the */        \
      /* arms of the case statement.  That gives a problem in that */         \
      /* there will be multiple return points inside each function when */    \
      /* unwinding, so it will be difficult to check for consistency */       \
      /* against the director string.  Instead, we make an indirect */        \
      /* call, so as to guarantee that there is only one call site */         \
      /* within each function.  This does assume that the compiler */         \
      /* won't transform it back to the simple direct-call form. */           \
      /* To discourage it from doing so, the call is bracketed with */        \
      /* __asm__ __volatile__ sections so as to make it not-movable. */       \
      bool (*nextFn)(LUL*, const char*, const char*) = NULL;                  \
      switch (*strP) {                                                        \
        case '1':                                                             \
          nextFn = TestFn1;                                                   \
          break;                                                              \
        case '2':                                                             \
          nextFn = TestFn2;                                                   \
          break;                                                              \
        case '3':                                                             \
          nextFn = TestFn3;                                                   \
          break;                                                              \
        case '4':                                                             \
          nextFn = TestFn4;                                                   \
          break;                                                              \
        case '5':                                                             \
          nextFn = TestFn5;                                                   \
          break;                                                              \
        case '6':                                                             \
          nextFn = TestFn6;                                                   \
          break;                                                              \
        case '7':                                                             \
          nextFn = TestFn7;                                                   \
          break;                                                              \
        case '8':                                                             \
          nextFn = TestFn8;                                                   \
          break;                                                              \
        default:                                                              \
          nextFn = TestFn8;                                                   \
          break;                                                              \
      }                                                                       \
      /* "use" |space| immediately after the recursive call, */               \
      /* so as to dissuade clang from deallocating the space while */         \
      /* the call is active, or otherwise messing with the stack frame. */    \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      bool passed = nextFn(aLUL, strPorig, strP + 1);                         \
      Unused << write(1, space, 0);                                           \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      return passed;                                                          \
    }                                                                         \
  }
1997 
// The test functions are mutually recursive, so it is necessary to
// declare them before defining them.
DECL_TEST_FN(TestFn1)
DECL_TEST_FN(TestFn2)
DECL_TEST_FN(TestFn3)
DECL_TEST_FN(TestFn4)
DECL_TEST_FN(TestFn5)
DECL_TEST_FN(TestFn6)
DECL_TEST_FN(TestFn7)
DECL_TEST_FN(TestFn8)

// Instantiate the eight test functions.  The second macro argument is the
// (minimum) frame size in bytes; the values vary so that the eight frames
// have assorted sizes (see the comment above GEN_TEST_FN).
GEN_TEST_FN(TestFn1, 123)
GEN_TEST_FN(TestFn2, 456)
GEN_TEST_FN(TestFn3, 789)
GEN_TEST_FN(TestFn4, 23)
GEN_TEST_FN(TestFn5, 47)
GEN_TEST_FN(TestFn6, 117)
GEN_TEST_FN(TestFn7, 1)
GEN_TEST_FN(TestFn8, 99)
2017 
2018 // This starts the test sequence going.  Call here to generate a
2019 // sequence of calls as directed by the string |dstring|.  The call
2020 // sequence will, from its innermost frame, finish by calling
2021 // GetAndCheckStackTrace() and passing it |dstring|.
2022 // GetAndCheckStackTrace() will unwind the stack, check consistency
2023 // of those results against |dstring|, and print a pass/fail message
2024 // to aLUL's logging sink.  It also updates the counters in *aNTests
2025 // and aNTestsPassed.
__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
                                       /*OUT*/ int* aNTestsPassed, LUL* aLUL,
                                       const char* dstring) {
  // Ensure that the stack has at least this much space on it.  This
  // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
  // and hand it to LUL.  Safe in the sense that no segfault can
  // happen because the stack is at least this big.  This is all
  // somewhat dubious in the sense that a sufficiently clever compiler
  // (clang, for one) can figure out that space[] is unused and delete
  // it from the frame.  Hence the somewhat elaborate hoop jumping to
  // fill it up before the call and to at least appear to use the
  // value afterwards.
  int i;
  // |volatile| so the compiler cannot prove the array is dead and
  // remove it (which would shrink the frame below the guaranteed size).
  volatile char space[LUL_UNIT_TEST_STACK_SIZE];
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    space[i] = (char)(i & 0x7F);
  }

  // Really run the test.  TestFn1 recurses through the TestFn group as
  // directed by |dstring| and finishes in GetAndCheckStackTrace.
  bool passed = TestFn1(aLUL, dstring, dstring);

  // Appear to use space[], by visiting the value to compute some kind
  // of checksum, and then (apparently) using the checksum.
  int sum = 0;
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    // If this doesn't fool LLVM, I don't know what will.
    sum += space[i] - 3 * i;
  }
  // Feed |sum| into an empty asm so it appears to be live.
  __asm__ __volatile__("" : : "r"(sum));

  // Update the counters.
  (*aNTests)++;
  if (passed) {
    (*aNTestsPassed)++;
  }
}
2062 
RunLulUnitTests(int * aNTests,int * aNTestsPassed,LUL * aLUL)2063 void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
2064                      LUL* aLUL) {
2065   aLUL->mLog(":\n");
2066   aLUL->mLog("LULUnitTest: BEGIN\n");
2067   *aNTests = *aNTestsPassed = 0;
2068   TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
2069   TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
2070   TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
2071   TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
2072   TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
2073   TestUnw(aNTests, aNTestsPassed, aLUL,
2074           "123456781122334455667788777777777777777777777");
2075   aLUL->mLog("LULUnitTest: END\n");
2076   aLUL->mLog(":\n");
2077 }
2078 
2079 }  // namespace lul
2080