1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "LulMain.h"
8 
9 #include <string.h>
10 #include <stdlib.h>
11 #include <stdio.h>
12 #include <unistd.h>  // write(), only for testing LUL
13 
14 #include <algorithm>  // std::sort
15 #include <string>
16 #include <utility>
17 
18 #include "mozilla/Assertions.h"
19 #include "mozilla/ArrayUtils.h"
20 #include "mozilla/CheckedInt.h"
21 #include "mozilla/DebugOnly.h"
22 #include "mozilla/MemoryChecking.h"
23 #include "mozilla/Sprintf.h"
24 #include "mozilla/UniquePtr.h"
25 #include "mozilla/Unused.h"
26 
27 #include "BaseProfiler.h"
28 #include "LulCommonExt.h"
29 #include "LulElfExt.h"
30 #include "LulMainInt.h"
31 
32 using mozilla::baseprofiler::profiler_current_process_id;
33 using mozilla::baseprofiler::profiler_current_thread_id;
34 
35 // Set this to 1 for verbose logging
36 #define DEBUG_MAIN 0
37 
38 namespace lul {
39 
40 using mozilla::CheckedInt;
41 using mozilla::DebugOnly;
42 using mozilla::MallocSizeOf;
43 using mozilla::Unused;
44 using std::pair;
45 using std::string;
46 using std::vector;
47 
48 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
49 //
50 // Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
51 // Any such function -- and, hence, the transitive closure of those
52 // reachable from it -- must not do any dynamic memory allocation.
53 // Doing so risks deadlock.  There is exactly one root function for
54 // the transitive closure: Lul::Unwind.
55 //
56 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
57 
58 ////////////////////////////////////////////////////////////////
59 // RuleSet                                                    //
60 ////////////////////////////////////////////////////////////////
61 
NameOf_DW_REG(int16_t aReg)62 static const char* NameOf_DW_REG(int16_t aReg) {
63   switch (aReg) {
64     case DW_REG_CFA:
65       return "cfa";
66 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
67     case DW_REG_INTEL_XBP:
68       return "xbp";
69     case DW_REG_INTEL_XSP:
70       return "xsp";
71     case DW_REG_INTEL_XIP:
72       return "xip";
73 #elif defined(GP_ARCH_arm)
74     case DW_REG_ARM_R7:
75       return "r7";
76     case DW_REG_ARM_R11:
77       return "r11";
78     case DW_REG_ARM_R12:
79       return "r12";
80     case DW_REG_ARM_R13:
81       return "r13";
82     case DW_REG_ARM_R14:
83       return "r14";
84     case DW_REG_ARM_R15:
85       return "r15";
86 #elif defined(GP_ARCH_arm64)
87     case DW_REG_AARCH64_X29:
88       return "x29";
89     case DW_REG_AARCH64_X30:
90       return "x30";
91     case DW_REG_AARCH64_SP:
92       return "sp";
93 #elif defined(GP_ARCH_mips64)
94     case DW_REG_MIPS_SP:
95       return "sp";
96     case DW_REG_MIPS_FP:
97       return "fp";
98     case DW_REG_MIPS_PC:
99       return "pc";
100 #else
101 #  error "Unsupported arch"
102 #endif
103     default:
104       return "???";
105   }
106 }
107 
ShowRule(const char * aNewReg) const108 string LExpr::ShowRule(const char* aNewReg) const {
109   char buf[64];
110   string res = string(aNewReg) + "=";
111   switch (mHow) {
112     case UNKNOWN:
113       res += "Unknown";
114       break;
115     case NODEREF:
116       SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
117       res += buf;
118       break;
119     case DEREF:
120       SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
121       res += buf;
122       break;
123     case PFXEXPR:
124       SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset);
125       res += buf;
126       break;
127     default:
128       res += "???";
129       break;
130   }
131   return res;
132 }
133 
Print(void (* aLog)(const char *)) const134 void RuleSet::Print(void (*aLog)(const char*)) const {
135   char buf[96];
136   SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)mAddr,
137                  (unsigned long long int)(mAddr + mLen - 1));
138   string res = string(buf);
139   res += mCfaExpr.ShowRule("cfa");
140   res += " in";
141   // For each reg we care about, print the recovery expression.
142 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
143   res += mXipExpr.ShowRule(" RA");
144   res += mXspExpr.ShowRule(" SP");
145   res += mXbpExpr.ShowRule(" BP");
146 #elif defined(GP_ARCH_arm)
147   res += mR15expr.ShowRule(" R15");
148   res += mR7expr.ShowRule(" R7");
149   res += mR11expr.ShowRule(" R11");
150   res += mR12expr.ShowRule(" R12");
151   res += mR13expr.ShowRule(" R13");
152   res += mR14expr.ShowRule(" R14");
153 #elif defined(GP_ARCH_arm64)
154   res += mX29expr.ShowRule(" X29");
155   res += mX30expr.ShowRule(" X30");
156   res += mSPexpr.ShowRule(" SP");
157 #elif defined(GP_ARCH_mips64)
158   res += mPCexpr.ShowRule(" PC");
159   res += mSPexpr.ShowRule(" SP");
160   res += mFPexpr.ShowRule(" FP");
161 #else
162 #  error "Unsupported arch"
163 #endif
164   aLog(res.c_str());
165 }
166 
ExprForRegno(DW_REG_NUMBER aRegno)167 LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
168   switch (aRegno) {
169     case DW_REG_CFA:
170       return &mCfaExpr;
171 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
172     case DW_REG_INTEL_XIP:
173       return &mXipExpr;
174     case DW_REG_INTEL_XSP:
175       return &mXspExpr;
176     case DW_REG_INTEL_XBP:
177       return &mXbpExpr;
178 #elif defined(GP_ARCH_arm)
179     case DW_REG_ARM_R15:
180       return &mR15expr;
181     case DW_REG_ARM_R14:
182       return &mR14expr;
183     case DW_REG_ARM_R13:
184       return &mR13expr;
185     case DW_REG_ARM_R12:
186       return &mR12expr;
187     case DW_REG_ARM_R11:
188       return &mR11expr;
189     case DW_REG_ARM_R7:
190       return &mR7expr;
191 #elif defined(GP_ARCH_arm64)
192     case DW_REG_AARCH64_X29:
193       return &mX29expr;
194     case DW_REG_AARCH64_X30:
195       return &mX30expr;
196     case DW_REG_AARCH64_SP:
197       return &mSPexpr;
198 #elif defined(GP_ARCH_mips64)
199     case DW_REG_MIPS_SP:
200       return &mSPexpr;
201     case DW_REG_MIPS_FP:
202       return &mFPexpr;
203     case DW_REG_MIPS_PC:
204       return &mPCexpr;
205 #else
206 #  error "Unknown arch"
207 #endif
208     default:
209       return nullptr;
210   }
211 }
212 
RuleSet()213 RuleSet::RuleSet() {
214   mAddr = 0;
215   mLen = 0;
216   // The only other fields are of type LExpr and those are initialised
217   // by LExpr::LExpr().
218 }
219 
220 ////////////////////////////////////////////////////////////////
221 // SecMap                                                     //
222 ////////////////////////////////////////////////////////////////
223 
224 // See header file LulMainInt.h for comments about invariants.
225 
SecMap(void (* aLog)(const char *))226 SecMap::SecMap(void (*aLog)(const char*))
227     : mSummaryMinAddr(1), mSummaryMaxAddr(0), mUsable(true), mLog(aLog) {}
228 
~SecMap()229 SecMap::~SecMap() { mRuleSets.clear(); }
230 
231 // RUNS IN NO-MALLOC CONTEXT
FindRuleSet(uintptr_t ia)232 RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
233   // Binary search mRuleSets to find one that brackets |ia|.
234   // lo and hi need to be signed, else the loop termination tests
235   // don't work properly.  Note that this works correctly even when
236   // mRuleSets.size() == 0.
237 
238   // Can't do this until the array has been sorted and preened.
239   MOZ_ASSERT(mUsable);
240 
241   long int lo = 0;
242   long int hi = (long int)mRuleSets.size() - 1;
243   while (true) {
244     // current unsearched space is from lo to hi, inclusive.
245     if (lo > hi) {
246       // not found
247       return nullptr;
248     }
249     long int mid = lo + ((hi - lo) / 2);
250     RuleSet* mid_ruleSet = &mRuleSets[mid];
251     uintptr_t mid_minAddr = mid_ruleSet->mAddr;
252     uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1;
253     if (ia < mid_minAddr) {
254       hi = mid - 1;
255       continue;
256     }
257     if (ia > mid_maxAddr) {
258       lo = mid + 1;
259       continue;
260     }
261     MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
262     return mid_ruleSet;
263   }
264   // NOTREACHED
265 }
266 
267 // Add a RuleSet to the collection.  The rule is copied in.  Calling
268 // this makes the map non-searchable.
AddRuleSet(const RuleSet * rs)269 void SecMap::AddRuleSet(const RuleSet* rs) {
270   mUsable = false;
271   mRuleSets.push_back(*rs);
272 }
273 
274 // Add a PfxInstr to the vector of such instrs, and return the index
275 // in the vector.  Calling this makes the map non-searchable.
AddPfxInstr(PfxInstr pfxi)276 uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
277   mUsable = false;
278   mPfxInstrs.push_back(pfxi);
279   return mPfxInstrs.size() - 1;
280 }
281 
CmpRuleSetsByAddrLE(const RuleSet & rs1,const RuleSet & rs2)282 static bool CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
283   return rs1.mAddr < rs2.mAddr;
284 }
285 
286 // Prepare the map for searching.  Completely remove any which don't
287 // fall inside the specified range [start, +len).
PrepareRuleSets(uintptr_t aStart,size_t aLen)288 void SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen) {
289   if (mRuleSets.empty()) {
290     return;
291   }
292 
293   MOZ_ASSERT(aLen > 0);
294   if (aLen == 0) {
295     // This should never happen.
296     mRuleSets.clear();
297     return;
298   }
299 
300   // Sort by start addresses.
301   std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);
302 
303   // Detect any entry not completely contained within [start, +len).
304   // Set its length to zero, so that the next pass will remove it.
305   for (size_t i = 0; i < mRuleSets.size(); ++i) {
306     RuleSet* rs = &mRuleSets[i];
307     if (rs->mLen > 0 &&
308         (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
309       rs->mLen = 0;
310     }
311   }
312 
313   // Iteratively truncate any overlaps and remove any zero length
314   // entries that might result, or that may have been present
315   // initially.  Unless the input is seriously screwy, this is
316   // expected to iterate only once.
317   while (true) {
318     size_t i;
319     size_t n = mRuleSets.size();
320     size_t nZeroLen = 0;
321 
322     if (n == 0) {
323       break;
324     }
325 
326     for (i = 1; i < n; ++i) {
327       RuleSet* prev = &mRuleSets[i - 1];
328       RuleSet* here = &mRuleSets[i];
329       MOZ_ASSERT(prev->mAddr <= here->mAddr);
330       if (prev->mAddr + prev->mLen > here->mAddr) {
331         prev->mLen = here->mAddr - prev->mAddr;
332       }
333       if (prev->mLen == 0) nZeroLen++;
334     }
335 
336     if (mRuleSets[n - 1].mLen == 0) {
337       nZeroLen++;
338     }
339 
340     // At this point, the entries are in-order and non-overlapping.
341     // If none of them are zero-length, we are done.
342     if (nZeroLen == 0) {
343       break;
344     }
345 
346     // Slide back the entries to remove the zero length ones.
347     size_t j = 0;  // The write-point.
348     for (i = 0; i < n; ++i) {
349       if (mRuleSets[i].mLen == 0) {
350         continue;
351       }
352       if (j != i) mRuleSets[j] = mRuleSets[i];
353       ++j;
354     }
355     MOZ_ASSERT(i == n);
356     MOZ_ASSERT(nZeroLen <= n);
357     MOZ_ASSERT(j == n - nZeroLen);
358     while (nZeroLen > 0) {
359       mRuleSets.pop_back();
360       nZeroLen--;
361     }
362 
363     MOZ_ASSERT(mRuleSets.size() == j);
364   }
365 
366   size_t n = mRuleSets.size();
367 
368 #ifdef DEBUG
369   // Do a final check on the rules: their address ranges must be
370   // ascending, non overlapping, non zero sized.
371   if (n > 0) {
372     MOZ_ASSERT(mRuleSets[0].mLen > 0);
373     for (size_t i = 1; i < n; ++i) {
374       RuleSet* prev = &mRuleSets[i - 1];
375       RuleSet* here = &mRuleSets[i];
376       MOZ_ASSERT(prev->mAddr < here->mAddr);
377       MOZ_ASSERT(here->mLen > 0);
378       MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
379     }
380   }
381 #endif
382 
383   // Set the summary min and max address values.
384   if (n == 0) {
385     // Use the values defined in comments in the class declaration.
386     mSummaryMinAddr = 1;
387     mSummaryMaxAddr = 0;
388   } else {
389     mSummaryMinAddr = mRuleSets[0].mAddr;
390     mSummaryMaxAddr = mRuleSets[n - 1].mAddr + mRuleSets[n - 1].mLen - 1;
391   }
392   char buf[150];
393   SprintfLiteral(buf, "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
394                  (int)n, (unsigned long long int)mSummaryMinAddr,
395                  (unsigned long long int)mSummaryMaxAddr);
396   buf[sizeof(buf) - 1] = 0;
397   mLog(buf);
398 
399   // Is now usable for binary search.
400   mUsable = true;
401 
402 #if 0
403   mLog("\nRulesets after preening\n");
404   for (size_t i = 0; i < mRuleSets.size(); ++i) {
405     mRuleSets[i].Print(mLog);
406     mLog("\n");
407   }
408   mLog("\n");
409 #endif
410 }
411 
IsEmpty()412 bool SecMap::IsEmpty() { return mRuleSets.empty(); }
413 
SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const414 size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
415   size_t n = aMallocSizeOf(this);
416 
417   // It's conceivable that these calls would be unsafe with some
418   // implementations of std::vector, but it seems to be working for now...
419   n += aMallocSizeOf(mRuleSets.data());
420   n += aMallocSizeOf(mPfxInstrs.data());
421 
422   return n;
423 }
424 
425 ////////////////////////////////////////////////////////////////
426 // SegArray                                                   //
427 ////////////////////////////////////////////////////////////////
428 
429 // A SegArray holds a set of address ranges that together exactly
430 // cover an address range, with no overlaps or holes.  Each range has
431 // an associated value, which in this case has been specialised to be
432 // a simple boolean.  The representation is kept to minimal canonical
433 // form in which adjacent ranges with the same associated value are
434 // merged together.  Each range is represented by a |struct Seg|.
435 //
436 // SegArrays are used to keep track of which parts of the address
437 // space are known to contain instructions.
438 class SegArray {
439  public:
add(uintptr_t lo,uintptr_t hi,bool val)440   void add(uintptr_t lo, uintptr_t hi, bool val) {
441     if (lo > hi) {
442       return;
443     }
444     split_at(lo);
445     if (hi < UINTPTR_MAX) {
446       split_at(hi + 1);
447     }
448     std::vector<Seg>::size_type iLo, iHi, i;
449     iLo = find(lo);
450     iHi = find(hi);
451     for (i = iLo; i <= iHi; ++i) {
452       mSegs[i].val = val;
453     }
454     preen();
455   }
456 
457   // RUNS IN NO-MALLOC CONTEXT
getBoundingCodeSegment(uintptr_t * rx_min,uintptr_t * rx_max,uintptr_t addr)458   bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
459                               /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
460     std::vector<Seg>::size_type i = find(addr);
461     if (!mSegs[i].val) {
462       return false;
463     }
464     *rx_min = mSegs[i].lo;
465     *rx_max = mSegs[i].hi;
466     return true;
467   }
468 
SegArray()469   SegArray() {
470     Seg s(0, UINTPTR_MAX, false);
471     mSegs.push_back(s);
472   }
473 
474  private:
475   struct Seg {
Seglul::SegArray::Seg476     Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
477     uintptr_t lo;
478     uintptr_t hi;
479     bool val;
480   };
481 
preen()482   void preen() {
483     for (std::vector<Seg>::iterator iter = mSegs.begin();
484          iter < mSegs.end() - 1; ++iter) {
485       if (iter[0].val != iter[1].val) {
486         continue;
487       }
488       iter[0].hi = iter[1].hi;
489       mSegs.erase(iter + 1);
490       // Back up one, so as not to miss an opportunity to merge
491       // with the entry after this one.
492       --iter;
493     }
494   }
495 
496   // RUNS IN NO-MALLOC CONTEXT
find(uintptr_t a)497   std::vector<Seg>::size_type find(uintptr_t a) {
498     long int lo = 0;
499     long int hi = (long int)mSegs.size();
500     while (true) {
501       // The unsearched space is lo .. hi inclusive.
502       if (lo > hi) {
503         // Not found.  This can't happen.
504         return (std::vector<Seg>::size_type)(-1);
505       }
506       long int mid = lo + ((hi - lo) / 2);
507       uintptr_t mid_lo = mSegs[mid].lo;
508       uintptr_t mid_hi = mSegs[mid].hi;
509       if (a < mid_lo) {
510         hi = mid - 1;
511         continue;
512       }
513       if (a > mid_hi) {
514         lo = mid + 1;
515         continue;
516       }
517       return (std::vector<Seg>::size_type)mid;
518     }
519   }
520 
split_at(uintptr_t a)521   void split_at(uintptr_t a) {
522     std::vector<Seg>::size_type i = find(a);
523     if (mSegs[i].lo == a) {
524       return;
525     }
526     mSegs.insert(mSegs.begin() + i + 1, mSegs[i]);
527     mSegs[i].hi = a - 1;
528     mSegs[i + 1].lo = a;
529   }
530 
show()531   void show() {
532     printf("<< %d entries:\n", (int)mSegs.size());
533     for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();
534          ++iter) {
535       printf("  %016llx  %016llx  %s\n", (unsigned long long int)(*iter).lo,
536              (unsigned long long int)(*iter).hi,
537              (*iter).val ? "true" : "false");
538     }
539     printf(">>\n");
540   }
541 
542   std::vector<Seg> mSegs;
543 };
544 
545 ////////////////////////////////////////////////////////////////
546 // PriMap                                                     //
547 ////////////////////////////////////////////////////////////////
548 
549 class PriMap {
550  public:
PriMap(void (* aLog)(const char *))551   explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}
552 
553   // RUNS IN NO-MALLOC CONTEXT
Lookup(uintptr_t ia)554   pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
555     SecMap* sm = FindSecMap(ia);
556     return pair<const RuleSet*, const vector<PfxInstr>*>(
557         sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr);
558   }
559 
560   // Add a secondary map.  No overlaps allowed w.r.t. existing
561   // secondary maps.
AddSecMap(mozilla::UniquePtr<SecMap> && aSecMap)562   void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
563     // We can't add an empty SecMap to the PriMap.  But that's OK
564     // since we'd never be able to find anything in it anyway.
565     if (aSecMap->IsEmpty()) {
566       return;
567     }
568 
569     // Iterate through the SecMaps and find the right place for this
570     // one.  At the same time, ensure that the in-order
571     // non-overlapping invariant is preserved (and, generally, holds).
572     // FIXME: this gives a cost that is O(N^2) in the total number of
573     // shared objects in the system.  ToDo: better.
574     MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);
575 
576     size_t num_secMaps = mSecMaps.size();
577     uintptr_t i;
578     for (i = 0; i < num_secMaps; ++i) {
579       mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
580       MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr);
581       if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) {
582         // |aSecMap| needs to be inserted immediately before mSecMaps[i].
583         break;
584       }
585     }
586     MOZ_ASSERT(i <= num_secMaps);
587     if (i == num_secMaps) {
588       // It goes at the end.
589       mSecMaps.push_back(std::move(aSecMap));
590     } else {
591       std::vector<mozilla::UniquePtr<SecMap>>::iterator iter =
592           mSecMaps.begin() + i;
593       mSecMaps.insert(iter, std::move(aSecMap));
594     }
595     char buf[100];
596     SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
597                    (int)mSecMaps.size());
598     buf[sizeof(buf) - 1] = 0;
599     mLog(buf);
600   }
601 
602   // Remove and delete any SecMaps in the mapping, that intersect
603   // with the specified address range.
RemoveSecMapsInRange(uintptr_t avma_min,uintptr_t avma_max)604   void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
605     MOZ_ASSERT(avma_min <= avma_max);
606     size_t num_secMaps = mSecMaps.size();
607     if (num_secMaps > 0) {
608       intptr_t i;
609       // Iterate from end to start over the vector, so as to ensure
610       // that the special case where |avma_min| and |avma_max| denote
611       // the entire address space, can be completed in time proportional
612       // to the number of elements in the map.
613       for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) {
614         mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
615         if (sm_i->mSummaryMaxAddr < avma_min ||
616             avma_max < sm_i->mSummaryMinAddr) {
617           // There's no overlap.  Move on.
618           continue;
619         }
620         // We need to remove mSecMaps[i] and slide all those above it
621         // downwards to cover the hole.
622         mSecMaps.erase(mSecMaps.begin() + i);
623       }
624     }
625   }
626 
627   // Return the number of currently contained SecMaps.
CountSecMaps()628   size_t CountSecMaps() { return mSecMaps.size(); }
629 
SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const630   size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
631     size_t n = aMallocSizeOf(this);
632 
633     // It's conceivable that this call would be unsafe with some
634     // implementations of std::vector, but it seems to be working for now...
635     n += aMallocSizeOf(mSecMaps.data());
636 
637     for (size_t i = 0; i < mSecMaps.size(); i++) {
638       n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf);
639     }
640 
641     return n;
642   }
643 
644  private:
645   // RUNS IN NO-MALLOC CONTEXT
FindSecMap(uintptr_t ia)646   SecMap* FindSecMap(uintptr_t ia) {
647     // Binary search mSecMaps to find one that brackets |ia|.
648     // lo and hi need to be signed, else the loop termination tests
649     // don't work properly.
650     long int lo = 0;
651     long int hi = (long int)mSecMaps.size() - 1;
652     while (true) {
653       // current unsearched space is from lo to hi, inclusive.
654       if (lo > hi) {
655         // not found
656         return nullptr;
657       }
658       long int mid = lo + ((hi - lo) / 2);
659       mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid];
660       uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr;
661       uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr;
662       if (ia < mid_minAddr) {
663         hi = mid - 1;
664         continue;
665       }
666       if (ia > mid_maxAddr) {
667         lo = mid + 1;
668         continue;
669       }
670       MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
671       return mid_secMap.get();
672     }
673     // NOTREACHED
674   }
675 
676  private:
677   // sorted array of per-object ranges, non overlapping, non empty
678   std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;
679 
680   // a logging sink, for debugging.
681   void (*mLog)(const char*);
682 };
683 
684 ////////////////////////////////////////////////////////////////
685 // LUL                                                        //
686 ////////////////////////////////////////////////////////////////
687 
688 #define LUL_LOG(_str)                                           \
689   do {                                                          \
690     char buf[200];                                              \
691     SprintfLiteral(buf, "LUL: pid %d tid %d lul-obj %p: %s",    \
692                    profiler_current_process_id(),               \
693                    profiler_current_thread_id(), this, (_str)); \
694     buf[sizeof(buf) - 1] = 0;                                   \
695     mLog(buf);                                                  \
696   } while (0)
697 
LUL(void (* aLog)(const char *))698 LUL::LUL(void (*aLog)(const char*))
699     : mLog(aLog),
700       mAdminMode(true),
701       mAdminThreadId(profiler_current_thread_id()),
702       mPriMap(new PriMap(aLog)),
703       mSegArray(new SegArray()),
704       mUSU(new UniqueStringUniverse()) {
705   LUL_LOG("LUL::LUL: Created object");
706 }
707 
~LUL()708 LUL::~LUL() {
709   LUL_LOG("LUL::~LUL: Destroyed object");
710   delete mPriMap;
711   delete mSegArray;
712   mLog = nullptr;
713   delete mUSU;
714 }
715 
MaybeShowStats()716 void LUL::MaybeShowStats() {
717   // This is racey in the sense that it can't guarantee that
718   //   n_new == n_new_Context + n_new_CFI + n_new_Scanned
719   // if it should happen that mStats is updated by some other thread
720   // in between computation of n_new and n_new_{Context,CFI,FP}.
721   // But it's just stats printing, so we don't really care.
722   uint32_t n_new = mStats - mStatsPrevious;
723   if (n_new >= 5000) {
724     uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
725     uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;
726     uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;
727     mStatsPrevious = mStats;
728     char buf[200];
729     SprintfLiteral(buf,
730                    "LUL frame stats: TOTAL %5u"
731                    "    CTX %4u    CFI %4u    FP %4u",
732                    n_new, n_new_Context, n_new_CFI, n_new_FP);
733     buf[sizeof(buf) - 1] = 0;
734     mLog(buf);
735   }
736 }
737 
SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const738 size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
739   size_t n = aMallocSizeOf(this);
740   n += mPriMap->SizeOfIncludingThis(aMallocSizeOf);
741 
742   // Measurement of the following members may be added later if DMD finds it
743   // is worthwhile:
744   // - mSegArray
745   // - mUSU
746 
747   return n;
748 }
749 
EnableUnwinding()750 void LUL::EnableUnwinding() {
751   LUL_LOG("LUL::EnableUnwinding");
752   // Don't assert for Admin mode here.  That is, tolerate a call here
753   // if we are already in Unwinding mode.
754   MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
755 
756   mAdminMode = false;
757 }
758 
NotifyAfterMap(uintptr_t aRXavma,size_t aSize,const char * aFileName,const void * aMappedImage)759 void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
760                          const void* aMappedImage) {
761   MOZ_RELEASE_ASSERT(mAdminMode);
762   MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
763 
764   mLog(":\n");
765   char buf[200];
766   SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
767                  (unsigned long long int)aRXavma, (unsigned long long int)aSize,
768                  aFileName);
769   buf[sizeof(buf) - 1] = 0;
770   mLog(buf);
771 
772   // Ignore obviously-stupid notifications.
773   if (aSize > 0) {
774     // Here's a new mapping, for this object.
775     mozilla::UniquePtr<SecMap> smap = mozilla::MakeUnique<SecMap>(mLog);
776 
777     // Read CFI or EXIDX unwind data into |smap|.
778     if (!aMappedImage) {
779       (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
780                                 smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
781     } else {
782       (void)lul::ReadSymbolDataInternal(
783           (const uint8_t*)aMappedImage, string(aFileName),
784           std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
785     }
786 
787     mLog("NotifyMap .. preparing entries\n");
788 
789     smap->PrepareRuleSets(aRXavma, aSize);
790 
791     SprintfLiteral(buf, "NotifyMap got %lld entries\n",
792                    (long long int)smap->Size());
793     buf[sizeof(buf) - 1] = 0;
794     mLog(buf);
795 
796     // Add it to the primary map (the top level set of mapped objects).
797     mPriMap->AddSecMap(std::move(smap));
798 
799     // Tell the segment array about the mapping, so that the stack
800     // scan and __kernel_syscall mechanisms know where valid code is.
801     mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
802   }
803 }
804 
NotifyExecutableArea(uintptr_t aRXavma,size_t aSize)805 void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
806   MOZ_RELEASE_ASSERT(mAdminMode);
807   MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
808 
809   mLog(":\n");
810   char buf[200];
811   SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n",
812                  (unsigned long long int)aRXavma,
813                  (unsigned long long int)aSize);
814   buf[sizeof(buf) - 1] = 0;
815   mLog(buf);
816 
817   // Ignore obviously-stupid notifications.
818   if (aSize > 0) {
819     // Tell the segment array about the mapping, so that the stack
820     // scan and __kernel_syscall mechanisms know where valid code is.
821     mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
822   }
823 }
824 
NotifyBeforeUnmap(uintptr_t aRXavmaMin,uintptr_t aRXavmaMax)825 void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
826   MOZ_RELEASE_ASSERT(mAdminMode);
827   MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
828 
829   mLog(":\n");
830   char buf[100];
831   SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",
832                  (unsigned long long int)aRXavmaMin,
833                  (unsigned long long int)aRXavmaMax);
834   buf[sizeof(buf) - 1] = 0;
835   mLog(buf);
836 
837   MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);
838 
839   // Remove from the primary map, any secondary maps that intersect
840   // with the address range.  Also delete the secondary maps.
841   mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);
842 
843   // Tell the segment array that the address range no longer
844   // contains valid code.
845   mSegArray->add(aRXavmaMin, aRXavmaMax, false);
846 
847   SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",
848                  (int)mPriMap->CountSecMaps());
849   buf[sizeof(buf) - 1] = 0;
850   mLog(buf);
851 }
852 
CountMappings()853 size_t LUL::CountMappings() {
854   MOZ_RELEASE_ASSERT(mAdminMode);
855   MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
856 
857   return mPriMap->CountSecMaps();
858 }
859 
860 // RUNS IN NO-MALLOC CONTEXT
DerefTUW(TaggedUWord aAddr,const StackImage * aStackImg)861 static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
862   if (!aAddr.Valid()) {
863     return TaggedUWord();
864   }
865 
866   // Lower limit check.  |aAddr.Value()| is the lowest requested address
867   // and |aStackImg->mStartAvma| is the lowest address we actually have,
868   // so the comparison is straightforward.
869   if (aAddr.Value() < aStackImg->mStartAvma) {
870     return TaggedUWord();
871   }
872 
873   // Upper limit check.  We must compute the highest requested address
874   // and the highest address we actually have, but being careful to
875   // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
876   // 3/7 values below that, then we will get overflow.  See bug #1245477.
877   typedef CheckedInt<uintptr_t> CheckedUWord;
878   CheckedUWord highest_requested_plus_one =
879       CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
880   CheckedUWord highest_available_plus_one =
881       CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
882   if (!highest_requested_plus_one.isValid()     // overflow?
883       || !highest_available_plus_one.isValid()  // overflow?
884       || (highest_requested_plus_one.value() >
885           highest_available_plus_one.value())) {  // in range?
886     return TaggedUWord();
887   }
888 
889   return TaggedUWord(
890       *(uintptr_t*)(&aStackImg
891                          ->mContents[aAddr.Value() - aStackImg->mStartAvma]));
892 }
893 
894 // RUNS IN NO-MALLOC CONTEXT
EvaluateReg(int16_t aReg,const UnwindRegs * aOldRegs,TaggedUWord aCFA)895 static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
896                                TaggedUWord aCFA) {
897   switch (aReg) {
898     case DW_REG_CFA:
899       return aCFA;
900 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
901     case DW_REG_INTEL_XBP:
902       return aOldRegs->xbp;
903     case DW_REG_INTEL_XSP:
904       return aOldRegs->xsp;
905     case DW_REG_INTEL_XIP:
906       return aOldRegs->xip;
907 #elif defined(GP_ARCH_arm)
908     case DW_REG_ARM_R7:
909       return aOldRegs->r7;
910     case DW_REG_ARM_R11:
911       return aOldRegs->r11;
912     case DW_REG_ARM_R12:
913       return aOldRegs->r12;
914     case DW_REG_ARM_R13:
915       return aOldRegs->r13;
916     case DW_REG_ARM_R14:
917       return aOldRegs->r14;
918     case DW_REG_ARM_R15:
919       return aOldRegs->r15;
920 #elif defined(GP_ARCH_arm64)
921     case DW_REG_AARCH64_X29:
922       return aOldRegs->x29;
923     case DW_REG_AARCH64_X30:
924       return aOldRegs->x30;
925     case DW_REG_AARCH64_SP:
926       return aOldRegs->sp;
927 #elif defined(GP_ARCH_mips64)
928     case DW_REG_MIPS_SP:
929       return aOldRegs->sp;
930     case DW_REG_MIPS_FP:
931       return aOldRegs->fp;
932     case DW_REG_MIPS_PC:
933       return aOldRegs->pc;
934 #else
935 #  error "Unsupported arch"
936 #endif
937     default:
938       MOZ_ASSERT(0);
939       return TaggedUWord();
940   }
941 }
942 
943 // RUNS IN NO-MALLOC CONTEXT
944 // See prototype for comment.
EvaluatePfxExpr(int32_t start,const UnwindRegs * aOldRegs,TaggedUWord aCFA,const StackImage * aStackImg,const vector<PfxInstr> & aPfxInstrs)945 TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
946                             TaggedUWord aCFA, const StackImage* aStackImg,
947                             const vector<PfxInstr>& aPfxInstrs) {
948   // A small evaluation stack, and a stack pointer, which points to
949   // the highest numbered in-use element.
950   const int N_STACK = 10;
951   TaggedUWord stack[N_STACK];
952   int stackPointer = -1;
953   for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();
954 
955 #define PUSH(_tuw)                                             \
956   do {                                                         \
957     if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \
958     stack[++stackPointer] = (_tuw);                            \
959   } while (0)
960 
961 #define POP(_lval)                                   \
962   do {                                               \
963     if (stackPointer < 0) goto fail; /* underflow */ \
964     _lval = stack[stackPointer--];                   \
965   } while (0)
966 
967   // Cursor in the instruction sequence.
968   size_t curr = start + 1;
969 
970   // Check the start point is sane.
971   size_t nInstrs = aPfxInstrs.size();
972   if (start < 0 || (size_t)start >= nInstrs) goto fail;
973 
974   {
975     // The instruction sequence must start with PX_Start.  If not,
976     // something is seriously wrong.
977     PfxInstr first = aPfxInstrs[start];
978     if (first.mOpcode != PX_Start) goto fail;
979 
980     // Push the CFA on the stack to start with (or not), as required by
981     // the original DW_OP_*expression* CFI.
982     if (first.mOperand != 0) PUSH(aCFA);
983   }
984 
985   while (true) {
986     if (curr >= nInstrs) goto fail;  // ran off the end of the sequence
987 
988     PfxInstr pfxi = aPfxInstrs[curr++];
989     if (pfxi.mOpcode == PX_End) break;  // we're done
990 
991     switch (pfxi.mOpcode) {
992       case PX_Start:
993         // This should appear only at the start of the sequence.
994         goto fail;
995       case PX_End:
996         // We just took care of that, so we shouldn't see it again.
997         MOZ_ASSERT(0);
998         goto fail;
999       case PX_SImm32:
1000         PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
1001         break;
1002       case PX_DwReg: {
1003         DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
1004         MOZ_ASSERT(reg != DW_REG_CFA);
1005         PUSH(EvaluateReg(reg, aOldRegs, aCFA));
1006         break;
1007       }
1008       case PX_Deref: {
1009         TaggedUWord addr;
1010         POP(addr);
1011         PUSH(DerefTUW(addr, aStackImg));
1012         break;
1013       }
1014       case PX_Add: {
1015         TaggedUWord x, y;
1016         POP(x);
1017         POP(y);
1018         PUSH(y + x);
1019         break;
1020       }
1021       case PX_Sub: {
1022         TaggedUWord x, y;
1023         POP(x);
1024         POP(y);
1025         PUSH(y - x);
1026         break;
1027       }
1028       case PX_And: {
1029         TaggedUWord x, y;
1030         POP(x);
1031         POP(y);
1032         PUSH(y & x);
1033         break;
1034       }
1035       case PX_Or: {
1036         TaggedUWord x, y;
1037         POP(x);
1038         POP(y);
1039         PUSH(y | x);
1040         break;
1041       }
1042       case PX_CmpGES: {
1043         TaggedUWord x, y;
1044         POP(x);
1045         POP(y);
1046         PUSH(y.CmpGEs(x));
1047         break;
1048       }
1049       case PX_Shl: {
1050         TaggedUWord x, y;
1051         POP(x);
1052         POP(y);
1053         PUSH(y << x);
1054         break;
1055       }
1056       default:
1057         MOZ_ASSERT(0);
1058         goto fail;
1059     }
1060   }  // while (true)
1061 
1062   // Evaluation finished.  The top value on the stack is the result.
1063   if (stackPointer >= 0) {
1064     return stack[stackPointer];
1065   }
1066   // Else fall through
1067 
1068 fail:
1069   return TaggedUWord();
1070 
1071 #undef PUSH
1072 #undef POP
1073 }
1074 
1075 // RUNS IN NO-MALLOC CONTEXT
EvaluateExpr(const UnwindRegs * aOldRegs,TaggedUWord aCFA,const StackImage * aStackImg,const vector<PfxInstr> * aPfxInstrs) const1076 TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
1077                                 const StackImage* aStackImg,
1078                                 const vector<PfxInstr>* aPfxInstrs) const {
1079   switch (mHow) {
1080     case UNKNOWN:
1081       return TaggedUWord();
1082     case NODEREF: {
1083       TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1084       tuw = tuw + TaggedUWord((intptr_t)mOffset);
1085       return tuw;
1086     }
1087     case DEREF: {
1088       TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1089       tuw = tuw + TaggedUWord((intptr_t)mOffset);
1090       return DerefTUW(tuw, aStackImg);
1091     }
1092     case PFXEXPR: {
1093       MOZ_ASSERT(aPfxInstrs);
1094       if (!aPfxInstrs) {
1095         return TaggedUWord();
1096       }
1097       return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs);
1098     }
1099     default:
1100       MOZ_ASSERT(0);
1101       return TaggedUWord();
1102   }
1103 }
1104 
1105 // RUNS IN NO-MALLOC CONTEXT
UseRuleSet(UnwindRegs * aRegs,const StackImage * aStackImg,const RuleSet * aRS,const vector<PfxInstr> * aPfxInstrs)1106 static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
1107                        const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
1108   // Take a copy of regs, since we'll need to refer to the old values
1109   // whilst computing the new ones.
1110   UnwindRegs old_regs = *aRegs;
1111 
1112   // Mark all the current register values as invalid, so that the
1113   // caller can see, on our return, which ones have been computed
1114   // anew.  If we don't even manage to compute a new PC value, then
1115   // the caller will have to abandon the unwind.
1116   // FIXME: Create and use instead: aRegs->SetAllInvalid();
1117 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
1118   aRegs->xbp = TaggedUWord();
1119   aRegs->xsp = TaggedUWord();
1120   aRegs->xip = TaggedUWord();
1121 #elif defined(GP_ARCH_arm)
1122   aRegs->r7 = TaggedUWord();
1123   aRegs->r11 = TaggedUWord();
1124   aRegs->r12 = TaggedUWord();
1125   aRegs->r13 = TaggedUWord();
1126   aRegs->r14 = TaggedUWord();
1127   aRegs->r15 = TaggedUWord();
1128 #elif defined(GP_ARCH_arm64)
1129   aRegs->x29 = TaggedUWord();
1130   aRegs->x30 = TaggedUWord();
1131   aRegs->sp = TaggedUWord();
1132   aRegs->pc = TaggedUWord();
1133 #elif defined(GP_ARCH_mips64)
1134   aRegs->sp = TaggedUWord();
1135   aRegs->fp = TaggedUWord();
1136   aRegs->pc = TaggedUWord();
1137 #else
1138 #  error "Unsupported arch"
1139 #endif
1140 
1141   // This is generally useful.
1142   const TaggedUWord inval = TaggedUWord();
1143 
1144   // First, compute the CFA.
1145   TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,
1146                                                aStackImg, aPfxInstrs);
1147 
1148   // If we didn't manage to compute the CFA, well .. that's ungood,
1149   // but keep going anyway.  It'll be OK provided none of the register
1150   // value rules mention the CFA.  In any case, compute the new values
1151   // for each register that we're tracking.
1152 
1153 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
1154   aRegs->xbp =
1155       aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1156   aRegs->xsp =
1157       aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1158   aRegs->xip =
1159       aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1160 #elif defined(GP_ARCH_arm)
1161   aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1162   aRegs->r11 =
1163       aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1164   aRegs->r12 =
1165       aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1166   aRegs->r13 =
1167       aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1168   aRegs->r14 =
1169       aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1170   aRegs->r15 =
1171       aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1172 #elif defined(GP_ARCH_arm64)
1173   aRegs->x29 =
1174       aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1175   aRegs->x30 =
1176       aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1177   aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1178 #elif defined(GP_ARCH_mips64)
1179   aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1180   aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1181   aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
1182 #else
1183 #  error "Unsupported arch"
1184 #endif
1185 
1186   // We're done.  Any regs for which we didn't manage to compute a
1187   // new value will now be marked as invalid.
1188 }
1189 
1190 // RUNS IN NO-MALLOC CONTEXT
Unwind(uintptr_t * aFramePCs,uintptr_t * aFrameSPs,size_t * aFramesUsed,size_t * aFramePointerFramesAcquired,size_t aFramesAvail,UnwindRegs * aStartRegs,StackImage * aStackImg)1191 void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,
1192                  /*OUT*/ uintptr_t* aFrameSPs,
1193                  /*OUT*/ size_t* aFramesUsed,
1194                  /*OUT*/ size_t* aFramePointerFramesAcquired,
1195                  size_t aFramesAvail, UnwindRegs* aStartRegs,
1196                  StackImage* aStackImg) {
1197   MOZ_RELEASE_ASSERT(!mAdminMode);
1198 
1199   /////////////////////////////////////////////////////////
1200   // BEGIN UNWIND
1201 
1202   *aFramesUsed = 0;
1203 
1204   UnwindRegs regs = *aStartRegs;
1205   TaggedUWord last_valid_sp = TaggedUWord();
1206 
1207   while (true) {
1208     if (DEBUG_MAIN) {
1209       char buf[300];
1210       mLog("\n");
1211 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
1212       SprintfLiteral(
1213           buf, "LoopTop: rip %d/%llx  rsp %d/%llx  rbp %d/%llx\n",
1214           (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
1215           (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
1216           (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
1217       buf[sizeof(buf) - 1] = 0;
1218       mLog(buf);
1219 #elif defined(GP_ARCH_arm)
1220       SprintfLiteral(
1221           buf,
1222           "LoopTop: r15 %d/%llx  r7 %d/%llx  r11 %d/%llx"
1223           "  r12 %d/%llx  r13 %d/%llx  r14 %d/%llx\n",
1224           (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
1225           (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
1226           (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
1227           (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
1228           (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
1229           (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
1230       buf[sizeof(buf) - 1] = 0;
1231       mLog(buf);
1232 #elif defined(GP_ARCH_arm64)
1233       SprintfLiteral(
1234           buf,
1235           "LoopTop: pc %d/%llx  x29 %d/%llx  x30 %d/%llx"
1236           "  sp %d/%llx\n",
1237           (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
1238           (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
1239           (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
1240           (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
1241       buf[sizeof(buf) - 1] = 0;
1242       mLog(buf);
1243 #elif defined(GP_ARCH_mips64)
1244       SprintfLiteral(
1245           buf, "LoopTop: pc %d/%llx  sp %d/%llx  fp %d/%llx\n",
1246           (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
1247           (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
1248           (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
1249       buf[sizeof(buf) - 1] = 0;
1250       mLog(buf);
1251 #else
1252 #  error "Unsupported arch"
1253 #endif
1254     }
1255 
1256 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
1257     TaggedUWord ia = regs.xip;
1258     TaggedUWord sp = regs.xsp;
1259 #elif defined(GP_ARCH_arm)
1260     TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
1261     TaggedUWord sp = regs.r13;
1262 #elif defined(GP_ARCH_arm64)
1263     TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
1264     TaggedUWord sp = regs.sp;
1265 #elif defined(GP_ARCH_mips64)
1266     TaggedUWord ia = regs.pc;
1267     TaggedUWord sp = regs.sp;
1268 #else
1269 #  error "Unsupported arch"
1270 #endif
1271 
1272     if (*aFramesUsed >= aFramesAvail) {
1273       break;
1274     }
1275 
1276     // If we don't have a valid value for the PC, give up.
1277     if (!ia.Valid()) {
1278       break;
1279     }
1280 
1281     // If this is the innermost frame, record the SP value, which
1282     // presumably is valid.  If this isn't the innermost frame, and we
1283     // have a valid SP value, check that its SP value isn't less that
1284     // the one we've seen so far, so as to catch potential SP value
1285     // cycles.
1286     if (*aFramesUsed == 0) {
1287       last_valid_sp = sp;
1288     } else {
1289       MOZ_ASSERT(last_valid_sp.Valid());
1290       if (sp.Valid()) {
1291         if (sp.Value() < last_valid_sp.Value()) {
1292           // Hmm, SP going in the wrong direction.  Let's stop.
1293           break;
1294         }
1295         // Remember where we got to.
1296         last_valid_sp = sp;
1297       }
1298     }
1299 
1300     // For the innermost frame, the IA value is what we need.  For all
1301     // other frames, it's actually the return address, so back up one
1302     // byte so as to get it into the calling instruction.
1303     aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
1304     aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
1305     (*aFramesUsed)++;
1306 
1307     // Find the RuleSet for the current IA, if any.  This will also
1308     // query the backing (secondary) maps if it isn't found in the
1309     // thread-local cache.
1310 
1311     // If this isn't the innermost frame, back up into the calling insn.
1312     if (*aFramesUsed > 1) {
1313       ia = ia + TaggedUWord((uintptr_t)(-1));
1314     }
1315 
1316     pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
1317         mPriMap->Lookup(ia.Value());
1318     const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
1319     const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;
1320 
1321     if (DEBUG_MAIN) {
1322       char buf[100];
1323       SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
1324                      (unsigned long long int)ia.Value(), ruleset);
1325       buf[sizeof(buf) - 1] = 0;
1326       mLog(buf);
1327     }
1328 
1329 #if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
1330     /////////////////////////////////////////////
1331     ////
1332     // On 32 bit x86-linux, syscalls are often done via the VDSO
1333     // function __kernel_vsyscall, which doesn't have a corresponding
1334     // object that we can read debuginfo from.  That effectively kills
1335     // off all stack traces for threads blocked in syscalls.  Hence
1336     // special-case by looking at the code surrounding the program
1337     // counter.
1338     //
1339     // 0xf7757420 <__kernel_vsyscall+0>:	push   %ecx
1340     // 0xf7757421 <__kernel_vsyscall+1>:	push   %edx
1341     // 0xf7757422 <__kernel_vsyscall+2>:	push   %ebp
1342     // 0xf7757423 <__kernel_vsyscall+3>:	mov    %esp,%ebp
1343     // 0xf7757425 <__kernel_vsyscall+5>:	sysenter
1344     // 0xf7757427 <__kernel_vsyscall+7>:	nop
1345     // 0xf7757428 <__kernel_vsyscall+8>:	nop
1346     // 0xf7757429 <__kernel_vsyscall+9>:	nop
1347     // 0xf775742a <__kernel_vsyscall+10>:	nop
1348     // 0xf775742b <__kernel_vsyscall+11>:	nop
1349     // 0xf775742c <__kernel_vsyscall+12>:	nop
1350     // 0xf775742d <__kernel_vsyscall+13>:	nop
1351     // 0xf775742e <__kernel_vsyscall+14>:	int    $0x80
1352     // 0xf7757430 <__kernel_vsyscall+16>:	pop    %ebp
1353     // 0xf7757431 <__kernel_vsyscall+17>:	pop    %edx
1354     // 0xf7757432 <__kernel_vsyscall+18>:	pop    %ecx
1355     // 0xf7757433 <__kernel_vsyscall+19>:	ret
1356     //
1357     // In cases where the sampled thread is blocked in a syscall, its
1358     // program counter will point at "pop %ebp".  Hence we look for
1359     // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
1360     // the corresponding register-recovery actions are:
1361     //    new_ebp = *(old_esp + 0)
1362     //    new eip = *(old_esp + 12)
1363     //    new_esp = old_esp + 16
1364     //
1365     // It may also be the case that the program counter points two
1366     // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
1367     // the case where the syscall has been restarted but the thread
1368     // hasn't been rescheduled.  The code below doesn't handle that;
1369     // it could easily be made to.
1370     //
1371     if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
1372       uintptr_t insns_min, insns_max;
1373       uintptr_t eip = ia.Value();
1374       bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
1375       if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
1376         uint8_t* eipC = (uint8_t*)eip;
1377         if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
1378             eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
1379           TaggedUWord sp_plus_0 = sp;
1380           TaggedUWord sp_plus_12 = sp;
1381           TaggedUWord sp_plus_16 = sp;
1382           sp_plus_12 = sp_plus_12 + TaggedUWord(12);
1383           sp_plus_16 = sp_plus_16 + TaggedUWord(16);
1384           TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
1385           TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
1386           TaggedUWord new_esp = sp_plus_16;
1387           if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
1388             regs.xbp = new_ebp;
1389             regs.xip = new_eip;
1390             regs.xsp = new_esp;
1391             continue;
1392           }
1393         }
1394       }
1395     }
1396     ////
1397     /////////////////////////////////////////////
1398 #endif  // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
1399 
1400     // So, do we have a ruleset for this address?  If so, use it now.
1401     if (ruleset) {
1402       if (DEBUG_MAIN) {
1403         ruleset->Print(mLog);
1404         mLog("\n");
1405       }
1406       // Use the RuleSet to compute the registers for the previous
1407       // frame.  |regs| is modified in-place.
1408       UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
1409       continue;
1410     }
1411 
1412 #if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||     \
1413     defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
1414     defined(GP_PLAT_amd64_freebsd)
1415     // There's no RuleSet for the specified address.  On amd64/x86_linux, see if
1416     // it's possible to recover the caller's frame by using the frame pointer.
1417 
1418     // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
1419     // and assume the following layout:
1420     //
1421     //                 <--- new_SP
1422     //   +----------+
1423     //   |  new_IP  |  (return address)
1424     //   +----------+
1425     //   |  new_BP  |  <--- old_BP
1426     //   +----------+
1427     //   |   ....   |
1428     //   |   ....   |
1429     //   |   ....   |
1430     //   +----------+  <---- old_SP (arbitrary, but must be <= old_BP)
1431 
1432     const size_t wordSzB = sizeof(uintptr_t);
1433     TaggedUWord old_xsp = regs.xsp;
1434 
1435     // points at new_BP ?
1436     TaggedUWord old_xbp = regs.xbp;
1437     // points at new_IP ?
1438     TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
1439     // is the new_SP ?
1440     TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);
1441 
1442     if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
1443         old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
1444       // We don't need to do any range, alignment or validity checks for
1445       // addresses passed to DerefTUW, since that performs them itself, and
1446       // returns an invalid value on failure.  Any such value will poison
1447       // subsequent uses, and we do a final check for validity before putting
1448       // the computed values into |regs|.
1449       TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
1450       if (new_xbp.Valid() && new_xbp.IsAligned() &&
1451           old_xbp.Value() < new_xbp.Value()) {
1452         TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
1453         TaggedUWord new_xsp = old_xbp_plus2;
1454         if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
1455           regs.xbp = new_xbp;
1456           regs.xip = new_xip;
1457           regs.xsp = new_xsp;
1458           (*aFramePointerFramesAcquired)++;
1459           continue;
1460         }
1461       }
1462     }
1463 #elif defined(GP_ARCH_arm64)
1464     // Here is an example of generated code for prologue and epilogue..
1465     //
1466     // stp     x29, x30, [sp, #-16]!
1467     // mov     x29, sp
1468     // ...
1469     // ldp     x29, x30, [sp], #16
1470     // ret
1471     //
1472     // Next is another example of generated code.
1473     //
1474     // stp     x20, x19, [sp, #-32]!
1475     // stp     x29, x30, [sp, #16]
1476     // add     x29, sp, #0x10
1477     // ...
1478     // ldp     x29, x30, [sp, #16]
1479     // ldp     x20, x19, [sp], #32
1480     // ret
1481     //
1482     // Previous x29 and x30 register are stored in the address of x29 register.
1483     // But since sp register value depends on local variables, we cannot compute
1484     // previous sp register from current sp/fp/lr register and there is no
1485     // regular rule for sp register in prologue. But since return address is lr
1486     // register, if x29 is valid, we will get return address without sp
1487     // register.
1488     //
1489     // So we assume the following layout that if no rule set. x29 is frame
1490     // pointer, so we will be able to compute x29 and x30 .
1491     //
1492     //   +----------+  <--- new_sp (cannot compute)
1493     //   |   ....   |
1494     //   +----------+
1495     //   |  new_lr  |  (return address)
1496     //   +----------+
1497     //   |  new_fp  |  <--- old_fp
1498     //   +----------+
1499     //   |   ....   |
1500     //   |   ....   |
1501     //   +----------+  <---- old_sp (arbitrary, but unused)
1502 
1503     TaggedUWord old_fp = regs.x29;
1504     if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
1505         last_valid_sp.Value() <= old_fp.Value()) {
1506       TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
1507       if (new_fp.Valid() && new_fp.IsAligned() &&
1508           old_fp.Value() < new_fp.Value()) {
1509         TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
1510         TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
1511         if (new_lr.Valid()) {
1512           regs.x29 = new_fp;
1513           regs.x30 = new_lr;
1514           // When using frame pointer to walk stack, we cannot compute sp
1515           // register since we cannot compute sp register from fp/lr/sp
1516           // register, and there is no regular rule to compute previous sp
1517           // register. So mark as invalid.
1518           regs.sp = TaggedUWord();
1519           (*aFramePointerFramesAcquired)++;
1520           continue;
1521         }
1522       }
1523     }
1524 #endif  // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||
1525         // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android)
1526 
1527     // We failed to recover a frame either using CFI or FP chasing, and we
1528     // have no other ways to recover the frame.  So we have to give up.
1529     break;
1530 
1531   }  // top level unwind loop
1532 
1533   // END UNWIND
1534   /////////////////////////////////////////////////////////
1535 }
1536 
1537 ////////////////////////////////////////////////////////////////
1538 // LUL Unit Testing                                           //
1539 ////////////////////////////////////////////////////////////////
1540 
1541 static const int LUL_UNIT_TEST_STACK_SIZE = 32768;
1542 
1543 #if defined(GP_ARCH_mips64)
__getpc(void)1544 static __attribute__((noinline)) unsigned long __getpc(void) {
1545   unsigned long rtaddr;
1546   __asm__ volatile("move %0, $31" : "=r"(rtaddr));
1547   return rtaddr;
1548 }
1549 #endif
1550 
1551 // This function is innermost in the test call sequence.  It uses LUL
1552 // to unwind, and compares the result with the sequence specified in
1553 // the director string.  These need to agree in order for the test to
1554 // pass.  In order not to screw up the results, this function needs
1555 // to have a not-very big stack frame, since we're only presenting
1556 // the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
1557 // that chunk unavoidably includes the frame for this function.
1558 //
1559 // This function must not be inlined into its callers.  Doing so will
1560 // cause the expected-vs-actual backtrace consistency checking to
1561 // fail.  Prints summary results to |aLUL|'s logging sink and also
1562 // returns a boolean indicating whether or not the test passed.
GetAndCheckStackTrace(LUL * aLUL,const char * dstring)1563 static __attribute__((noinline)) bool GetAndCheckStackTrace(
1564     LUL* aLUL, const char* dstring) {
1565   // Get hold of the current unwind-start registers.
1566   UnwindRegs startRegs;
1567   memset(&startRegs, 0, sizeof(startRegs));
1568 #if defined(GP_ARCH_amd64)
1569   volatile uintptr_t block[3];
1570   MOZ_ASSERT(sizeof(block) == 24);
1571   __asm__ __volatile__(
1572       "leaq 0(%%rip), %%r15"
1573       "\n\t"
1574       "movq %%r15, 0(%0)"
1575       "\n\t"
1576       "movq %%rsp, 8(%0)"
1577       "\n\t"
1578       "movq %%rbp, 16(%0)"
1579       "\n"
1580       :
1581       : "r"(&block[0])
1582       : "memory", "r15");
1583   startRegs.xip = TaggedUWord(block[0]);
1584   startRegs.xsp = TaggedUWord(block[1]);
1585   startRegs.xbp = TaggedUWord(block[2]);
1586   const uintptr_t REDZONE_SIZE = 128;
1587   uintptr_t start = block[1] - REDZONE_SIZE;
1588 #elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
1589   volatile uintptr_t block[3];
1590   MOZ_ASSERT(sizeof(block) == 12);
1591   __asm__ __volatile__(
1592       ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
1593       "\n\t"
1594       "popl %%edi"
1595       "\n\t"
1596       "movl %%edi, 0(%0)"
1597       "\n\t"
1598       "movl %%esp, 4(%0)"
1599       "\n\t"
1600       "movl %%ebp, 8(%0)"
1601       "\n"
1602       :
1603       : "r"(&block[0])
1604       : "memory", "edi");
1605   startRegs.xip = TaggedUWord(block[0]);
1606   startRegs.xsp = TaggedUWord(block[1]);
1607   startRegs.xbp = TaggedUWord(block[2]);
1608   const uintptr_t REDZONE_SIZE = 0;
1609   uintptr_t start = block[1] - REDZONE_SIZE;
1610 #elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
1611   volatile uintptr_t block[6];
1612   MOZ_ASSERT(sizeof(block) == 24);
1613   __asm__ __volatile__(
1614       "mov r0, r15"
1615       "\n\t"
1616       "str r0,  [%0, #0]"
1617       "\n\t"
1618       "str r14, [%0, #4]"
1619       "\n\t"
1620       "str r13, [%0, #8]"
1621       "\n\t"
1622       "str r12, [%0, #12]"
1623       "\n\t"
1624       "str r11, [%0, #16]"
1625       "\n\t"
1626       "str r7,  [%0, #20]"
1627       "\n"
1628       :
1629       : "r"(&block[0])
1630       : "memory", "r0");
1631   startRegs.r15 = TaggedUWord(block[0]);
1632   startRegs.r14 = TaggedUWord(block[1]);
1633   startRegs.r13 = TaggedUWord(block[2]);
1634   startRegs.r12 = TaggedUWord(block[3]);
1635   startRegs.r11 = TaggedUWord(block[4]);
1636   startRegs.r7 = TaggedUWord(block[5]);
1637   const uintptr_t REDZONE_SIZE = 0;
1638   uintptr_t start = block[1] - REDZONE_SIZE;
1639 #elif defined(GP_ARCH_arm64)
1640   volatile uintptr_t block[4];
1641   MOZ_ASSERT(sizeof(block) == 32);
1642   __asm__ __volatile__(
1643       "adr x0, . \n\t"
1644       "str x0, [%0, #0] \n\t"
1645       "str x29, [%0, #8] \n\t"
1646       "str x30, [%0, #16] \n\t"
1647       "mov x0, sp \n\t"
1648       "str x0, [%0, #24] \n\t"
1649       :
1650       : "r"(&block[0])
1651       : "memory", "x0");
1652   startRegs.pc = TaggedUWord(block[0]);
1653   startRegs.x29 = TaggedUWord(block[1]);
1654   startRegs.x30 = TaggedUWord(block[2]);
1655   startRegs.sp = TaggedUWord(block[3]);
1656   const uintptr_t REDZONE_SIZE = 0;
1657   uintptr_t start = block[1] - REDZONE_SIZE;
1658 #elif defined(GP_ARCH_mips64)
1659   volatile uintptr_t block[3];
1660   MOZ_ASSERT(sizeof(block) == 24);
1661   __asm__ __volatile__(
1662       "sd $29, 8(%0)     \n"
1663       "sd $30, 16(%0)    \n"
1664       :
1665       : "r"(block)
1666       : "memory");
1667   block[0] = __getpc();
1668   startRegs.pc = TaggedUWord(block[0]);
1669   startRegs.sp = TaggedUWord(block[1]);
1670   startRegs.fp = TaggedUWord(block[2]);
1671   const uintptr_t REDZONE_SIZE = 0;
1672   uintptr_t start = block[1] - REDZONE_SIZE;
1673 #else
1674 #  error "Unsupported platform"
1675 #endif
1676 
1677   // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
1678   // stack.
1679   uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
1680   uintptr_t ws = sizeof(void*);
1681   start &= ~(ws - 1);
1682   end &= ~(ws - 1);
1683   uintptr_t nToCopy = end - start;
1684   if (nToCopy > lul::N_STACK_BYTES) {
1685     nToCopy = lul::N_STACK_BYTES;
1686   }
1687   MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
1688   StackImage* stackImg = new StackImage();
1689   stackImg->mLen = nToCopy;
1690   stackImg->mStartAvma = start;
1691   if (nToCopy > 0) {
1692     MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
1693     memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
1694   }
1695 
1696   // Unwind it.
1697   const int MAX_TEST_FRAMES = 64;
1698   uintptr_t framePCs[MAX_TEST_FRAMES];
1699   uintptr_t frameSPs[MAX_TEST_FRAMES];
1700   size_t framesAvail = mozilla::ArrayLength(framePCs);
1701   size_t framesUsed = 0;
1702   size_t framePointerFramesAcquired = 0;
1703   aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,
1704                &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);
1705 
1706   delete stackImg;
1707 
1708   // if (0) {
1709   //  // Show what we have.
1710   //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
1711   //  for (size_t i = 0; i < framesUsed; i++) {
1712   //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
1713   //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
1714   //  }
1715   //  fprintf(stderr, "\n");
1716   //}
1717 
1718   // Check to see if there's a consistent binding between digits in
1719   // the director string ('1' .. '8') and the PC values acquired by
1720   // the unwind.  If there isn't, the unwinding has failed somehow.
1721   uintptr_t binding[8];  // binding for '1' .. binding for '8'
1722   memset((void*)binding, 0, sizeof(binding));
1723 
1724   // The general plan is to work backwards along the director string
1725   // and forwards along the framePCs array.  Doing so corresponds to
1726   // working outwards from the innermost frame of the recursive test set.
1727   const char* cursor = dstring;
1728 
1729   // Find the end.  This leaves |cursor| two bytes past the first
1730   // character we want to look at -- see comment below.
1731   while (*cursor) cursor++;
1732 
1733   // Counts the number of consistent frames.
1734   size_t nConsistent = 0;
1735 
1736   // Iterate back to the start of the director string.  The starting
1737   // points are a bit complex.  We can't use framePCs[0] because that
1738   // contains the PC in this frame (above).  We can't use framePCs[1]
1739   // because that will contain the PC at return point in the recursive
1740   // test group (TestFn[1-8]) for their call "out" to this function,
1741   // GetAndCheckStackTrace.  Although LUL will compute a correct
1742   // return address, that will not be the same return address as for a
1743   // recursive call out of the the function to another function in the
1744   // group.  Hence we can only start consistency checking at
1745   // framePCs[2].
1746   //
1747   // To be consistent, then, we must ignore the last element in the
1748   // director string as that corresponds to framePCs[1].  Hence the
1749   // start points are: framePCs[2] and the director string 2 bytes
1750   // before the terminating zero.
1751   //
1752   // Also as a result of this, the number of consistent frames counted
1753   // will always be one less than the length of the director string
1754   // (not including its terminating zero).
1755   size_t frameIx;
1756   for (cursor = cursor - 2, frameIx = 2;
1757        cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) {
1758     char c = *cursor;
1759     uintptr_t pc = framePCs[frameIx];
1760     // If this doesn't hold, the director string is ill-formed.
1761     MOZ_ASSERT(c >= '1' && c <= '8');
1762     int n = ((int)c) - ((int)'1');
1763     if (binding[n] == 0) {
1764       // There's no binding for |c| yet, so install |pc| and carry on.
1765       binding[n] = pc;
1766       nConsistent++;
1767       continue;
1768     }
1769     // There's a pre-existing binding for |c|.  Check it's consistent.
1770     if (binding[n] != pc) {
1771       // Not consistent.  Give up now.
1772       break;
1773     }
1774     // Consistent.  Keep going.
1775     nConsistent++;
1776   }
1777 
1778   // So, did we succeed?
1779   bool passed = nConsistent + 1 == strlen(dstring);
1780 
1781   // Show the results.
1782   char buf[200];
1783   SprintfLiteral(buf, "LULUnitTest:   dstring = %s\n", dstring);
1784   buf[sizeof(buf) - 1] = 0;
1785   aLUL->mLog(buf);
1786   SprintfLiteral(buf, "LULUnitTest:     %d consistent, %d in dstring: %s\n",
1787                  (int)nConsistent, (int)strlen(dstring),
1788                  passed ? "PASS" : "FAIL");
1789   buf[sizeof(buf) - 1] = 0;
1790   aLUL->mLog(buf);
1791 
1792   return passed;
1793 }
1794 
1795 // Macro magic to create a set of 8 mutually recursive functions with
1796 // varying frame sizes.  These will recurse amongst themselves as
1797 // specified by |strP|, the directory string, and call
1798 // GetAndCheckStackTrace when the string becomes empty, passing it the
1799 // original value of the string.  This checks the result, printing
1800 // results on |aLUL|'s logging sink, and also returns a boolean
1801 // indicating whether or not the results are acceptable (correct).
1802 
1803 #define DECL_TEST_FN(NAME) \
1804   bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
1805 
1806 #define GEN_TEST_FN(NAME, FRAMESIZE)                                          \
1807   bool NAME(LUL* aLUL, const char* strPorig, const char* strP) {              \
1808     /* Create a frame of size (at least) FRAMESIZE, so that the */            \
1809     /* 8 functions created by this macro offer some variation in frame */     \
1810     /* sizes.  This isn't as simple as it might seem, since a clever */       \
1811     /* optimizing compiler (eg, clang-5) detects that the array is unused */  \
1812     /* and removes it.  We try to defeat this by passing it to a function */  \
1813     /* in a different compilation unit, and hoping that clang does not */     \
1814     /* notice that the call is a no-op. */                                    \
1815     char space[FRAMESIZE];                                                    \
1816     Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
1817                                                                               \
1818     if (*strP == '\0') {                                                      \
1819       /* We've come to the end of the director string. */                     \
1820       /* Take a stack snapshot. */                                            \
1821       return GetAndCheckStackTrace(aLUL, strPorig);                           \
1822     } else {                                                                  \
1823       /* Recurse onwards.  This is a bit subtle.  The obvious */              \
1824       /* thing to do here is call onwards directly, from within the */        \
1825       /* arms of the case statement.  That gives a problem in that */         \
1826       /* there will be multiple return points inside each function when */    \
1827       /* unwinding, so it will be difficult to check for consistency */       \
1828       /* against the director string.  Instead, we make an indirect */        \
1829       /* call, so as to guarantee that there is only one call site */         \
1830       /* within each function.  This does assume that the compiler */         \
1831       /* won't transform it back to the simple direct-call form. */           \
1832       /* To discourage it from doing so, the call is bracketed with */        \
1833       /* __asm__ __volatile__ sections so as to make it not-movable. */       \
1834       bool (*nextFn)(LUL*, const char*, const char*) = NULL;                  \
1835       switch (*strP) {                                                        \
1836         case '1':                                                             \
1837           nextFn = TestFn1;                                                   \
1838           break;                                                              \
1839         case '2':                                                             \
1840           nextFn = TestFn2;                                                   \
1841           break;                                                              \
1842         case '3':                                                             \
1843           nextFn = TestFn3;                                                   \
1844           break;                                                              \
1845         case '4':                                                             \
1846           nextFn = TestFn4;                                                   \
1847           break;                                                              \
1848         case '5':                                                             \
1849           nextFn = TestFn5;                                                   \
1850           break;                                                              \
1851         case '6':                                                             \
1852           nextFn = TestFn6;                                                   \
1853           break;                                                              \
1854         case '7':                                                             \
1855           nextFn = TestFn7;                                                   \
1856           break;                                                              \
1857         case '8':                                                             \
1858           nextFn = TestFn8;                                                   \
1859           break;                                                              \
1860         default:                                                              \
1861           nextFn = TestFn8;                                                   \
1862           break;                                                              \
1863       }                                                                       \
1864       /* "use" |space| immediately after the recursive call, */               \
1865       /* so as to dissuade clang from deallocating the space while */         \
1866       /* the call is active, or otherwise messing with the stack frame. */    \
1867       __asm__ __volatile__("" ::: "cc", "memory");                            \
1868       bool passed = nextFn(aLUL, strPorig, strP + 1);                         \
1869       Unused << write(1, space, 0);                                           \
1870       __asm__ __volatile__("" ::: "cc", "memory");                            \
1871       return passed;                                                          \
1872     }                                                                         \
1873   }
1874 
1875 // The test functions are mutually recursive, so it is necessary to
1876 // declare them before defining them.
1877 DECL_TEST_FN(TestFn1)
DECL_TEST_FN(TestFn2)1878 DECL_TEST_FN(TestFn2)
1879 DECL_TEST_FN(TestFn3)
1880 DECL_TEST_FN(TestFn4)
1881 DECL_TEST_FN(TestFn5)
1882 DECL_TEST_FN(TestFn6)
1883 DECL_TEST_FN(TestFn7)
1884 DECL_TEST_FN(TestFn8)
1885 
1886 GEN_TEST_FN(TestFn1, 123)
1887 GEN_TEST_FN(TestFn2, 456)
1888 GEN_TEST_FN(TestFn3, 789)
1889 GEN_TEST_FN(TestFn4, 23)
1890 GEN_TEST_FN(TestFn5, 47)
1891 GEN_TEST_FN(TestFn6, 117)
1892 GEN_TEST_FN(TestFn7, 1)
1893 GEN_TEST_FN(TestFn8, 99)
1894 
1895 // This starts the test sequence going.  Call here to generate a
1896 // sequence of calls as directed by the string |dstring|.  The call
1897 // sequence will, from its innermost frame, finish by calling
1898 // GetAndCheckStackTrace() and passing it |dstring|.
1899 // GetAndCheckStackTrace() will unwind the stack, check consistency
1900 // of those results against |dstring|, and print a pass/fail message
1901 // to aLUL's logging sink.  It also updates the counters in *aNTests
1902 // and aNTestsPassed.
1903 __attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
1904                                        /*OUT*/ int* aNTestsPassed, LUL* aLUL,
1905                                        const char* dstring) {
1906   // Ensure that the stack has at least this much space on it.  This
1907   // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
1908   // and hand it to LUL.  Safe in the sense that no segfault can
1909   // happen because the stack is at least this big.  This is all
1910   // somewhat dubious in the sense that a sufficiently clever compiler
1911   // (clang, for one) can figure out that space[] is unused and delete
1912   // it from the frame.  Hence the somewhat elaborate hoop jumping to
1913   // fill it up before the call and to at least appear to use the
1914   // value afterwards.
1915   int i;
1916   volatile char space[LUL_UNIT_TEST_STACK_SIZE];
1917   for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
1918     space[i] = (char)(i & 0x7F);
1919   }
1920 
1921   // Really run the test.
1922   bool passed = TestFn1(aLUL, dstring, dstring);
1923 
1924   // Appear to use space[], by visiting the value to compute some kind
1925   // of checksum, and then (apparently) using the checksum.
1926   int sum = 0;
1927   for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
1928     // If this doesn't fool LLVM, I don't know what will.
1929     sum += space[i] - 3 * i;
1930   }
1931   __asm__ __volatile__("" : : "r"(sum));
1932 
1933   // Update the counters.
1934   (*aNTests)++;
1935   if (passed) {
1936     (*aNTestsPassed)++;
1937   }
1938 }
1939 
RunLulUnitTests(int * aNTests,int * aNTestsPassed,LUL * aLUL)1940 void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
1941                      LUL* aLUL) {
1942   aLUL->mLog(":\n");
1943   aLUL->mLog("LULUnitTest: BEGIN\n");
1944   *aNTests = *aNTestsPassed = 0;
1945   TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
1946   TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
1947   TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
1948   TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
1949   TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
1950   TestUnw(aNTests, aNTestsPassed, aLUL,
1951           "123456781122334455667788777777777777777777777");
1952   aLUL->mLog("LULUnitTest: END\n");
1953   aLUL->mLog(":\n");
1954 }
1955 
1956 }  // namespace lul
1957