1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "LulMain.h"
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <unistd.h> // write(), only for testing LUL
13
14 #include <algorithm> // std::sort
15 #include <string>
16 #include <utility>
17
18 #include "GeckoProfiler.h" // for profiler_current_thread_id()
19 #include "LulCommonExt.h"
20 #include "LulElfExt.h"
21 #include "LulMainInt.h"
22 #include "mozilla/ArrayUtils.h"
23 #include "mozilla/Assertions.h"
24 #include "mozilla/CheckedInt.h"
25 #include "mozilla/DebugOnly.h"
26 #include "mozilla/MemoryChecking.h"
27 #include "mozilla/Sprintf.h"
28 #include "mozilla/UniquePtr.h"
29 #include "mozilla/Unused.h"
30
31 // Set this to 1 for verbose logging
32 #define DEBUG_MAIN 0
33
34 namespace lul {
35
36 using mozilla::CheckedInt;
37 using mozilla::DebugOnly;
38 using mozilla::MallocSizeOf;
39 using mozilla::Unused;
40 using std::pair;
41 using std::string;
42 using std::vector;
43
44 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
45 //
46 // Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
47 // Any such function -- and, hence, the transitive closure of those
48 // reachable from it -- must not do any dynamic memory allocation.
49 // Doing so risks deadlock. There is exactly one root function for
50 // the transitive closure: Lul::Unwind.
51 //
52 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
53
54 ////////////////////////////////////////////////////////////////
55 // RuleSet //
56 ////////////////////////////////////////////////////////////////
57
// Return a short printable name for DWARF register number |aReg|, for use
// in debug output only.  Only the registers tracked on the current target
// architecture (plus the pseudo-register DW_REG_CFA) are recognised; any
// other value yields "???".
static const char* NameOf_DW_REG(int16_t aReg) {
  switch (aReg) {
    case DW_REG_CFA:
      return "cfa";
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return "xbp";
    case DW_REG_INTEL_XSP:
      return "xsp";
    case DW_REG_INTEL_XIP:
      return "xip";
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return "r7";
    case DW_REG_ARM_R11:
      return "r11";
    case DW_REG_ARM_R12:
      return "r12";
    case DW_REG_ARM_R13:
      return "r13";
    case DW_REG_ARM_R14:
      return "r14";
    case DW_REG_ARM_R15:
      return "r15";
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return "x29";
    case DW_REG_AARCH64_X30:
      return "x30";
    case DW_REG_AARCH64_SP:
      return "sp";
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return "sp";
    case DW_REG_MIPS_FP:
      return "fp";
    case DW_REG_MIPS_PC:
      return "pc";
#else
#  error "Unsupported arch"
#endif
    default:
      return "???";
  }
}
103
ShowRule(const char * aNewReg) const104 string LExpr::ShowRule(const char* aNewReg) const {
105 char buf[64];
106 string res = string(aNewReg) + "=";
107 switch (mHow) {
108 case UNKNOWN:
109 res += "Unknown";
110 break;
111 case NODEREF:
112 SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
113 res += buf;
114 break;
115 case DEREF:
116 SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
117 res += buf;
118 break;
119 case PFXEXPR:
120 SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset);
121 res += buf;
122 break;
123 default:
124 res += "???";
125 break;
126 }
127 return res;
128 }
129
// Print, via |aLog|, a one-line human-readable description of this RuleSet
// as it applies to the address range [avma, avma + len - 1].  Debugging
// only.  The set of registers shown is architecture-dependent.
void RuleSet::Print(uintptr_t avma, uintptr_t len,
                    void (*aLog)(const char*)) const {
  char buf[96];
  SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)avma,
                 (unsigned long long int)(avma + len - 1));
  string res = string(buf);
  res += mCfaExpr.ShowRule("cfa");
  res += " in";
  // For each reg we care about, print the recovery expression.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  res += mXipExpr.ShowRule(" RA");
  res += mXspExpr.ShowRule(" SP");
  res += mXbpExpr.ShowRule(" BP");
#elif defined(GP_ARCH_arm)
  res += mR15expr.ShowRule(" R15");
  res += mR7expr.ShowRule(" R7");
  res += mR11expr.ShowRule(" R11");
  res += mR12expr.ShowRule(" R12");
  res += mR13expr.ShowRule(" R13");
  res += mR14expr.ShowRule(" R14");
#elif defined(GP_ARCH_arm64)
  res += mX29expr.ShowRule(" X29");
  res += mX30expr.ShowRule(" X30");
  res += mSPexpr.ShowRule(" SP");
#elif defined(GP_ARCH_mips64)
  res += mPCexpr.ShowRule(" PC");
  res += mSPexpr.ShowRule(" SP");
  res += mFPexpr.ShowRule(" FP");
#else
#  error "Unsupported arch"
#endif
  aLog(res.c_str());
}
163
ExprForRegno(DW_REG_NUMBER aRegno)164 LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
165 switch (aRegno) {
166 case DW_REG_CFA:
167 return &mCfaExpr;
168 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
169 case DW_REG_INTEL_XIP:
170 return &mXipExpr;
171 case DW_REG_INTEL_XSP:
172 return &mXspExpr;
173 case DW_REG_INTEL_XBP:
174 return &mXbpExpr;
175 #elif defined(GP_ARCH_arm)
176 case DW_REG_ARM_R15:
177 return &mR15expr;
178 case DW_REG_ARM_R14:
179 return &mR14expr;
180 case DW_REG_ARM_R13:
181 return &mR13expr;
182 case DW_REG_ARM_R12:
183 return &mR12expr;
184 case DW_REG_ARM_R11:
185 return &mR11expr;
186 case DW_REG_ARM_R7:
187 return &mR7expr;
188 #elif defined(GP_ARCH_arm64)
189 case DW_REG_AARCH64_X29:
190 return &mX29expr;
191 case DW_REG_AARCH64_X30:
192 return &mX30expr;
193 case DW_REG_AARCH64_SP:
194 return &mSPexpr;
195 #elif defined(GP_ARCH_mips64)
196 case DW_REG_MIPS_SP:
197 return &mSPexpr;
198 case DW_REG_MIPS_FP:
199 return &mFPexpr;
200 case DW_REG_MIPS_PC:
201 return &mPCexpr;
202 #else
203 # error "Unknown arch"
204 #endif
205 default:
206 return nullptr;
207 }
208 }
209
RuleSet()210 RuleSet::RuleSet() {
211 // All fields are of type LExpr and so are initialised by LExpr::LExpr().
212 }
213
214 ////////////////////////////////////////////////////////////////
215 // SecMap //
216 ////////////////////////////////////////////////////////////////
217
218 // See header file LulMainInt.h for comments about invariants.
219
SecMap(uintptr_t mapStartAVMA,uint32_t mapLen,void (* aLog)(const char *))220 SecMap::SecMap(uintptr_t mapStartAVMA, uint32_t mapLen,
221 void (*aLog)(const char*))
222 : mUsable(false),
223 mUniqifier(new mozilla::HashMap<RuleSet, uint32_t, RuleSet,
224 InfallibleAllocPolicy>),
225 mLog(aLog) {
226 if (mapLen == 0) {
227 // Degenerate case.
228 mMapMinAVMA = 1;
229 mMapMaxAVMA = 0;
230 } else {
231 mMapMinAVMA = mapStartAVMA;
232 mMapMaxAVMA = mapStartAVMA + (uintptr_t)mapLen - 1;
233 }
234 }
235
~SecMap()236 SecMap::~SecMap() {
237 mExtents.clear();
238 mDictionary.clear();
239 if (mUniqifier) {
240 mUniqifier->clear();
241 mUniqifier = nullptr;
242 }
243 }
244
245 // RUNS IN NO-MALLOC CONTEXT
FindRuleSet(uintptr_t ia)246 RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
247 // Binary search mExtents to find one that brackets |ia|.
248 // lo and hi need to be signed, else the loop termination tests
249 // don't work properly. Note that this works correctly even when
250 // mExtents.size() == 0.
251
252 // Can't do this until the array has been sorted and preened.
253 MOZ_ASSERT(mUsable);
254
255 long int lo = 0;
256 long int hi = (long int)mExtents.size() - 1;
257 while (true) {
258 // current unsearched space is from lo to hi, inclusive.
259 if (lo > hi) {
260 // not found
261 return nullptr;
262 }
263 long int mid = lo + ((hi - lo) / 2);
264 Extent* mid_extent = &mExtents[mid];
265 uintptr_t mid_offset = mid_extent->offset();
266 uintptr_t mid_len = mid_extent->len();
267 uintptr_t mid_minAddr = mMapMinAVMA + mid_offset;
268 uintptr_t mid_maxAddr = mid_minAddr + mid_len - 1;
269 if (ia < mid_minAddr) {
270 hi = mid - 1;
271 continue;
272 }
273 if (ia > mid_maxAddr) {
274 lo = mid + 1;
275 continue;
276 }
277 MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
278 uint32_t mid_extent_dictIx = mid_extent->dictIx();
279 MOZ_RELEASE_ASSERT(mid_extent_dictIx < mExtents.size());
280 return &mDictionary[mid_extent_dictIx];
281 }
282 // NOTREACHED
283 }
284
// Add a RuleSet to the collection. The rule is copied in. Calling
// this makes the map non-searchable.
// |avma| is the first address the rule covers and |len| its length in
// bytes.  Rules falling outside [mMapMinAVMA, mMapMaxAVMA], or with zero
// length, are silently dropped.  Identical RuleSets are de-duplicated via
// |mUniqifier| and recorded in |mExtents| as (offset, len, dictIx) triples.
void SecMap::AddRuleSet(const RuleSet* rs, uintptr_t avma, uintptr_t len) {
  mUsable = false;

  // Zero length RuleSet? Meaningless, but ignore it anyway.
  if (len == 0) {
    return;
  }

  // Ignore attempts to add RuleSets whose address range doesn't fall within
  // the declared address range for the SecMap. Maybe we should print some
  // kind of error message rather than silently ignoring them.
  // NOTE(review): if |avma + len| wraps uintptr_t the comparison is done on
  // wrapped values -- presumably callers never supply such ranges; confirm.
  if (!(avma >= mMapMinAVMA && avma + len - 1 <= mMapMaxAVMA)) {
    return;
  }

  // Because `mMapStartAVMA` .. `mMapEndAVMA` can specify at most a 2^32-1 byte
  // chunk of address space, the following must now hold.
  MOZ_RELEASE_ASSERT(len <= (uintptr_t)0xFFFFFFFF);

  // See if `mUniqifier` already has `rs`. If so set `dictIx` to the assigned
  // dictionary index; if not, add `rs` to `mUniqifier` and assign a new
  // dictionary index. This is the core of the RuleSet-de-duplication process.
  uint32_t dictIx = 0;
  mozilla::HashMap<RuleSet, uint32_t, RuleSet, InfallibleAllocPolicy>::AddPtr
      p = mUniqifier->lookupForAdd(*rs);
  if (!p) {
    dictIx = mUniqifier->count();
    // If this ever fails, Extents::dictIx will need to be changed to be a
    // type wider than the current uint16_t.
    MOZ_RELEASE_ASSERT(dictIx < (1 << 16));
    // This returns `false` on OOM. We ignore the return value since we asked
    // for it to use the InfallibleAllocPolicy.
    DebugOnly<bool> addedOK = mUniqifier->add(p, *rs, dictIx);
    MOZ_ASSERT(addedOK);
  } else {
    dictIx = p->value();
  }

  uint32_t offset = (uint32_t)(avma - mMapMinAVMA);
  while (len > 0) {
    // Because Extents::len is a uint16_t, we have to add multiple `mExtents`
    // entries to cover the case where `len` is equal to or greater than 2^16.
    // This happens only exceedingly rarely. In order to get more test
    // coverage on what would otherwise be a very low probability (less than
    // 0.0002%) corner case, we do this in steps of 4095. On libxul.so as of
    // Sept 2020, this increases the number of `mExtents` entries by about
    // 0.05%, hence has no meaningful effect on space use, but increases the
    // use of this corner case, and hence its test coverage, by a factor of 250.
    uint32_t this_step_len = (len > 4095) ? 4095 : len;
    mExtents.emplace_back(offset, this_step_len, dictIx);
    offset += this_step_len;
    len -= this_step_len;
  }
}
341
342 // Add a PfxInstr to the vector of such instrs, and return the index
343 // in the vector. Calling this makes the map non-searchable.
AddPfxInstr(PfxInstr pfxi)344 uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
345 mUsable = false;
346 mPfxInstrs.push_back(pfxi);
347 return mPfxInstrs.size() - 1;
348 }
349
CmpExtentsByOffsetLE(const Extent & ext1,const Extent & ext2)350 static bool CmpExtentsByOffsetLE(const Extent& ext1, const Extent& ext2) {
351 return ext1.offset() < ext2.offset();
352 }
353
// Prepare the map for searching, by sorting it, de-overlapping entries and
// removing any resulting zero-length entries. At the start of this routine,
// all Extents should fall within [mMapMinAVMA, mMapMaxAVMA] and not have zero
// length, as a result of the checks in AddRuleSet().
void SecMap::PrepareRuleSets() {
  // At this point, the de-duped RuleSets are in `mUniqifier`, and
  // `mDictionary` is empty. This method will, amongst other things, copy
  // them into `mDictionary` in order of their assigned dictionary-index
  // values, as established by `SecMap::AddRuleSet`, and free `mUniqifier`;
  // after this method, it has no further use.
  MOZ_RELEASE_ASSERT(mUniqifier);
  MOZ_RELEASE_ASSERT(mDictionary.empty());

  if (mExtents.empty()) {
    // Nothing was ever added; just dispose of the uniqifier.
    mUniqifier->clear();
    mUniqifier = nullptr;
    return;
  }

  if (mMapMinAVMA == 1 && mMapMaxAVMA == 0) {
    // The map is empty. This should never happen.
    mExtents.clear();
    mUniqifier->clear();
    mUniqifier = nullptr;
    return;
  }
  MOZ_RELEASE_ASSERT(mMapMinAVMA <= mMapMaxAVMA);

  // We must have at least one Extent, and as a consequence there must be at
  // least one entry in the uniqifier.
  MOZ_RELEASE_ASSERT(!mExtents.empty() && !mUniqifier->empty());

#ifdef DEBUG
  // Check invariants on incoming Extents.
  for (size_t i = 0; i < mExtents.size(); ++i) {
    Extent* ext = &mExtents[i];
    uint32_t len = ext->len();
    MOZ_ASSERT(len > 0);
    MOZ_ASSERT(len <= 4095 /* per '4095' in AddRuleSet() */);
    uint32_t offset = ext->offset();
    uintptr_t avma = mMapMinAVMA + (uintptr_t)offset;
    // Upper bounds test. There's no lower bounds test because `offset` is a
    // positive displacement from `mMapMinAVMA`, so a small underrun will
    // manifest as `len` being close to 2^32.
    MOZ_ASSERT(avma + (uintptr_t)len - 1 <= mMapMaxAVMA);
  }
#endif

  // Sort by start addresses.
  std::sort(mExtents.begin(), mExtents.end(), CmpExtentsByOffsetLE);

  // Iteratively truncate any overlaps and remove any zero length
  // entries that might result, or that may have been present
  // initially. Unless the input is seriously screwy, this is
  // expected to iterate only once.
  while (true) {
    size_t i;
    size_t n = mExtents.size();
    size_t nZeroLen = 0;

    if (n == 0) {
      break;
    }

    for (i = 1; i < n; ++i) {
      Extent* prev = &mExtents[i - 1];
      Extent* here = &mExtents[i];
      MOZ_ASSERT(prev->offset() <= here->offset());
      if (prev->offset() + prev->len() > here->offset()) {
        // Overlap: truncate the earlier entry so it ends exactly where the
        // later one begins.
        prev->setLen(here->offset() - prev->offset());
      }
      if (prev->len() == 0) {
        nZeroLen++;
      }
    }

    // The loop above never truncates the final entry, but it may have been
    // zero-length on arrival.
    if (mExtents[n - 1].len() == 0) {
      nZeroLen++;
    }

    // At this point, the entries are in-order and non-overlapping.
    // If none of them are zero-length, we are done.
    if (nZeroLen == 0) {
      break;
    }

    // Slide back the entries to remove the zero length ones.
    size_t j = 0;  // The write-point.
    for (i = 0; i < n; ++i) {
      if (mExtents[i].len() == 0) {
        continue;
      }
      if (j != i) {
        mExtents[j] = mExtents[i];
      }
      ++j;
    }
    MOZ_ASSERT(i == n);
    MOZ_ASSERT(nZeroLen <= n);
    MOZ_ASSERT(j == n - nZeroLen);
    // Trim the now-dead tail.
    while (nZeroLen > 0) {
      mExtents.pop_back();
      nZeroLen--;
    }

    MOZ_ASSERT(mExtents.size() == j);
  }

  size_t nExtents = mExtents.size();

#ifdef DEBUG
  // Do a final check on the extents: their address ranges must be
  // ascending, non overlapping, non zero sized.
  if (nExtents > 0) {
    MOZ_ASSERT(mExtents[0].len() > 0);
    for (size_t i = 1; i < nExtents; ++i) {
      const Extent* prev = &mExtents[i - 1];
      const Extent* here = &mExtents[i];
      MOZ_ASSERT(prev->offset() < here->offset());
      MOZ_ASSERT(here->len() > 0);
      MOZ_ASSERT(prev->offset() + prev->len() <= here->offset());
    }
  }
#endif

  // Create the final dictionary by enumerating the uniqifier.
  size_t nUniques = mUniqifier->count();

  RuleSet dummy;
  // NOTE(review): PodZero assumes RuleSet is safely memset-able (all-LExpr
  // members) -- confirm against its declaration in LulMainInt.h.
  mozilla::PodZero(&dummy);

  mDictionary.reserve(nUniques);
  for (size_t i = 0; i < nUniques; i++) {
    mDictionary.push_back(dummy);
  }

  // Copy each unique RuleSet into the dictionary slot named by its index.
  for (auto iter = mUniqifier->iter(); !iter.done(); iter.next()) {
    MOZ_RELEASE_ASSERT(iter.get().value() < nUniques);
    mDictionary[iter.get().value()] = iter.get().key();
  }

  // The uniqifier has served its purpose.  (Presumably mUniqifier is an
  // owning smart pointer declared in LulMainInt.h, so this releases the
  // table -- confirm.)
  mUniqifier = nullptr;

  char buf[150];
  SprintfLiteral(
      buf,
      "PrepareRuleSets: %lu extents, %lu rulesets, "
      "avma min/max 0x%llx, 0x%llx\n",
      (unsigned long int)nExtents, (unsigned long int)mDictionary.size(),
      (unsigned long long int)mMapMinAVMA, (unsigned long long int)mMapMaxAVMA);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Is now usable for binary search.
  mUsable = true;

#if 0
  mLog("\nRulesets after preening\n");
  for (size_t i = 0; i < nExtents; ++i) {
    const Extent* extent = &mExtents[i];
    uintptr_t avma = mMapMinAVMA + (uintptr_t)extent->offset();
    mDictionary[extent->dictIx()].Print(avma, extent->len(), mLog);
    mLog("\n");
  }
  mLog("\n");
#endif
}
521
IsEmpty()522 bool SecMap::IsEmpty() { return mExtents.empty(); }
523
SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const524 size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
525 size_t n = aMallocSizeOf(this);
526
527 // It's conceivable that these calls would be unsafe with some
528 // implementations of std::vector, but it seems to be working for now...
529 n += aMallocSizeOf(mPfxInstrs.data());
530
531 if (mUniqifier) {
532 n += mUniqifier->shallowSizeOfIncludingThis(aMallocSizeOf);
533 }
534 n += aMallocSizeOf(mDictionary.data());
535 n += aMallocSizeOf(mExtents.data());
536
537 return n;
538 }
539
540 ////////////////////////////////////////////////////////////////
541 // SegArray //
542 ////////////////////////////////////////////////////////////////
543
// A SegArray maintains a partition of the entire address space
// [0, UINTPTR_MAX] into contiguous, non-overlapping segments (|struct
// Seg|), each carrying a boolean value.  Adjacent segments with equal
// values are merged, so the representation is canonical and minimal.
//
// SegArrays are used to keep track of which parts of the address
// space are known to contain instructions.
class SegArray {
 public:
  // Set the value for every address in [lo, hi] to |val|.  Ranges with
  // lo > hi are ignored.
  void add(uintptr_t lo, uintptr_t hi, bool val) {
    if (lo > hi) {
      return;
    }
    // Ensure segment boundaries exist at both ends of the range ...
    split_at(lo);
    if (hi < UINTPTR_MAX) {
      split_at(hi + 1);
    }
    // ... then repaint every segment in between.
    std::vector<Seg>::size_type first = find(lo);
    std::vector<Seg>::size_type last = find(hi);
    for (std::vector<Seg>::size_type ix = first; ix <= last; ++ix) {
      mSegs[ix].val = val;
    }
    preen();
  }

  // RUNS IN NO-MALLOC CONTEXT
  // If |addr| falls in a true-valued segment, store that segment's bounds
  // in *rx_min/*rx_max and return true; otherwise return false.
  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
    std::vector<Seg>::size_type ix = find(addr);
    if (!mSegs[ix].val) {
      return false;
    }
    *rx_min = mSegs[ix].lo;
    *rx_max = mSegs[ix].hi;
    return true;
  }

  // Initially the whole address space is one false-valued segment.
  SegArray() { mSegs.push_back(Seg(0, UINTPTR_MAX, false)); }

 private:
  struct Seg {
    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
    uintptr_t lo;
    uintptr_t hi;
    bool val;
  };

  // Restore canonical form by merging adjacent segments with equal values.
  void preen() {
    for (std::vector<Seg>::iterator it = mSegs.begin(); it < mSegs.end() - 1;
         ++it) {
      if (it[0].val != it[1].val) {
        continue;
      }
      it[0].hi = it[1].hi;
      mSegs.erase(it + 1);
      // Step back so the merged entry is reconsidered against its new
      // successor.
      --it;
    }
  }

  // RUNS IN NO-MALLOC CONTEXT
  // Binary-search for the segment containing |a|.  Because the segments
  // exactly cover the address space, the search always succeeds.
  std::vector<Seg>::size_type find(uintptr_t a) {
    long int lo = 0;
    long int hi = (long int)mSegs.size();
    while (true) {
      // The unsearched space is lo .. hi inclusive.
      if (lo > hi) {
        // Not found. This can't happen.
        return (std::vector<Seg>::size_type)(-1);
      }
      long int mid = lo + ((hi - lo) / 2);
      if (a < mSegs[mid].lo) {
        hi = mid - 1;
      } else if (a > mSegs[mid].hi) {
        lo = mid + 1;
      } else {
        return (std::vector<Seg>::size_type)mid;
      }
    }
  }

  // Ensure a segment boundary exists at |a|, splitting the containing
  // segment in two if necessary.
  void split_at(uintptr_t a) {
    std::vector<Seg>::size_type ix = find(a);
    if (mSegs[ix].lo == a) {
      return;
    }
    mSegs.insert(mSegs.begin() + ix + 1, mSegs[ix]);
    mSegs[ix].hi = a - 1;
    mSegs[ix + 1].lo = a;
  }

  // Debug printing only.
  void show() {
    printf("<< %d entries:\n", (int)mSegs.size());
    for (std::vector<Seg>::iterator it = mSegs.begin(); it < mSegs.end();
         ++it) {
      printf(" %016llx %016llx %s\n", (unsigned long long int)(*it).lo,
             (unsigned long long int)(*it).hi,
             (*it).val ? "true" : "false");
    }
    printf(">>\n");
  }

  std::vector<Seg> mSegs;
};
659
660 ////////////////////////////////////////////////////////////////
661 // PriMap //
662 ////////////////////////////////////////////////////////////////
663
// PriMap is the primary (top-level) map: an address-ordered collection of
// non-overlapping SecMaps, one per mapped object, searchable by address.
class PriMap {
 public:
  explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}

  // RUNS IN NO-MALLOC CONTEXT
  // For address |ia|, return the covering RuleSet together with the
  // PfxInstr vector of the SecMap that owns it.  Either or both components
  // may be nullptr when nothing covers |ia|.
  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
    SecMap* sm = FindSecMap(ia);
    return pair<const RuleSet*, const vector<PfxInstr>*>(
        sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr);
  }

  // Add a secondary map. No overlaps allowed w.r.t. existing
  // secondary maps.  Takes ownership of |aSecMap|; empty maps are dropped.
  void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
    // We can't add an empty SecMap to the PriMap. But that's OK
    // since we'd never be able to find anything in it anyway.
    if (aSecMap->IsEmpty()) {
      return;
    }

    // Iterate through the SecMaps and find the right place for this
    // one. At the same time, ensure that the in-order
    // non-overlapping invariant is preserved (and, generally, holds).
    // FIXME: this gives a cost that is O(N^2) in the total number of
    // shared objects in the system. ToDo: better.
    MOZ_ASSERT(aSecMap->mMapMinAVMA <= aSecMap->mMapMaxAVMA);

    size_t num_secMaps = mSecMaps.size();
    uintptr_t i;
    for (i = 0; i < num_secMaps; ++i) {
      mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
      MOZ_ASSERT(sm_i->mMapMinAVMA <= sm_i->mMapMaxAVMA);
      // NOTE(review): this only locates the insertion point; it presumably
      // relies on callers never adding a SecMap that overlaps an existing
      // one -- confirm.
      if (aSecMap->mMapMinAVMA < sm_i->mMapMaxAVMA) {
        // |aSecMap| needs to be inserted immediately before mSecMaps[i].
        break;
      }
    }
    MOZ_ASSERT(i <= num_secMaps);
    if (i == num_secMaps) {
      // It goes at the end.
      mSecMaps.push_back(std::move(aSecMap));
    } else {
      std::vector<mozilla::UniquePtr<SecMap>>::iterator iter =
          mSecMaps.begin() + i;
      mSecMaps.insert(iter, std::move(aSecMap));
    }
    char buf[100];
    SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
                   (int)mSecMaps.size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);
  }

  // Remove and delete any SecMaps in the mapping, that intersect
  // with the specified address range.
  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
    MOZ_ASSERT(avma_min <= avma_max);
    size_t num_secMaps = mSecMaps.size();
    if (num_secMaps > 0) {
      intptr_t i;
      // Iterate from end to start over the vector, so as to ensure
      // that the special case where |avma_min| and |avma_max| denote
      // the entire address space, can be completed in time proportional
      // to the number of elements in the map.
      for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) {
        mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
        if (sm_i->mMapMaxAVMA < avma_min || avma_max < sm_i->mMapMinAVMA) {
          // There's no overlap. Move on.
          continue;
        }
        // We need to remove mSecMaps[i] and slide all those above it
        // downwards to cover the hole.
        mSecMaps.erase(mSecMaps.begin() + i);
      }
    }
  }

  // Return the number of currently contained SecMaps.
  size_t CountSecMaps() { return mSecMaps.size(); }

  // Heap-profiling support: measure this object and all owned SecMaps.
  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
    size_t n = aMallocSizeOf(this);

    // It's conceivable that this call would be unsafe with some
    // implementations of std::vector, but it seems to be working for now...
    n += aMallocSizeOf(mSecMaps.data());

    for (size_t i = 0; i < mSecMaps.size(); i++) {
      n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf);
    }

    return n;
  }

 private:
  // RUNS IN NO-MALLOC CONTEXT
  // Binary-search for the SecMap whose address range brackets |ia|, or
  // return nullptr if there is none.
  SecMap* FindSecMap(uintptr_t ia) {
    // Binary search mSecMaps to find one that brackets |ia|.
    // lo and hi need to be signed, else the loop termination tests
    // don't work properly.
    long int lo = 0;
    long int hi = (long int)mSecMaps.size() - 1;
    while (true) {
      // current unsearched space is from lo to hi, inclusive.
      if (lo > hi) {
        // not found
        return nullptr;
      }
      long int mid = lo + ((hi - lo) / 2);
      mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid];
      uintptr_t mid_minAddr = mid_secMap->mMapMinAVMA;
      uintptr_t mid_maxAddr = mid_secMap->mMapMaxAVMA;
      if (ia < mid_minAddr) {
        hi = mid - 1;
        continue;
      }
      if (ia > mid_maxAddr) {
        lo = mid + 1;
        continue;
      }
      MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
      return mid_secMap.get();
    }
    // NOTREACHED
  }

 private:
  // sorted array of per-object ranges, non overlapping, non empty
  std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;

  // a logging sink, for debugging.
  void (*mLog)(const char*);
};
797
798 ////////////////////////////////////////////////////////////////
799 // LUL //
800 ////////////////////////////////////////////////////////////////
801
// Emit |_str| through the object's |mLog| sink, prefixed with the current
// process id, thread id and the address of the enclosing LUL object.  Only
// usable inside LUL member functions, since it references |this| and |mLog|.
#define LUL_LOG(_str) \
  do { \
    char buf[200]; \
    SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \
                   uint64_t(profiler_current_process_id().ToNumber()), \
                   uint64_t(profiler_current_thread_id().ToNumber()), this, \
                   (_str)); \
    buf[sizeof(buf) - 1] = 0; \
    mLog(buf); \
  } while (0)
812
// Construct a LUL unwinder.  |aLog| becomes the sink for all diagnostic
// output.  The object starts in Admin mode, and the constructing thread is
// recorded as the only thread allowed to perform admin operations.
LUL::LUL(void (*aLog)(const char*))
    : mLog(aLog),
      mAdminMode(true),
      mAdminThreadId(profiler_current_thread_id()),
      mPriMap(new PriMap(aLog)),
      mSegArray(new SegArray()),
      mUSU(new UniqueStringUniverse()) {
  LUL_LOG("LUL::LUL: Created object");
}
822
// Destructor: release the owned maps and string universe.
LUL::~LUL() {
  LUL_LOG("LUL::~LUL: Destroyed object");
  delete mPriMap;
  delete mSegArray;
  // mLog is cleared before deleting mUSU; presumably nothing logged past
  // this point -- confirm if ~UniqueStringUniverse ever logs.
  mLog = nullptr;
  delete mUSU;
}
830
// Emit, via |mLog|, a one-line summary of frame statistics, but only once
// at least 5000 new frames have accumulated since the previous report.
void LUL::MaybeShowStats() {
  // This is racey in the sense that it can't guarantee that
  //    n_new == n_new_Context + n_new_CFI + n_new_Scanned
  // if it should happen that mStats is updated by some other thread
  // in between computation of n_new and n_new_{Context,CFI,FP}.
  // But it's just stats printing, so we don't really care.
  uint32_t n_new = mStats - mStatsPrevious;
  if (n_new >= 5000) {
    uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
    uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;
    uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;
    // Snapshot the counters so the next report covers only newer frames.
    mStatsPrevious = mStats;
    char buf[200];
    SprintfLiteral(buf,
                   "LUL frame stats: TOTAL %5u"
                   "    CTX %4u    CFI %4u    FP %4u",
                   n_new, n_new_Context, n_new_CFI, n_new_FP);
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);
  }
}
852
SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const853 size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
854 size_t n = aMallocSizeOf(this);
855 n += mPriMap->SizeOfIncludingThis(aMallocSizeOf);
856
857 // Measurement of the following members may be added later if DMD finds it
858 // is worthwhile:
859 // - mSegArray
860 // - mUSU
861
862 return n;
863 }
864
// Switch this LUL object from Admin mode to Unwinding mode.  May only be
// called on the thread that created the object; calling it when already in
// Unwinding mode is tolerated (see comment below).
void LUL::EnableUnwinding() {
  LUL_LOG("LUL::EnableUnwinding");
  // Don't assert for Admin mode here. That is, tolerate a call here
  // if we are already in Unwinding mode.
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mAdminMode = false;
}
873
// Notification that an object has been mapped at [aRXavma, aRXavma+aSize).
// Reads CFI/EXIDX unwind data for it -- from |aMappedImage| if non-null,
// otherwise from the file named |aFileName| -- builds and registers a
// SecMap, and marks the range as containing code.  Admin mode and
// admin thread only.
void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
                         const void* aMappedImage) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[200];
  SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
                 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
                 aFileName);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // We can't have a SecMap covering more than 2^32-1 bytes of address space.
  // See the definition of SecMap for why. Rather than crash the system, just
  // limit the SecMap's size accordingly. This case is never actually
  // expected to happen.
  if (((unsigned long long int)aSize) > 0xFFFFFFFFULL) {
    aSize = (uintptr_t)0xFFFFFFFF;
  }
  MOZ_RELEASE_ASSERT(aSize <= 0xFFFFFFFF);

  // Ignore obviously-stupid notifications.
  if (aSize > 0) {
    // Here's a new mapping, for this object.
    mozilla::UniquePtr<SecMap> smap =
        mozilla::MakeUnique<SecMap>(aRXavma, (uint32_t)aSize, mLog);

    // Read CFI or EXIDX unwind data into |smap|.
    if (!aMappedImage) {
      (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
                                smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    } else {
      (void)lul::ReadSymbolDataInternal(
          (const uint8_t*)aMappedImage, string(aFileName),
          std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    }

    mLog("NotifyMap .. preparing entries\n");

    // Sort/preen the SecMap so it becomes searchable.
    smap->PrepareRuleSets();

    SprintfLiteral(buf, "NotifyMap got %lld entries\n",
                   (long long int)smap->Size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);

    // Add it to the primary map (the top level set of mapped objects).
    mPriMap->AddSecMap(std::move(smap));

    // Tell the segment array about the mapping, so that the stack
    // scan and __kernel_syscall mechanisms know where valid code is.
    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
  }
}
929
NotifyExecutableArea(uintptr_t aRXavma,size_t aSize)930 void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
931 MOZ_RELEASE_ASSERT(mAdminMode);
932 MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
933
934 mLog(":\n");
935 char buf[200];
936 SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n",
937 (unsigned long long int)aRXavma,
938 (unsigned long long int)aSize);
939 buf[sizeof(buf) - 1] = 0;
940 mLog(buf);
941
942 // Ignore obviously-stupid notifications.
943 if (aSize > 0) {
944 // Tell the segment array about the mapping, so that the stack
945 // scan and __kernel_syscall mechanisms know where valid code is.
946 mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
947 }
948 }
949
// Notification that the address range [aRXavmaMin, aRXavmaMax] is about to
// be unmapped.  Removes (and deletes) any intersecting SecMaps and marks
// the range as no longer containing code.  Admin mode and admin thread
// only.
void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[100];
  SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",
                 (unsigned long long int)aRXavmaMin,
                 (unsigned long long int)aRXavmaMax);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);

  // Remove from the primary map, any secondary maps that intersect
  // with the address range. Also delete the secondary maps.
  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);

  // Tell the segment array that the address range no longer
  // contains valid code.
  mSegArray->add(aRXavmaMin, aRXavmaMax, false);

  SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",
                 (int)mPriMap->CountSecMaps());
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);
}
977
CountMappings()978 size_t LUL::CountMappings() {
979 MOZ_RELEASE_ASSERT(mAdminMode);
980 MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
981
982 return mPriMap->CountSecMaps();
983 }
984
// RUNS IN NO-MALLOC CONTEXT
//
// Reads one native word from the captured stack image at address |aAddr|.
// Returns an invalid TaggedUWord if |aAddr| is itself invalid or if any part
// of the word falls outside [mStartAvma, mStartAvma + mLen).  All bounds
// arithmetic is done with CheckedInt so addresses near the top of the address
// space cannot wrap around.
static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
  // An invalid address poisons the result.
  if (!aAddr.Valid()) {
    return TaggedUWord();
  }

  // Lower limit check. |aAddr.Value()| is the lowest requested address
  // and |aStackImg->mStartAvma| is the lowest address we actually have,
  // so the comparison is straightforward.
  if (aAddr.Value() < aStackImg->mStartAvma) {
    return TaggedUWord();
  }

  // Upper limit check. We must compute the highest requested address
  // and the highest address we actually have, but being careful to
  // avoid overflow. In particular if |aAddr| is 0xFFF...FFF or the
  // 3/7 values below that, then we will get overflow. See bug #1245477.
  typedef CheckedInt<uintptr_t> CheckedUWord;
  CheckedUWord highest_requested_plus_one =
      CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
  CheckedUWord highest_available_plus_one =
      CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
  if (!highest_requested_plus_one.isValid()     // overflow?
      || !highest_available_plus_one.isValid()  // overflow?
      || (highest_requested_plus_one.value() >
          highest_available_plus_one.value())) {  // in range?
    return TaggedUWord();
  }

  // In-bounds: read the word out of the copied stack contents.
  return TaggedUWord(
      *(uintptr_t*)(&aStackImg
                         ->mContents[aAddr.Value() - aStackImg->mStartAvma]));
}
1018
// RUNS IN NO-MALLOC CONTEXT
//
// Maps a DWARF register number to the corresponding value in |aOldRegs|
// (the registers of the frame being unwound), or to |aCFA| for the
// pseudo-register DW_REG_CFA.  Registers not tracked on the current
// architecture assert and yield an invalid value.
static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
                               TaggedUWord aCFA) {
  switch (aReg) {
    case DW_REG_CFA:
      return aCFA;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return aOldRegs->xbp;
    case DW_REG_INTEL_XSP:
      return aOldRegs->xsp;
    case DW_REG_INTEL_XIP:
      return aOldRegs->xip;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return aOldRegs->r7;
    case DW_REG_ARM_R11:
      return aOldRegs->r11;
    case DW_REG_ARM_R12:
      return aOldRegs->r12;
    case DW_REG_ARM_R13:
      return aOldRegs->r13;
    case DW_REG_ARM_R14:
      return aOldRegs->r14;
    case DW_REG_ARM_R15:
      return aOldRegs->r15;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return aOldRegs->x29;
    case DW_REG_AARCH64_X30:
      return aOldRegs->x30;
    case DW_REG_AARCH64_SP:
      return aOldRegs->sp;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return aOldRegs->sp;
    case DW_REG_MIPS_FP:
      return aOldRegs->fp;
    case DW_REG_MIPS_PC:
      return aOldRegs->pc;
#else
#  error "Unsupported arch"
#endif
    default:
      // Unknown/untracked register: programming error upstream.
      MOZ_ASSERT(0);
      return TaggedUWord();
  }
}
1067
// RUNS IN NO-MALLOC CONTEXT
// See prototype for comment.
//
// Interprets the prefix-encoded DWARF expression beginning at index |start|
// in |aPfxInstrs| on a small fixed-size evaluation stack.  Any error
// (bad start index, stack over/underflow, malformed sequence, running off
// the end) produces an invalid TaggedUWord; there are no partial results.
TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
                            TaggedUWord aCFA, const StackImage* aStackImg,
                            const vector<PfxInstr>& aPfxInstrs) {
  // A small evaluation stack, and a stack pointer, which points to
  // the highest numbered in-use element.
  const int N_STACK = 10;
  TaggedUWord stack[N_STACK];
  int stackPointer = -1;
  for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();

// Push/pop with bounds checks; any violation bails out via |fail|.
#define PUSH(_tuw)                                             \
  do {                                                         \
    if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \
    stack[++stackPointer] = (_tuw);                            \
  } while (0)

#define POP(_lval)                                   \
  do {                                               \
    if (stackPointer < 0) goto fail; /* underflow */ \
    _lval = stack[stackPointer--];                   \
  } while (0)

  // Cursor in the instruction sequence.
  size_t curr = start + 1;

  // Check the start point is sane.
  size_t nInstrs = aPfxInstrs.size();
  if (start < 0 || (size_t)start >= nInstrs) goto fail;

  {
    // The instruction sequence must start with PX_Start. If not,
    // something is seriously wrong.
    PfxInstr first = aPfxInstrs[start];
    if (first.mOpcode != PX_Start) goto fail;

    // Push the CFA on the stack to start with (or not), as required by
    // the original DW_OP_*expression* CFI.
    if (first.mOperand != 0) PUSH(aCFA);
  }

  while (true) {
    if (curr >= nInstrs) goto fail;  // ran off the end of the sequence

    PfxInstr pfxi = aPfxInstrs[curr++];
    if (pfxi.mOpcode == PX_End) break;  // we're done

    switch (pfxi.mOpcode) {
      case PX_Start:
        // This should appear only at the start of the sequence.
        goto fail;
      case PX_End:
        // We just took care of that, so we shouldn't see it again.
        MOZ_ASSERT(0);
        goto fail;
      case PX_SImm32:
        // Sign-extend the 32-bit operand to word size before pushing.
        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
        break;
      case PX_DwReg: {
        // Push the current value of a DWARF register.
        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
        MOZ_ASSERT(reg != DW_REG_CFA);
        PUSH(EvaluateReg(reg, aOldRegs, aCFA));
        break;
      }
      case PX_Deref: {
        // Replace the top of stack with the word it points at in the
        // stack image (invalid if out of range).
        TaggedUWord addr;
        POP(addr);
        PUSH(DerefTUW(addr, aStackImg));
        break;
      }
      // Binary operators: second-from-top OP top, pushed back.
      case PX_Add: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y + x);
        break;
      }
      case PX_Sub: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y - x);
        break;
      }
      case PX_And: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y & x);
        break;
      }
      case PX_Or: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y | x);
        break;
      }
      case PX_CmpGES: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y.CmpGEs(x));
        break;
      }
      case PX_Shl: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y << x);
        break;
      }
      default:
        // Unknown opcode: malformed sequence.
        MOZ_ASSERT(0);
        goto fail;
    }
  }  // while (true)

  // Evaluation finished. The top value on the stack is the result.
  if (stackPointer >= 0) {
    return stack[stackPointer];
  }
  // Else fall through

fail:
  return TaggedUWord();

#undef PUSH
#undef POP
}
1199
1200 // RUNS IN NO-MALLOC CONTEXT
EvaluateExpr(const UnwindRegs * aOldRegs,TaggedUWord aCFA,const StackImage * aStackImg,const vector<PfxInstr> * aPfxInstrs) const1201 TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
1202 const StackImage* aStackImg,
1203 const vector<PfxInstr>* aPfxInstrs) const {
1204 switch (mHow) {
1205 case UNKNOWN:
1206 return TaggedUWord();
1207 case NODEREF: {
1208 TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1209 tuw = tuw + TaggedUWord((intptr_t)mOffset);
1210 return tuw;
1211 }
1212 case DEREF: {
1213 TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1214 tuw = tuw + TaggedUWord((intptr_t)mOffset);
1215 return DerefTUW(tuw, aStackImg);
1216 }
1217 case PFXEXPR: {
1218 MOZ_ASSERT(aPfxInstrs);
1219 if (!aPfxInstrs) {
1220 return TaggedUWord();
1221 }
1222 return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs);
1223 }
1224 default:
1225 MOZ_ASSERT(0);
1226 return TaggedUWord();
1227 }
1228 }
1229
// RUNS IN NO-MALLOC CONTEXT
//
// Applies CFI RuleSet |aRS| to |aRegs| in place, replacing the register
// values of the current frame with those of its caller.  Registers for
// which no value can be computed are left invalid, which the caller
// (LUL::Unwind) uses to decide whether to continue.
static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
                       const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
  // Take a copy of regs, since we'll need to refer to the old values
  // whilst computing the new ones.
  UnwindRegs old_regs = *aRegs;

  // Mark all the current register values as invalid, so that the
  // caller can see, on our return, which ones have been computed
  // anew. If we don't even manage to compute a new PC value, then
  // the caller will have to abandon the unwind.
  // FIXME: Create and use instead: aRegs->SetAllInvalid();
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp = TaggedUWord();
  aRegs->xsp = TaggedUWord();
  aRegs->xip = TaggedUWord();
#elif defined(GP_ARCH_arm)
  aRegs->r7 = TaggedUWord();
  aRegs->r11 = TaggedUWord();
  aRegs->r12 = TaggedUWord();
  aRegs->r13 = TaggedUWord();
  aRegs->r14 = TaggedUWord();
  aRegs->r15 = TaggedUWord();
#elif defined(GP_ARCH_arm64)
  aRegs->x29 = TaggedUWord();
  aRegs->x30 = TaggedUWord();
  aRegs->sp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#elif defined(GP_ARCH_mips64)
  aRegs->sp = TaggedUWord();
  aRegs->fp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#else
#  error "Unsupported arch"
#endif

  // This is generally useful.
  const TaggedUWord inval = TaggedUWord();

  // First, compute the CFA.
  TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,
                                               aStackImg, aPfxInstrs);

  // If we didn't manage to compute the CFA, well .. that's ungood,
  // but keep going anyway. It'll be OK provided none of the register
  // value rules mention the CFA. In any case, compute the new values
  // for each register that we're tracking.

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp =
      aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xsp =
      aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xip =
      aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm)
  aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r11 =
      aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r12 =
      aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r13 =
      aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r14 =
      aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r15 =
      aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm64)
  aRegs->x29 =
      aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->x30 =
      aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_mips64)
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#else
#  error "Unsupported arch"
#endif

  // We're done. Any regs for which we didn't manage to compute a
  // new value will now be marked as invalid.
}
1314
// RUNS IN NO-MALLOC CONTEXT
//
// Unwinds the stack described by |aStartRegs| and the captured stack image
// |aStackImg|, writing up to |aFramesAvail| (PC, SP) pairs into
// |aFramePCs|/|aFrameSPs|.  On return, |*aFramesUsed| is the number of
// frames produced and |*aFramePointerFramesAcquired| has been incremented
// once for each frame recovered by frame-pointer chasing rather than CFI.
// Each loop iteration recovers one frame, trying in order: the x86-linux
// __kernel_vsyscall special case, CFI (RuleSets), then frame-pointer
// chasing; if all fail, the unwind stops.
void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,
                 /*OUT*/ uintptr_t* aFrameSPs,
                 /*OUT*/ size_t* aFramesUsed,
                 /*OUT*/ size_t* aFramePointerFramesAcquired,
                 size_t aFramesAvail, UnwindRegs* aStartRegs,
                 StackImage* aStackImg) {
  // Unwinding is only allowed once admin mode has ended.
  MOZ_RELEASE_ASSERT(!mAdminMode);

  /////////////////////////////////////////////////////////
  // BEGIN UNWIND

  *aFramesUsed = 0;

  UnwindRegs regs = *aStartRegs;
  // Tracks the highest SP seen so far, to detect SP going backwards
  // (a sign of a cycle or corruption).
  TaggedUWord last_valid_sp = TaggedUWord();

  while (true) {
    if (DEBUG_MAIN) {
      char buf[300];
      mLog("\n");
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
      SprintfLiteral(
          buf, "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n",
          (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
          (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
          (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm)
      SprintfLiteral(
          buf,
          "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx"
          " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n",
          (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
          (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
          (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
          (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
          (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
          (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm64)
      SprintfLiteral(
          buf,
          "LoopTop: pc %d/%llx x29 %d/%llx x30 %d/%llx"
          " sp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
          (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_mips64)
      SprintfLiteral(
          buf, "LoopTop: pc %d/%llx sp %d/%llx fp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
          (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#else
#  error "Unsupported arch"
#endif
    }

    // Pick out the instruction address (|ia|) and stack pointer (|sp|)
    // for this frame.  On arm/arm64, frames other than the innermost use
    // the link register rather than the PC.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    TaggedUWord ia = regs.xip;
    TaggedUWord sp = regs.xsp;
#elif defined(GP_ARCH_arm)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
    TaggedUWord sp = regs.r13;
#elif defined(GP_ARCH_arm64)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
    TaggedUWord sp = regs.sp;
#elif defined(GP_ARCH_mips64)
    TaggedUWord ia = regs.pc;
    TaggedUWord sp = regs.sp;
#else
#  error "Unsupported arch"
#endif

    if (*aFramesUsed >= aFramesAvail) {
      break;
    }

    // If we don't have a valid value for the PC, give up.
    if (!ia.Valid()) {
      break;
    }

    // If this is the innermost frame, record the SP value, which
    // presumably is valid. If this isn't the innermost frame, and we
    // have a valid SP value, check that its SP value isn't less that
    // the one we've seen so far, so as to catch potential SP value
    // cycles.
    if (*aFramesUsed == 0) {
      last_valid_sp = sp;
    } else {
      MOZ_ASSERT(last_valid_sp.Valid());
      if (sp.Valid()) {
        if (sp.Value() < last_valid_sp.Value()) {
          // Hmm, SP going in the wrong direction. Let's stop.
          break;
        }
        // Remember where we got to.
        last_valid_sp = sp;
      }
    }

    // Emit the frame.
    aFramePCs[*aFramesUsed] = ia.Value();
    aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
    (*aFramesUsed)++;

    // Find the RuleSet for the current IA, if any. This will also
    // query the backing (secondary) maps if it isn't found in the
    // thread-local cache.

    // If this isn't the innermost frame, back up into the calling insn.
    if (*aFramesUsed > 1) {
      ia = ia + TaggedUWord((uintptr_t)(-1));
    }

    pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
        mPriMap->Lookup(ia.Value());
    const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
    const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;

    if (DEBUG_MAIN) {
      char buf[100];
      SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
                     (unsigned long long int)ia.Value(), ruleset);
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
    }

#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
    /////////////////////////////////////////////
    ////
    // On 32 bit x86-linux, syscalls are often done via the VDSO
    // function __kernel_vsyscall, which doesn't have a corresponding
    // object that we can read debuginfo from. That effectively kills
    // off all stack traces for threads blocked in syscalls. Hence
    // special-case by looking at the code surrounding the program
    // counter.
    //
    // 0xf7757420 <__kernel_vsyscall+0>: push %ecx
    // 0xf7757421 <__kernel_vsyscall+1>: push %edx
    // 0xf7757422 <__kernel_vsyscall+2>: push %ebp
    // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp
    // 0xf7757425 <__kernel_vsyscall+5>: sysenter
    // 0xf7757427 <__kernel_vsyscall+7>: nop
    // 0xf7757428 <__kernel_vsyscall+8>: nop
    // 0xf7757429 <__kernel_vsyscall+9>: nop
    // 0xf775742a <__kernel_vsyscall+10>: nop
    // 0xf775742b <__kernel_vsyscall+11>: nop
    // 0xf775742c <__kernel_vsyscall+12>: nop
    // 0xf775742d <__kernel_vsyscall+13>: nop
    // 0xf775742e <__kernel_vsyscall+14>: int $0x80
    // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp
    // 0xf7757431 <__kernel_vsyscall+17>: pop %edx
    // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx
    // 0xf7757433 <__kernel_vsyscall+19>: ret
    //
    // In cases where the sampled thread is blocked in a syscall, its
    // program counter will point at "pop %ebp". Hence we look for
    // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
    // the corresponding register-recovery actions are:
    // new_ebp = *(old_esp + 0)
    // new eip = *(old_esp + 12)
    // new_esp = old_esp + 16
    //
    // It may also be the case that the program counter points two
    // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
    // the case where the syscall has been restarted but the thread
    // hasn't been rescheduled. The code below doesn't handle that;
    // it could easily be made to.
    //
    if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
      uintptr_t insns_min, insns_max;
      uintptr_t eip = ia.Value();
      bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
      if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
        uint8_t* eipC = (uint8_t*)eip;
        // Match "int $0x80; pop %ebp; pop %edx; pop %ecx; ret" with the
        // PC at the "pop %ebp".
        if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
            eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
          TaggedUWord sp_plus_0 = sp;
          TaggedUWord sp_plus_12 = sp;
          TaggedUWord sp_plus_16 = sp;
          sp_plus_12 = sp_plus_12 + TaggedUWord(12);
          sp_plus_16 = sp_plus_16 + TaggedUWord(16);
          TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
          TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
          TaggedUWord new_esp = sp_plus_16;
          if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
            regs.xbp = new_ebp;
            regs.xip = new_eip;
            regs.xsp = new_esp;
            continue;
          }
        }
      }
    }
    ////
    /////////////////////////////////////////////
#endif  // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)

    // So, do we have a ruleset for this address? If so, use it now.
    if (ruleset) {
      if (DEBUG_MAIN) {
        ruleset->Print(ia.Value(), 1 /*bogus, but doesn't matter*/, mLog);
        mLog("\n");
      }
      // Use the RuleSet to compute the registers for the previous
      // frame. |regs| is modified in-place.
      UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
      continue;
    }

#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||     \
    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
    defined(GP_PLAT_amd64_freebsd)
    // There's no RuleSet for the specified address. On amd64/x86_linux, see if
    // it's possible to recover the caller's frame by using the frame pointer.

    // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
    // and assume the following layout:
    //
    //                 <--- new_SP
    //   +----------+
    //   | new_IP   |  (return address)
    //   +----------+
    //   | new_BP   |  <--- old_BP
    //   +----------+
    //   | ....     |
    //   | ....     |
    //   | ....     |
    //   +----------+  <---- old_SP (arbitrary, but must be <= old_BP)

    const size_t wordSzB = sizeof(uintptr_t);
    TaggedUWord old_xsp = regs.xsp;

    // points at new_BP ?
    TaggedUWord old_xbp = regs.xbp;
    // points at new_IP ?
    TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
    // is the new_SP ?
    TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);

    if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
        old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
      // We don't need to do any range, alignment or validity checks for
      // addresses passed to DerefTUW, since that performs them itself, and
      // returns an invalid value on failure. Any such value will poison
      // subsequent uses, and we do a final check for validity before putting
      // the computed values into |regs|.
      TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
      if (new_xbp.Valid() && new_xbp.IsAligned() &&
          old_xbp.Value() < new_xbp.Value()) {
        TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
        TaggedUWord new_xsp = old_xbp_plus2;
        if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
          regs.xbp = new_xbp;
          regs.xip = new_xip;
          regs.xsp = new_xsp;
          (*aFramePointerFramesAcquired)++;
          continue;
        }
      }
    }
#elif defined(GP_ARCH_arm64)
    // Here is an example of generated code for prologue and epilogue..
    //
    // stp     x29, x30, [sp, #-16]!
    // mov     x29, sp
    // ...
    // ldp     x29, x30, [sp], #16
    // ret
    //
    // Next is another example of generated code.
    //
    // stp     x20, x19, [sp, #-32]!
    // stp     x29, x30, [sp, #16]
    // add     x29, sp, #0x10
    // ...
    // ldp     x29, x30, [sp, #16]
    // ldp     x20, x19, [sp], #32
    // ret
    //
    // Previous x29 and x30 register are stored in the address of x29 register.
    // But since sp register value depends on local variables, we cannot compute
    // previous sp register from current sp/fp/lr register and there is no
    // regular rule for sp register in prologue. But since return address is lr
    // register, if x29 is valid, we will get return address without sp
    // register.
    //
    // So we assume the following layout that if no rule set. x29 is frame
    // pointer, so we will be able to compute x29 and x30 .
    //
    // +----------+  <--- new_sp (cannot compute)
    // | .....    |
    // +----------+
    // | new_lr   |  (return address)
    // +----------+
    // | new_fp   |  <--- old_fp
    // +----------+
    // | .....    |
    // | .....    |
    // +----------+  <---- old_sp (arbitrary, but unused)

    TaggedUWord old_fp = regs.x29;
    if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
        last_valid_sp.Value() <= old_fp.Value()) {
      TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
      if (new_fp.Valid() && new_fp.IsAligned() &&
          old_fp.Value() < new_fp.Value()) {
        TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
        TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
        if (new_lr.Valid()) {
          regs.x29 = new_fp;
          regs.x30 = new_lr;
          // When using frame pointer to walk stack, we cannot compute sp
          // register since we cannot compute sp register from fp/lr/sp
          // register, and there is no regular rule to compute previous sp
          // register. So mark as invalid.
          regs.sp = TaggedUWord();
          (*aFramePointerFramesAcquired)++;
          continue;
        }
      }
    }
#endif  // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||
        // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||
        // defined(GP_PLAT_amd64_freebsd)

    // We failed to recover a frame either using CFI or FP chasing, and we
    // have no other ways to recover the frame. So we have to give up.
    break;

  }  // top level unwind loop

  // END UNWIND
  /////////////////////////////////////////////////////////
}
1659
1660 ////////////////////////////////////////////////////////////////
1661 // LUL Unit Testing //
1662 ////////////////////////////////////////////////////////////////
1663
// Number of innermost stack bytes presented to LUL by the unit test below.
static const int LUL_UNIT_TEST_STACK_SIZE = 32768;
1665
#if defined(GP_ARCH_mips64)
// Returns this function's return address (register $31/ra), which the caller
// uses as an approximation of its own program counter.  noinline so the call
// actually happens and $31 holds the caller-side address.
static __attribute__((noinline)) unsigned long __getpc(void) {
  unsigned long rtaddr;
  __asm__ volatile("move %0, $31" : "=r"(rtaddr));
  return rtaddr;
}
#endif
1673
1674 // This function is innermost in the test call sequence. It uses LUL
1675 // to unwind, and compares the result with the sequence specified in
1676 // the director string. These need to agree in order for the test to
1677 // pass. In order not to screw up the results, this function needs
1678 // to have a not-very big stack frame, since we're only presenting
1679 // the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
1680 // that chunk unavoidably includes the frame for this function.
1681 //
1682 // This function must not be inlined into its callers. Doing so will
1683 // cause the expected-vs-actual backtrace consistency checking to
1684 // fail. Prints summary results to |aLUL|'s logging sink and also
1685 // returns a boolean indicating whether or not the test passed.
GetAndCheckStackTrace(LUL * aLUL,const char * dstring)1686 static __attribute__((noinline)) bool GetAndCheckStackTrace(
1687 LUL* aLUL, const char* dstring) {
1688 // Get hold of the current unwind-start registers.
1689 UnwindRegs startRegs;
1690 memset(&startRegs, 0, sizeof(startRegs));
1691 #if defined(GP_ARCH_amd64)
1692 volatile uintptr_t block[3];
1693 MOZ_ASSERT(sizeof(block) == 24);
1694 __asm__ __volatile__(
1695 "leaq 0(%%rip), %%r15"
1696 "\n\t"
1697 "movq %%r15, 0(%0)"
1698 "\n\t"
1699 "movq %%rsp, 8(%0)"
1700 "\n\t"
1701 "movq %%rbp, 16(%0)"
1702 "\n"
1703 :
1704 : "r"(&block[0])
1705 : "memory", "r15");
1706 startRegs.xip = TaggedUWord(block[0]);
1707 startRegs.xsp = TaggedUWord(block[1]);
1708 startRegs.xbp = TaggedUWord(block[2]);
1709 const uintptr_t REDZONE_SIZE = 128;
1710 uintptr_t start = block[1] - REDZONE_SIZE;
1711 #elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
1712 volatile uintptr_t block[3];
1713 MOZ_ASSERT(sizeof(block) == 12);
1714 __asm__ __volatile__(
1715 ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
1716 "\n\t"
1717 "popl %%edi"
1718 "\n\t"
1719 "movl %%edi, 0(%0)"
1720 "\n\t"
1721 "movl %%esp, 4(%0)"
1722 "\n\t"
1723 "movl %%ebp, 8(%0)"
1724 "\n"
1725 :
1726 : "r"(&block[0])
1727 : "memory", "edi");
1728 startRegs.xip = TaggedUWord(block[0]);
1729 startRegs.xsp = TaggedUWord(block[1]);
1730 startRegs.xbp = TaggedUWord(block[2]);
1731 const uintptr_t REDZONE_SIZE = 0;
1732 uintptr_t start = block[1] - REDZONE_SIZE;
1733 #elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
1734 volatile uintptr_t block[6];
1735 MOZ_ASSERT(sizeof(block) == 24);
1736 __asm__ __volatile__(
1737 "mov r0, r15"
1738 "\n\t"
1739 "str r0, [%0, #0]"
1740 "\n\t"
1741 "str r14, [%0, #4]"
1742 "\n\t"
1743 "str r13, [%0, #8]"
1744 "\n\t"
1745 "str r12, [%0, #12]"
1746 "\n\t"
1747 "str r11, [%0, #16]"
1748 "\n\t"
1749 "str r7, [%0, #20]"
1750 "\n"
1751 :
1752 : "r"(&block[0])
1753 : "memory", "r0");
1754 startRegs.r15 = TaggedUWord(block[0]);
1755 startRegs.r14 = TaggedUWord(block[1]);
1756 startRegs.r13 = TaggedUWord(block[2]);
1757 startRegs.r12 = TaggedUWord(block[3]);
1758 startRegs.r11 = TaggedUWord(block[4]);
1759 startRegs.r7 = TaggedUWord(block[5]);
1760 const uintptr_t REDZONE_SIZE = 0;
1761 uintptr_t start = block[1] - REDZONE_SIZE;
1762 #elif defined(GP_ARCH_arm64)
1763 volatile uintptr_t block[4];
1764 MOZ_ASSERT(sizeof(block) == 32);
1765 __asm__ __volatile__(
1766 "adr x0, . \n\t"
1767 "str x0, [%0, #0] \n\t"
1768 "str x29, [%0, #8] \n\t"
1769 "str x30, [%0, #16] \n\t"
1770 "mov x0, sp \n\t"
1771 "str x0, [%0, #24] \n\t"
1772 :
1773 : "r"(&block[0])
1774 : "memory", "x0");
1775 startRegs.pc = TaggedUWord(block[0]);
1776 startRegs.x29 = TaggedUWord(block[1]);
1777 startRegs.x30 = TaggedUWord(block[2]);
1778 startRegs.sp = TaggedUWord(block[3]);
1779 const uintptr_t REDZONE_SIZE = 0;
1780 uintptr_t start = block[1] - REDZONE_SIZE;
1781 #elif defined(GP_ARCH_mips64)
1782 volatile uintptr_t block[3];
1783 MOZ_ASSERT(sizeof(block) == 24);
1784 __asm__ __volatile__(
1785 "sd $29, 8(%0) \n"
1786 "sd $30, 16(%0) \n"
1787 :
1788 : "r"(block)
1789 : "memory");
1790 block[0] = __getpc();
1791 startRegs.pc = TaggedUWord(block[0]);
1792 startRegs.sp = TaggedUWord(block[1]);
1793 startRegs.fp = TaggedUWord(block[2]);
1794 const uintptr_t REDZONE_SIZE = 0;
1795 uintptr_t start = block[1] - REDZONE_SIZE;
1796 #else
1797 # error "Unsupported platform"
1798 #endif
1799
1800 // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
1801 // stack.
1802 uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
1803 uintptr_t ws = sizeof(void*);
1804 start &= ~(ws - 1);
1805 end &= ~(ws - 1);
1806 uintptr_t nToCopy = end - start;
1807 if (nToCopy > lul::N_STACK_BYTES) {
1808 nToCopy = lul::N_STACK_BYTES;
1809 }
1810 MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
1811 StackImage* stackImg = new StackImage();
1812 stackImg->mLen = nToCopy;
1813 stackImg->mStartAvma = start;
1814 if (nToCopy > 0) {
1815 MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
1816 memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
1817 }
1818
1819 // Unwind it.
1820 const int MAX_TEST_FRAMES = 64;
1821 uintptr_t framePCs[MAX_TEST_FRAMES];
1822 uintptr_t frameSPs[MAX_TEST_FRAMES];
1823 size_t framesAvail = mozilla::ArrayLength(framePCs);
1824 size_t framesUsed = 0;
1825 size_t framePointerFramesAcquired = 0;
1826 aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,
1827 &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);
1828
1829 delete stackImg;
1830
1831 // if (0) {
1832 // // Show what we have.
1833 // fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
1834 // for (size_t i = 0; i < framesUsed; i++) {
1835 // fprintf(stderr, " [%2d] SP %p PC %p\n",
1836 // (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
1837 // }
1838 // fprintf(stderr, "\n");
1839 //}
1840
1841 // Check to see if there's a consistent binding between digits in
1842 // the director string ('1' .. '8') and the PC values acquired by
1843 // the unwind. If there isn't, the unwinding has failed somehow.
1844 uintptr_t binding[8]; // binding for '1' .. binding for '8'
1845 memset((void*)binding, 0, sizeof(binding));
1846
1847 // The general plan is to work backwards along the director string
1848 // and forwards along the framePCs array. Doing so corresponds to
1849 // working outwards from the innermost frame of the recursive test set.
1850 const char* cursor = dstring;
1851
1852 // Find the end. This leaves |cursor| two bytes past the first
1853 // character we want to look at -- see comment below.
1854 while (*cursor) cursor++;
1855
1856 // Counts the number of consistent frames.
1857 size_t nConsistent = 0;
1858
1859 // Iterate back to the start of the director string. The starting
1860 // points are a bit complex. We can't use framePCs[0] because that
1861 // contains the PC in this frame (above). We can't use framePCs[1]
1862 // because that will contain the PC at return point in the recursive
1863 // test group (TestFn[1-8]) for their call "out" to this function,
1864 // GetAndCheckStackTrace. Although LUL will compute a correct
1865 // return address, that will not be the same return address as for a
1866 // recursive call out of the the function to another function in the
1867 // group. Hence we can only start consistency checking at
1868 // framePCs[2].
1869 //
1870 // To be consistent, then, we must ignore the last element in the
1871 // director string as that corresponds to framePCs[1]. Hence the
1872 // start points are: framePCs[2] and the director string 2 bytes
1873 // before the terminating zero.
1874 //
1875 // Also as a result of this, the number of consistent frames counted
1876 // will always be one less than the length of the director string
1877 // (not including its terminating zero).
1878 size_t frameIx;
1879 for (cursor = cursor - 2, frameIx = 2;
1880 cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) {
1881 char c = *cursor;
1882 uintptr_t pc = framePCs[frameIx];
1883 // If this doesn't hold, the director string is ill-formed.
1884 MOZ_ASSERT(c >= '1' && c <= '8');
1885 int n = ((int)c) - ((int)'1');
1886 if (binding[n] == 0) {
1887 // There's no binding for |c| yet, so install |pc| and carry on.
1888 binding[n] = pc;
1889 nConsistent++;
1890 continue;
1891 }
1892 // There's a pre-existing binding for |c|. Check it's consistent.
1893 if (binding[n] != pc) {
1894 // Not consistent. Give up now.
1895 break;
1896 }
1897 // Consistent. Keep going.
1898 nConsistent++;
1899 }
1900
1901 // So, did we succeed?
1902 bool passed = nConsistent + 1 == strlen(dstring);
1903
1904 // Show the results.
1905 char buf[200];
1906 SprintfLiteral(buf, "LULUnitTest: dstring = %s\n", dstring);
1907 buf[sizeof(buf) - 1] = 0;
1908 aLUL->mLog(buf);
1909 SprintfLiteral(buf, "LULUnitTest: %d consistent, %d in dstring: %s\n",
1910 (int)nConsistent, (int)strlen(dstring),
1911 passed ? "PASS" : "FAIL");
1912 buf[sizeof(buf) - 1] = 0;
1913 aLUL->mLog(buf);
1914
1915 return passed;
1916 }
1917
1918 // Macro magic to create a set of 8 mutually recursive functions with
1919 // varying frame sizes. These will recurse amongst themselves as
// specified by |strP|, the director string, and call
1921 // GetAndCheckStackTrace when the string becomes empty, passing it the
1922 // original value of the string. This checks the result, printing
1923 // results on |aLUL|'s logging sink, and also returns a boolean
1924 // indicating whether or not the results are acceptable (correct).
1925
// Forward-declare one of the mutually recursive test functions (defined
// below via GEN_TEST_FN).  All 8 share the same signature: |aLUL| is the
// unwinder under test, |strPorig| is the original director string, and
// |strP| is the still-to-be-consumed suffix of it.
#define DECL_TEST_FN(NAME) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
1928
// Define one of the 8 mutually recursive test functions.  |NAME| must
// previously have been declared with DECL_TEST_FN.  |FRAMESIZE| is the
// (minimum) size in bytes of a local array, so that the 8 generated
// functions present differing stack frame sizes to the unwinder.  Each
// function consumes the leading character of |strP| to choose which
// function to call next, and calls GetAndCheckStackTrace with the
// original director string |strPorig| once |strP| is exhausted,
// returning that check's pass/fail result.
#define GEN_TEST_FN(NAME, FRAMESIZE)                                          \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) {              \
    /* Create a frame of size (at least) FRAMESIZE, so that the */            \
    /* 8 functions created by this macro offer some variation in frame */     \
    /* sizes.  This isn't as simple as it might seem, since a clever */       \
    /* optimizing compiler (eg, clang-5) detects that the array is unused */  \
    /* and removes it.  We try to defeat this by passing it to a function */  \
    /* in a different compilation unit, and hoping that clang does not */     \
    /* notice that the call is a no-op. */                                    \
    char space[FRAMESIZE];                                                    \
    Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
                                                                              \
    if (*strP == '\0') {                                                      \
      /* We've come to the end of the director string. */                     \
      /* Take a stack snapshot. */                                            \
      return GetAndCheckStackTrace(aLUL, strPorig);                           \
    } else {                                                                  \
      /* Recurse onwards.  This is a bit subtle.  The obvious */              \
      /* thing to do here is call onwards directly, from within the */        \
      /* arms of the case statement.  That gives a problem in that */         \
      /* there will be multiple return points inside each function when */    \
      /* unwinding, so it will be difficult to check for consistency */       \
      /* against the director string.  Instead, we make an indirect */        \
      /* call, so as to guarantee that there is only one call site */         \
      /* within each function.  This does assume that the compiler */         \
      /* won't transform it back to the simple direct-call form. */           \
      /* To discourage it from doing so, the call is bracketed with */        \
      /* __asm__ __volatile__ sections so as to make it not-movable. */       \
      bool (*nextFn)(LUL*, const char*, const char*) = nullptr;               \
      switch (*strP) {                                                        \
        case '1':                                                             \
          nextFn = TestFn1;                                                   \
          break;                                                              \
        case '2':                                                             \
          nextFn = TestFn2;                                                   \
          break;                                                              \
        case '3':                                                             \
          nextFn = TestFn3;                                                   \
          break;                                                              \
        case '4':                                                             \
          nextFn = TestFn4;                                                   \
          break;                                                              \
        case '5':                                                             \
          nextFn = TestFn5;                                                   \
          break;                                                              \
        case '6':                                                             \
          nextFn = TestFn6;                                                   \
          break;                                                              \
        case '7':                                                             \
          nextFn = TestFn7;                                                   \
          break;                                                              \
        case '8':                                                             \
          nextFn = TestFn8;                                                   \
          break;                                                              \
        default:                                                              \
          /* Not reachable for well-formed director strings, whose */         \
          /* characters are all in '1' .. '8'; fall back to TestFn8. */       \
          nextFn = TestFn8;                                                   \
          break;                                                              \
      }                                                                       \
      /* "use" |space| immediately after the recursive call, */               \
      /* so as to dissuade clang from deallocating the space while */         \
      /* the call is active, or otherwise messing with the stack frame. */    \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      bool passed = nextFn(aLUL, strPorig, strP + 1);                         \
      Unused << write(1, space, 0);                                           \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      return passed;                                                          \
    }                                                                         \
  }
1997
// The test functions are mutually recursive, so it is necessary to
// declare them before defining them.
DECL_TEST_FN(TestFn1)
DECL_TEST_FN(TestFn2)
DECL_TEST_FN(TestFn3)
DECL_TEST_FN(TestFn4)
DECL_TEST_FN(TestFn5)
DECL_TEST_FN(TestFn6)
DECL_TEST_FN(TestFn7)
DECL_TEST_FN(TestFn8)

// Define the 8 test functions.  The second argument is the frame size
// for each; the sizes are deliberately varied so that the unwinder is
// exercised against a variety of stack layouts.
GEN_TEST_FN(TestFn1, 123)
GEN_TEST_FN(TestFn2, 456)
GEN_TEST_FN(TestFn3, 789)
GEN_TEST_FN(TestFn4, 23)
GEN_TEST_FN(TestFn5, 47)
GEN_TEST_FN(TestFn6, 117)
GEN_TEST_FN(TestFn7, 1)
GEN_TEST_FN(TestFn8, 99)
2017
// This starts the test sequence going.  Call here to generate a
// sequence of calls as directed by the string |dstring|.  The call
// sequence will, from its innermost frame, finish by calling
// GetAndCheckStackTrace() and passing it |dstring|.
// GetAndCheckStackTrace() will unwind the stack, check consistency
// of those results against |dstring|, and print a pass/fail message
// to aLUL's logging sink.  It also updates the counters in *aNTests
// and aNTestsPassed.
//
// |dstring| is expected to contain only the characters '1' .. '8'
// (GetAndCheckStackTrace asserts this for each frame it checks).
//
// noinline, presumably so that this function keeps its own frame and
// the |space| array below genuinely occupies stack -- TODO confirm.
__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
                                       /*OUT*/ int* aNTestsPassed, LUL* aLUL,
                                       const char* dstring) {
  // Ensure that the stack has at least this much space on it.  This
  // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
  // and hand it to LUL.  Safe in the sense that no segfault can
  // happen because the stack is at least this big.  This is all
  // somewhat dubious in the sense that a sufficiently clever compiler
  // (clang, for one) can figure out that space[] is unused and delete
  // it from the frame.  Hence the somewhat elaborate hoop jumping to
  // fill it up before the call and to at least appear to use the
  // value afterwards.
  int i;
  // |volatile| so the compiler cannot treat the fill loop below as dead.
  volatile char space[LUL_UNIT_TEST_STACK_SIZE];
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    space[i] = (char)(i & 0x7F);
  }

  // Really run the test.
  bool passed = TestFn1(aLUL, dstring, dstring);

  // Appear to use space[], by visiting the value to compute some kind
  // of checksum, and then (apparently) using the checksum.
  int sum = 0;
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    // If this doesn't fool LLVM, I don't know what will.
    sum += space[i] - 3 * i;
  }
  // Feed |sum| into an empty asm block so it appears live to the compiler.
  __asm__ __volatile__("" : : "r"(sum));

  // Update the counters.
  (*aNTests)++;
  if (passed) {
    (*aNTestsPassed)++;
  }
}
2062
RunLulUnitTests(int * aNTests,int * aNTestsPassed,LUL * aLUL)2063 void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
2064 LUL* aLUL) {
2065 aLUL->mLog(":\n");
2066 aLUL->mLog("LULUnitTest: BEGIN\n");
2067 *aNTests = *aNTestsPassed = 0;
2068 TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
2069 TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
2070 TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
2071 TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
2072 TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
2073 TestUnw(aNTests, aNTestsPassed, aLUL,
2074 "123456781122334455667788777777777777777777777");
2075 aLUL->mLog("LULUnitTest: END\n");
2076 aLUL->mLog(":\n");
2077 }
2078
2079 } // namespace lul
2080