1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "LulMain.h"
8
9 #include <string.h>
10 #include <stdlib.h>
11 #include <stdio.h>
12 #include <unistd.h> // write(), only for testing LUL
13
14 #include <algorithm> // std::sort
15 #include <string>
16 #include <utility>
17
18 #include "mozilla/Assertions.h"
19 #include "mozilla/ArrayUtils.h"
20 #include "mozilla/CheckedInt.h"
21 #include "mozilla/DebugOnly.h"
22 #include "mozilla/MemoryChecking.h"
23 #include "mozilla/Sprintf.h"
24 #include "mozilla/UniquePtr.h"
25 #include "mozilla/Unused.h"
26
27 #include "BaseProfiler.h"
28 #include "LulCommonExt.h"
29 #include "LulElfExt.h"
30 #include "LulMainInt.h"
31
32 using mozilla::baseprofiler::profiler_current_process_id;
33 using mozilla::baseprofiler::profiler_current_thread_id;
34
35 // Set this to 1 for verbose logging
36 #define DEBUG_MAIN 0
37
38 namespace lul {
39
40 using mozilla::CheckedInt;
41 using mozilla::DebugOnly;
42 using mozilla::MallocSizeOf;
43 using mozilla::Unused;
44 using std::pair;
45 using std::string;
46 using std::vector;
47
48 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
49 //
50 // Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
51 // Any such function -- and, hence, the transitive closure of those
52 // reachable from it -- must not do any dynamic memory allocation.
53 // Doing so risks deadlock. There is exactly one root function for
54 // the transitive closure: Lul::Unwind.
55 //
56 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
57
58 ////////////////////////////////////////////////////////////////
59 // RuleSet //
60 ////////////////////////////////////////////////////////////////
61
// Return a short printable name for DWARF register number |aReg|, for
// debug/logging output only.  Only the registers tracked on the current
// target architecture are named; anything else produces "???".
static const char* NameOf_DW_REG(int16_t aReg) {
  switch (aReg) {
    case DW_REG_CFA:
      return "cfa";
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return "xbp";
    case DW_REG_INTEL_XSP:
      return "xsp";
    case DW_REG_INTEL_XIP:
      return "xip";
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return "r7";
    case DW_REG_ARM_R11:
      return "r11";
    case DW_REG_ARM_R12:
      return "r12";
    case DW_REG_ARM_R13:
      return "r13";
    case DW_REG_ARM_R14:
      return "r14";
    case DW_REG_ARM_R15:
      return "r15";
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return "x29";
    case DW_REG_AARCH64_X30:
      return "x30";
    case DW_REG_AARCH64_SP:
      return "sp";
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return "sp";
    case DW_REG_MIPS_FP:
      return "fp";
    case DW_REG_MIPS_PC:
      return "pc";
#else
#  error "Unsupported arch"
#endif
    default:
      return "???";
  }
}
107
ShowRule(const char * aNewReg) const108 string LExpr::ShowRule(const char* aNewReg) const {
109 char buf[64];
110 string res = string(aNewReg) + "=";
111 switch (mHow) {
112 case UNKNOWN:
113 res += "Unknown";
114 break;
115 case NODEREF:
116 SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
117 res += buf;
118 break;
119 case DEREF:
120 SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
121 res += buf;
122 break;
123 case PFXEXPR:
124 SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset);
125 res += buf;
126 break;
127 default:
128 res += "???";
129 break;
130 }
131 return res;
132 }
133
// Emit a one-line description of this RuleSet's address range and all
// of its per-register recovery rules to the logging sink |aLog|.
// Debug/diagnostic use only.
void RuleSet::Print(void (*aLog)(const char*)) const {
  char buf[96];
  // Range is printed inclusive: [mAddr .. mAddr+mLen-1].
  SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)mAddr,
                 (unsigned long long int)(mAddr + mLen - 1));
  string res = string(buf);
  res += mCfaExpr.ShowRule("cfa");
  res += " in";
  // For each reg we care about, print the recovery expression.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  res += mXipExpr.ShowRule(" RA");
  res += mXspExpr.ShowRule(" SP");
  res += mXbpExpr.ShowRule(" BP");
#elif defined(GP_ARCH_arm)
  res += mR15expr.ShowRule(" R15");
  res += mR7expr.ShowRule(" R7");
  res += mR11expr.ShowRule(" R11");
  res += mR12expr.ShowRule(" R12");
  res += mR13expr.ShowRule(" R13");
  res += mR14expr.ShowRule(" R14");
#elif defined(GP_ARCH_arm64)
  res += mX29expr.ShowRule(" X29");
  res += mX30expr.ShowRule(" X30");
  res += mSPexpr.ShowRule(" SP");
#elif defined(GP_ARCH_mips64)
  res += mPCexpr.ShowRule(" PC");
  res += mSPexpr.ShowRule(" SP");
  res += mFPexpr.ShowRule(" FP");
#else
#  error "Unsupported arch"
#endif
  aLog(res.c_str());
}
166
ExprForRegno(DW_REG_NUMBER aRegno)167 LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
168 switch (aRegno) {
169 case DW_REG_CFA:
170 return &mCfaExpr;
171 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
172 case DW_REG_INTEL_XIP:
173 return &mXipExpr;
174 case DW_REG_INTEL_XSP:
175 return &mXspExpr;
176 case DW_REG_INTEL_XBP:
177 return &mXbpExpr;
178 #elif defined(GP_ARCH_arm)
179 case DW_REG_ARM_R15:
180 return &mR15expr;
181 case DW_REG_ARM_R14:
182 return &mR14expr;
183 case DW_REG_ARM_R13:
184 return &mR13expr;
185 case DW_REG_ARM_R12:
186 return &mR12expr;
187 case DW_REG_ARM_R11:
188 return &mR11expr;
189 case DW_REG_ARM_R7:
190 return &mR7expr;
191 #elif defined(GP_ARCH_arm64)
192 case DW_REG_AARCH64_X29:
193 return &mX29expr;
194 case DW_REG_AARCH64_X30:
195 return &mX30expr;
196 case DW_REG_AARCH64_SP:
197 return &mSPexpr;
198 #elif defined(GP_ARCH_mips64)
199 case DW_REG_MIPS_SP:
200 return &mSPexpr;
201 case DW_REG_MIPS_FP:
202 return &mFPexpr;
203 case DW_REG_MIPS_PC:
204 return &mPCexpr;
205 #else
206 # error "Unknown arch"
207 #endif
208 default:
209 return nullptr;
210 }
211 }
212
RuleSet()213 RuleSet::RuleSet() {
214 mAddr = 0;
215 mLen = 0;
216 // The only other fields are of type LExpr and those are initialised
217 // by LExpr::LExpr().
218 }
219
220 ////////////////////////////////////////////////////////////////
221 // SecMap //
222 ////////////////////////////////////////////////////////////////
223
224 // See header file LulMainInt.h for comments about invariants.
225
// Construct an empty SecMap.  mSummaryMinAddr=1 > mSummaryMaxAddr=0 is
// the "empty map" encoding described in LulMainInt.h.
SecMap::SecMap(void (*aLog)(const char*))
    : mSummaryMinAddr(1), mSummaryMaxAddr(0), mUsable(true), mLog(aLog) {}
228
~SecMap()229 SecMap::~SecMap() { mRuleSets.clear(); }
230
231 // RUNS IN NO-MALLOC CONTEXT
FindRuleSet(uintptr_t ia)232 RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
233 // Binary search mRuleSets to find one that brackets |ia|.
234 // lo and hi need to be signed, else the loop termination tests
235 // don't work properly. Note that this works correctly even when
236 // mRuleSets.size() == 0.
237
238 // Can't do this until the array has been sorted and preened.
239 MOZ_ASSERT(mUsable);
240
241 long int lo = 0;
242 long int hi = (long int)mRuleSets.size() - 1;
243 while (true) {
244 // current unsearched space is from lo to hi, inclusive.
245 if (lo > hi) {
246 // not found
247 return nullptr;
248 }
249 long int mid = lo + ((hi - lo) / 2);
250 RuleSet* mid_ruleSet = &mRuleSets[mid];
251 uintptr_t mid_minAddr = mid_ruleSet->mAddr;
252 uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1;
253 if (ia < mid_minAddr) {
254 hi = mid - 1;
255 continue;
256 }
257 if (ia > mid_maxAddr) {
258 lo = mid + 1;
259 continue;
260 }
261 MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
262 return mid_ruleSet;
263 }
264 // NOTREACHED
265 }
266
// Add a RuleSet to the collection.  The rule is copied in.  Calling
// this makes the map non-searchable.
void SecMap::AddRuleSet(const RuleSet* rs) {
  // The array is no longer sorted/preened, so mark it unusable until
  // the next PrepareRuleSets() call.
  mUsable = false;
  mRuleSets.push_back(*rs);
}
273
274 // Add a PfxInstr to the vector of such instrs, and return the index
275 // in the vector. Calling this makes the map non-searchable.
AddPfxInstr(PfxInstr pfxi)276 uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
277 mUsable = false;
278 mPfxInstrs.push_back(pfxi);
279 return mPfxInstrs.size() - 1;
280 }
281
// Ordering predicate for std::sort: sorts RuleSets by ascending start
// address.  (Despite the "LE" in the name, this is strictly-less-than,
// as std::sort requires.)
static bool CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
  return rs1.mAddr < rs2.mAddr;
}
285
// Prepare the map for searching.  Completely remove any which don't
// fall inside the specified range [start, +len).  On return the
// entries are sorted, non-overlapping and non-empty, the summary
// min/max addresses are set, and the map is marked usable.
void SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen) {
  if (mRuleSets.empty()) {
    return;
  }

  MOZ_ASSERT(aLen > 0);
  if (aLen == 0) {
    // This should never happen.
    mRuleSets.clear();
    return;
  }

  // Sort by start addresses.
  std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);

  // Detect any entry not completely contained within [start, +len).
  // Set its length to zero, so that the next pass will remove it.
  for (size_t i = 0; i < mRuleSets.size(); ++i) {
    RuleSet* rs = &mRuleSets[i];
    if (rs->mLen > 0 &&
        (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
      rs->mLen = 0;
    }
  }

  // Iteratively truncate any overlaps and remove any zero length
  // entries that might result, or that may have been present
  // initially.  Unless the input is seriously screwy, this is
  // expected to iterate only once.
  while (true) {
    size_t i;
    size_t n = mRuleSets.size();
    size_t nZeroLen = 0;

    if (n == 0) {
      break;
    }

    // Truncate each entry so it ends where its successor begins,
    // counting how many entries that (or the earlier pass) left empty.
    for (i = 1; i < n; ++i) {
      RuleSet* prev = &mRuleSets[i - 1];
      RuleSet* here = &mRuleSets[i];
      MOZ_ASSERT(prev->mAddr <= here->mAddr);
      if (prev->mAddr + prev->mLen > here->mAddr) {
        prev->mLen = here->mAddr - prev->mAddr;
      }
      if (prev->mLen == 0) nZeroLen++;
    }

    // The loop above never examined the final entry's length.
    if (mRuleSets[n - 1].mLen == 0) {
      nZeroLen++;
    }

    // At this point, the entries are in-order and non-overlapping.
    // If none of them are zero-length, we are done.
    if (nZeroLen == 0) {
      break;
    }

    // Slide back the entries to remove the zero length ones.
    size_t j = 0;  // The write-point.
    for (i = 0; i < n; ++i) {
      if (mRuleSets[i].mLen == 0) {
        continue;
      }
      if (j != i) mRuleSets[j] = mRuleSets[i];
      ++j;
    }
    MOZ_ASSERT(i == n);
    MOZ_ASSERT(nZeroLen <= n);
    MOZ_ASSERT(j == n - nZeroLen);
    while (nZeroLen > 0) {
      mRuleSets.pop_back();
      nZeroLen--;
    }

    MOZ_ASSERT(mRuleSets.size() == j);
  }

  size_t n = mRuleSets.size();

#ifdef DEBUG
  // Do a final check on the rules: their address ranges must be
  // ascending, non overlapping, non zero sized.
  if (n > 0) {
    MOZ_ASSERT(mRuleSets[0].mLen > 0);
    for (size_t i = 1; i < n; ++i) {
      RuleSet* prev = &mRuleSets[i - 1];
      RuleSet* here = &mRuleSets[i];
      MOZ_ASSERT(prev->mAddr < here->mAddr);
      MOZ_ASSERT(here->mLen > 0);
      MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
    }
  }
#endif

  // Set the summary min and max address values.
  if (n == 0) {
    // Use the values defined in comments in the class declaration.
    mSummaryMinAddr = 1;
    mSummaryMaxAddr = 0;
  } else {
    mSummaryMinAddr = mRuleSets[0].mAddr;
    mSummaryMaxAddr = mRuleSets[n - 1].mAddr + mRuleSets[n - 1].mLen - 1;
  }
  char buf[150];
  SprintfLiteral(buf, "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
                 (int)n, (unsigned long long int)mSummaryMinAddr,
                 (unsigned long long int)mSummaryMaxAddr);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Is now usable for binary search.
  mUsable = true;

#if 0
  mLog("\nRulesets after preening\n");
  for (size_t i = 0; i < mRuleSets.size(); ++i) {
    mRuleSets[i].Print(mLog);
    mLog("\n");
  }
  mLog("\n");
#endif
}
411
IsEmpty()412 bool SecMap::IsEmpty() { return mRuleSets.empty(); }
413
SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const414 size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
415 size_t n = aMallocSizeOf(this);
416
417 // It's conceivable that these calls would be unsafe with some
418 // implementations of std::vector, but it seems to be working for now...
419 n += aMallocSizeOf(mRuleSets.data());
420 n += aMallocSizeOf(mPfxInstrs.data());
421
422 return n;
423 }
424
425 ////////////////////////////////////////////////////////////////
426 // SegArray //
427 ////////////////////////////////////////////////////////////////
428
// A SegArray holds a set of address ranges that together exactly
// cover an address range, with no overlaps or holes.  Each range has
// an associated value, which in this case has been specialised to be
// a simple boolean.  The representation is kept to minimal canonical
// form in which adjacent ranges with the same associated value are
// merged together.  Each range is represented by a |struct Seg|.
//
// SegArrays are used to keep track of which parts of the address
// space are known to contain instructions.
class SegArray {
 public:
  // Set the value for the inclusive range [lo, hi] to |val|, splitting
  // existing segments at the boundaries and re-merging afterwards so
  // the array stays in canonical form.  Requests with lo > hi are
  // ignored.
  void add(uintptr_t lo, uintptr_t hi, bool val) {
    if (lo > hi) {
      return;
    }
    split_at(lo);
    if (hi < UINTPTR_MAX) {
      split_at(hi + 1);
    }
    std::vector<Seg>::size_type iLo, iHi, i;
    iLo = find(lo);
    iHi = find(hi);
    for (i = iLo; i <= iHi; ++i) {
      mSegs[i].val = val;
    }
    preen();
  }

  // RUNS IN NO-MALLOC CONTEXT
  // If |addr| lies in a segment whose value is true (known code),
  // write that segment's inclusive bounds to |*rx_min|/|*rx_max| and
  // return true.  Otherwise return false, leaving the outputs
  // untouched.
  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
    std::vector<Seg>::size_type i = find(addr);
    if (!mSegs[i].val) {
      return false;
    }
    *rx_min = mSegs[i].lo;
    *rx_max = mSegs[i].hi;
    return true;
  }

  // Initially the entire address space is one segment marked false.
  SegArray() {
    Seg s(0, UINTPTR_MAX, false);
    mSegs.push_back(s);
  }

 private:
  // One maximal inclusive range [lo, hi] with its associated value.
  struct Seg {
    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
    uintptr_t lo;
    uintptr_t hi;
    bool val;
  };

  // Restore canonical form by merging adjacent segments that carry the
  // same value.  This is index-based rather than iterator-based: the
  // previous implementation erased an element and then decremented its
  // iterator, which is undefined behaviour when the merge happens at
  // the front of the vector (decrementing begin()).
  void preen() {
    std::vector<Seg>::size_type i = 0;
    while (i + 1 < mSegs.size()) {
      if (mSegs[i].val == mSegs[i + 1].val) {
        // Absorb mSegs[i+1] into mSegs[i] and retry at the same index,
        // so a whole run of equal-valued segments collapses into one.
        mSegs[i].hi = mSegs[i + 1].hi;
        mSegs.erase(mSegs.begin() + i + 1);
      } else {
        ++i;
      }
    }
  }

  // RUNS IN NO-MALLOC CONTEXT
  // Binary-search for the index of the segment containing |a|.  The
  // segments always cover the entire address space, so the search
  // cannot fail; the -1 return is unreachable defensive code.
  std::vector<Seg>::size_type find(uintptr_t a) {
    long int lo = 0;
    long int hi = (long int)mSegs.size();
    while (true) {
      // The unsearched space is lo .. hi inclusive.
      if (lo > hi) {
        // Not found.  This can't happen.
        return (std::vector<Seg>::size_type)(-1);
      }
      long int mid = lo + ((hi - lo) / 2);
      uintptr_t mid_lo = mSegs[mid].lo;
      uintptr_t mid_hi = mSegs[mid].hi;
      if (a < mid_lo) {
        hi = mid - 1;
        continue;
      }
      if (a > mid_hi) {
        lo = mid + 1;
        continue;
      }
      return (std::vector<Seg>::size_type)mid;
    }
  }

  // Ensure a segment boundary exists exactly at |a|, splitting the
  // segment containing |a| if necessary.  No-op if |a| is already the
  // low bound of some segment.
  void split_at(uintptr_t a) {
    std::vector<Seg>::size_type i = find(a);
    if (mSegs[i].lo == a) {
      return;
    }
    mSegs.insert(mSegs.begin() + i + 1, mSegs[i]);
    mSegs[i].hi = a - 1;
    mSegs[i + 1].lo = a;
  }

  // Debug-only helper: dump the whole array to stdout.
  void show() {
    printf("<< %d entries:\n", (int)mSegs.size());
    for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();
         ++iter) {
      printf(" %016llx %016llx %s\n", (unsigned long long int)(*iter).lo,
             (unsigned long long int)(*iter).hi,
             (*iter).val ? "true" : "false");
    }
    printf(">>\n");
  }

  std::vector<Seg> mSegs;
};
544
545 ////////////////////////////////////////////////////////////////
546 // PriMap //
547 ////////////////////////////////////////////////////////////////
548
// The primary map: an address-sorted collection of non-overlapping
// SecMaps, one per mapped object, searchable by instruction address.
class PriMap {
 public:
  explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}

  // RUNS IN NO-MALLOC CONTEXT
  // Find the RuleSet covering |ia| together with the PfxInstr vector
  // of its owning SecMap.  Either or both components of the returned
  // pair may be nullptr if nothing brackets |ia|.
  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
    SecMap* sm = FindSecMap(ia);
    return pair<const RuleSet*, const vector<PfxInstr>*>(
        sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr);
  }

  // Add a secondary map.  No overlaps allowed w.r.t. existing
  // secondary maps.  Takes ownership of |aSecMap|.
  void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
    // We can't add an empty SecMap to the PriMap.  But that's OK
    // since we'd never be able to find anything in it anyway.
    if (aSecMap->IsEmpty()) {
      return;
    }

    // Iterate through the SecMaps and find the right place for this
    // one.  At the same time, ensure that the in-order
    // non-overlapping invariant is preserved (and, generally, holds).
    // FIXME: this gives a cost that is O(N^2) in the total number of
    // shared objects in the system.  ToDo: better.
    MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);

    size_t num_secMaps = mSecMaps.size();
    uintptr_t i;
    for (i = 0; i < num_secMaps; ++i) {
      mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
      MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr);
      if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) {
        // |aSecMap| needs to be inserted immediately before mSecMaps[i].
        break;
      }
    }
    MOZ_ASSERT(i <= num_secMaps);
    if (i == num_secMaps) {
      // It goes at the end.
      mSecMaps.push_back(std::move(aSecMap));
    } else {
      std::vector<mozilla::UniquePtr<SecMap>>::iterator iter =
          mSecMaps.begin() + i;
      mSecMaps.insert(iter, std::move(aSecMap));
    }
    char buf[100];
    SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
                   (int)mSecMaps.size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);
  }

  // Remove and delete any SecMaps in the mapping, that intersect
  // with the specified address range.
  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
    MOZ_ASSERT(avma_min <= avma_max);
    size_t num_secMaps = mSecMaps.size();
    if (num_secMaps > 0) {
      intptr_t i;
      // Iterate from end to start over the vector, so as to ensure
      // that the special case where |avma_min| and |avma_max| denote
      // the entire address space, can be completed in time proportional
      // to the number of elements in the map.
      for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) {
        mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
        if (sm_i->mSummaryMaxAddr < avma_min ||
            avma_max < sm_i->mSummaryMinAddr) {
          // There's no overlap.  Move on.
          continue;
        }
        // We need to remove mSecMaps[i] and slide all those above it
        // downwards to cover the hole.
        mSecMaps.erase(mSecMaps.begin() + i);
      }
    }
  }

  // Return the number of currently contained SecMaps.
  size_t CountSecMaps() { return mSecMaps.size(); }

  // Memory-reporting: this object, the SecMap pointer array, and each
  // contained SecMap.
  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
    size_t n = aMallocSizeOf(this);

    // It's conceivable that this call would be unsafe with some
    // implementations of std::vector, but it seems to be working for now...
    n += aMallocSizeOf(mSecMaps.data());

    for (size_t i = 0; i < mSecMaps.size(); i++) {
      n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf);
    }

    return n;
  }

 private:
  // RUNS IN NO-MALLOC CONTEXT
  // Binary-search mSecMaps for the one whose summary address range
  // brackets |ia|, or nullptr if there is none.
  SecMap* FindSecMap(uintptr_t ia) {
    // Binary search mSecMaps to find one that brackets |ia|.
    // lo and hi need to be signed, else the loop termination tests
    // don't work properly.
    long int lo = 0;
    long int hi = (long int)mSecMaps.size() - 1;
    while (true) {
      // current unsearched space is from lo to hi, inclusive.
      if (lo > hi) {
        // not found
        return nullptr;
      }
      long int mid = lo + ((hi - lo) / 2);
      mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid];
      uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr;
      uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr;
      if (ia < mid_minAddr) {
        hi = mid - 1;
        continue;
      }
      if (ia > mid_maxAddr) {
        lo = mid + 1;
        continue;
      }
      MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
      return mid_secMap.get();
    }
    // NOTREACHED
  }

 private:
  // sorted array of per-object ranges, non overlapping, non empty
  std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;

  // a logging sink, for debugging.
  void (*mLog)(const char*);
};
683
684 ////////////////////////////////////////////////////////////////
685 // LUL //
686 ////////////////////////////////////////////////////////////////
687
// Logging helper for LUL member functions: prefixes |_str| with the
// process id, thread id and this LUL object's address, then hands the
// result to mLog.  Uses |this| and |mLog|, so it is only usable inside
// LUL member functions.
#define LUL_LOG(_str)                                           \
  do {                                                          \
    char buf[200];                                              \
    SprintfLiteral(buf, "LUL: pid %d tid %d lul-obj %p: %s",    \
                   profiler_current_process_id(),               \
                   profiler_current_thread_id(), this, (_str)); \
    buf[sizeof(buf) - 1] = 0;                                   \
    mLog(buf);                                                  \
  } while (0)
697
// Construct a LUL in admin mode, owned by the calling thread: mapping
// notifications are accepted but unwinding is not yet enabled (see
// EnableUnwinding()).
LUL::LUL(void (*aLog)(const char*))
    : mLog(aLog),
      mAdminMode(true),
      mAdminThreadId(profiler_current_thread_id()),
      mPriMap(new PriMap(aLog)),
      mSegArray(new SegArray()),
      mUSU(new UniqueStringUniverse()) {
  LUL_LOG("LUL::LUL: Created object");
}
707
// Release the owned maps and string universe.  mLog is nulled after
// its final use (the log call above) as a use-after-destroy tripwire.
LUL::~LUL() {
  LUL_LOG("LUL::~LUL: Destroyed object");
  delete mPriMap;
  delete mSegArray;
  mLog = nullptr;
  delete mUSU;
}
715
MaybeShowStats()716 void LUL::MaybeShowStats() {
717 // This is racey in the sense that it can't guarantee that
718 // n_new == n_new_Context + n_new_CFI + n_new_Scanned
719 // if it should happen that mStats is updated by some other thread
720 // in between computation of n_new and n_new_{Context,CFI,FP}.
721 // But it's just stats printing, so we don't really care.
722 uint32_t n_new = mStats - mStatsPrevious;
723 if (n_new >= 5000) {
724 uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
725 uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;
726 uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;
727 mStatsPrevious = mStats;
728 char buf[200];
729 SprintfLiteral(buf,
730 "LUL frame stats: TOTAL %5u"
731 " CTX %4u CFI %4u FP %4u",
732 n_new, n_new_Context, n_new_CFI, n_new_FP);
733 buf[sizeof(buf) - 1] = 0;
734 mLog(buf);
735 }
736 }
737
SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const738 size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
739 size_t n = aMallocSizeOf(this);
740 n += mPriMap->SizeOfIncludingThis(aMallocSizeOf);
741
742 // Measurement of the following members may be added later if DMD finds it
743 // is worthwhile:
744 // - mSegArray
745 // - mUSU
746
747 return n;
748 }
749
// Switch this LUL out of admin mode so that unwinding may begin.
// Must be called on the thread that constructed the object.
void LUL::EnableUnwinding() {
  LUL_LOG("LUL::EnableUnwinding");
  // Don't assert for Admin mode here.  That is, tolerate a call here
  // if we are already in Unwinding mode.
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mAdminMode = false;
}
758
// Register a new code mapping [aRXavma, +aSize) for the object file
// |aFileName|: read its CFI or EXIDX unwind data into a new SecMap,
// add that to the primary map, and mark the range as executable in the
// segment array.  If |aMappedImage| is non-null it is used as an
// in-memory copy of the object instead of re-reading the file.
// Admin-mode / admin-thread only.
void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
                         const void* aMappedImage) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[200];
  SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
                 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
                 aFileName);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Ignore obviously-stupid notifications.
  if (aSize > 0) {
    // Here's a new mapping, for this object.
    mozilla::UniquePtr<SecMap> smap = mozilla::MakeUnique<SecMap>(mLog);

    // Read CFI or EXIDX unwind data into |smap|.
    if (!aMappedImage) {
      (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
                                smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    } else {
      (void)lul::ReadSymbolDataInternal(
          (const uint8_t*)aMappedImage, string(aFileName),
          std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    }

    mLog("NotifyMap .. preparing entries\n");

    // Sort/preen the entries and restrict them to the mapped range.
    smap->PrepareRuleSets(aRXavma, aSize);

    SprintfLiteral(buf, "NotifyMap got %lld entries\n",
                   (long long int)smap->Size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);

    // Add it to the primary map (the top level set of mapped objects).
    mPriMap->AddSecMap(std::move(smap));

    // Tell the segment array about the mapping, so that the stack
    // scan and __kernel_syscall mechanisms know where valid code is.
    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
  }
}
804
NotifyExecutableArea(uintptr_t aRXavma,size_t aSize)805 void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
806 MOZ_RELEASE_ASSERT(mAdminMode);
807 MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
808
809 mLog(":\n");
810 char buf[200];
811 SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n",
812 (unsigned long long int)aRXavma,
813 (unsigned long long int)aSize);
814 buf[sizeof(buf) - 1] = 0;
815 mLog(buf);
816
817 // Ignore obviously-stupid notifications.
818 if (aSize > 0) {
819 // Tell the segment array about the mapping, so that the stack
820 // scan and __kernel_syscall mechanisms know where valid code is.
821 mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
822 }
823 }
824
// Undo the effect of earlier mapping notifications for the inclusive
// range [aRXavmaMin, aRXavmaMax]: drop any intersecting SecMaps and
// mark the range as non-code.  Admin-mode / admin-thread only.
void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[100];
  SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",
                 (unsigned long long int)aRXavmaMin,
                 (unsigned long long int)aRXavmaMax);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);

  // Remove from the primary map, any secondary maps that intersect
  // with the address range.  Also delete the secondary maps.
  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);

  // Tell the segment array that the address range no longer
  // contains valid code.
  mSegArray->add(aRXavmaMin, aRXavmaMax, false);

  SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",
                 (int)mPriMap->CountSecMaps());
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);
}
852
// Return the number of SecMaps (one per notified object mapping)
// currently held.  Admin-mode / admin-thread only.
size_t LUL::CountMappings() {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  return mPriMap->CountSecMaps();
}
859
// RUNS IN NO-MALLOC CONTEXT
// Read one native word from the captured stack image at address
// |aAddr|.  Returns an invalid TaggedUWord if |aAddr| is itself
// invalid or if any byte of the word lies outside the captured range
// [mStartAvma, mStartAvma + mLen).
static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
  if (!aAddr.Valid()) {
    return TaggedUWord();
  }

  // Lower limit check.  |aAddr.Value()| is the lowest requested address
  // and |aStackImg->mStartAvma| is the lowest address we actually have,
  // so the comparison is straightforward.
  if (aAddr.Value() < aStackImg->mStartAvma) {
    return TaggedUWord();
  }

  // Upper limit check.  We must compute the highest requested address
  // and the highest address we actually have, but being careful to
  // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
  // 3/7 values below that, then we will get overflow.  See bug #1245477.
  typedef CheckedInt<uintptr_t> CheckedUWord;
  CheckedUWord highest_requested_plus_one =
      CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
  CheckedUWord highest_available_plus_one =
      CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
  if (!highest_requested_plus_one.isValid()     // overflow?
      || !highest_available_plus_one.isValid()  // overflow?
      || (highest_requested_plus_one.value() >
          highest_available_plus_one.value())) {  // in range?
    return TaggedUWord();
  }

  // All checks passed; read the word out of the image's copy of the
  // stack, offset by how far |aAddr| is above the image's base.
  return TaggedUWord(
      *(uintptr_t*)(&aStackImg
                         ->mContents[aAddr.Value() - aStackImg->mStartAvma]));
}
893
// RUNS IN NO-MALLOC CONTEXT
// Fetch the pre-unwind value of DWARF register |aReg| from |aOldRegs|,
// or |aCFA| for the pseudo-register DW_REG_CFA.  An unknown register
// number asserts (debug builds) and yields an invalid TaggedUWord.
static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
                               TaggedUWord aCFA) {
  switch (aReg) {
    case DW_REG_CFA:
      return aCFA;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return aOldRegs->xbp;
    case DW_REG_INTEL_XSP:
      return aOldRegs->xsp;
    case DW_REG_INTEL_XIP:
      return aOldRegs->xip;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return aOldRegs->r7;
    case DW_REG_ARM_R11:
      return aOldRegs->r11;
    case DW_REG_ARM_R12:
      return aOldRegs->r12;
    case DW_REG_ARM_R13:
      return aOldRegs->r13;
    case DW_REG_ARM_R14:
      return aOldRegs->r14;
    case DW_REG_ARM_R15:
      return aOldRegs->r15;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return aOldRegs->x29;
    case DW_REG_AARCH64_X30:
      return aOldRegs->x30;
    case DW_REG_AARCH64_SP:
      return aOldRegs->sp;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return aOldRegs->sp;
    case DW_REG_MIPS_FP:
      return aOldRegs->fp;
    case DW_REG_MIPS_PC:
      return aOldRegs->pc;
#else
#  error "Unsupported arch"
#endif
    default:
      MOZ_ASSERT(0);
      return TaggedUWord();
  }
}
942
// RUNS IN NO-MALLOC CONTEXT
// See prototype for comment.
// Interprets the prefix-form expression that starts at
// |aPfxInstrs[start]| on a small fixed-size stack machine.  Returns
// the value left on top of the stack, or an invalid TaggedUWord on any
// malformed sequence, stack over/underflow, or failed memory access.
TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
                            TaggedUWord aCFA, const StackImage* aStackImg,
                            const vector<PfxInstr>& aPfxInstrs) {
  // A small evaluation stack, and a stack pointer, which points to
  // the highest numbered in-use element.
  const int N_STACK = 10;
  TaggedUWord stack[N_STACK];
  int stackPointer = -1;
  for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();

// Push/pop helpers.  Any overflow or underflow abandons evaluation by
// jumping to |fail| below.
#define PUSH(_tuw)                                             \
  do {                                                         \
    if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \
    stack[++stackPointer] = (_tuw);                            \
  } while (0)

#define POP(_lval)                                   \
  do {                                               \
    if (stackPointer < 0) goto fail; /* underflow */ \
    _lval = stack[stackPointer--];                   \
  } while (0)

  // Cursor in the instruction sequence.
  size_t curr = start + 1;

  // Check the start point is sane.
  size_t nInstrs = aPfxInstrs.size();
  if (start < 0 || (size_t)start >= nInstrs) goto fail;

  {
    // The instruction sequence must start with PX_Start.  If not,
    // something is seriously wrong.
    PfxInstr first = aPfxInstrs[start];
    if (first.mOpcode != PX_Start) goto fail;

    // Push the CFA on the stack to start with (or not), as required by
    // the original DW_OP_*expression* CFI.
    if (first.mOperand != 0) PUSH(aCFA);
  }

  while (true) {
    if (curr >= nInstrs) goto fail;  // ran off the end of the sequence

    PfxInstr pfxi = aPfxInstrs[curr++];
    if (pfxi.mOpcode == PX_End) break;  // we're done

    switch (pfxi.mOpcode) {
      case PX_Start:
        // This should appear only at the start of the sequence.
        goto fail;
      case PX_End:
        // We just took care of that, so we shouldn't see it again.
        MOZ_ASSERT(0);
        goto fail;
      case PX_SImm32:
        // Push a sign-extended 32-bit immediate.
        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
        break;
      case PX_DwReg: {
        // Push the pre-unwind value of the named DWARF register.
        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
        MOZ_ASSERT(reg != DW_REG_CFA);
        PUSH(EvaluateReg(reg, aOldRegs, aCFA));
        break;
      }
      case PX_Deref: {
        // Replace top-of-stack with the stack-image word it addresses.
        TaggedUWord addr;
        POP(addr);
        PUSH(DerefTUW(addr, aStackImg));
        break;
      }
      // Each binary operator below pops |x| (topmost) then |y| and
      // pushes |y OP x|, matching DWARF expression operand order.
      case PX_Add: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y + x);
        break;
      }
      case PX_Sub: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y - x);
        break;
      }
      case PX_And: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y & x);
        break;
      }
      case PX_Or: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y | x);
        break;
      }
      case PX_CmpGES: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y.CmpGEs(x));
        break;
      }
      case PX_Shl: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y << x);
        break;
      }
      default:
        // Unknown opcode: abandon evaluation.
        MOZ_ASSERT(0);
        goto fail;
    }
  }  // while (true)

  // Evaluation finished.  The top value on the stack is the result.
  if (stackPointer >= 0) {
    return stack[stackPointer];
  }
  // Else fall through

fail:
  return TaggedUWord();

#undef PUSH
#undef POP
}
1074
1075 // RUNS IN NO-MALLOC CONTEXT
EvaluateExpr(const UnwindRegs * aOldRegs,TaggedUWord aCFA,const StackImage * aStackImg,const vector<PfxInstr> * aPfxInstrs) const1076 TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
1077 const StackImage* aStackImg,
1078 const vector<PfxInstr>* aPfxInstrs) const {
1079 switch (mHow) {
1080 case UNKNOWN:
1081 return TaggedUWord();
1082 case NODEREF: {
1083 TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1084 tuw = tuw + TaggedUWord((intptr_t)mOffset);
1085 return tuw;
1086 }
1087 case DEREF: {
1088 TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1089 tuw = tuw + TaggedUWord((intptr_t)mOffset);
1090 return DerefTUW(tuw, aStackImg);
1091 }
1092 case PFXEXPR: {
1093 MOZ_ASSERT(aPfxInstrs);
1094 if (!aPfxInstrs) {
1095 return TaggedUWord();
1096 }
1097 return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs);
1098 }
1099 default:
1100 MOZ_ASSERT(0);
1101 return TaggedUWord();
1102 }
1103 }
1104
// RUNS IN NO-MALLOC CONTEXT
// Apply the RuleSet |aRS| to the register state |aRegs|, transforming it
// in place into the register state of the calling (next-outer) frame.
// Registers for which no new value can be computed are left invalid; in
// particular, if no new PC value is recovered, the caller must abandon
// the unwind.
static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
                       const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
  // Take a copy of regs, since we'll need to refer to the old values
  // whilst computing the new ones.
  UnwindRegs old_regs = *aRegs;

  // Mark all the current register values as invalid, so that the
  // caller can see, on our return, which ones have been computed
  // anew. If we don't even manage to compute a new PC value, then
  // the caller will have to abandon the unwind.
  // FIXME: Create and use instead: aRegs->SetAllInvalid();
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp = TaggedUWord();
  aRegs->xsp = TaggedUWord();
  aRegs->xip = TaggedUWord();
#elif defined(GP_ARCH_arm)
  aRegs->r7 = TaggedUWord();
  aRegs->r11 = TaggedUWord();
  aRegs->r12 = TaggedUWord();
  aRegs->r13 = TaggedUWord();
  aRegs->r14 = TaggedUWord();
  aRegs->r15 = TaggedUWord();
#elif defined(GP_ARCH_arm64)
  aRegs->x29 = TaggedUWord();
  aRegs->x30 = TaggedUWord();
  aRegs->sp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#elif defined(GP_ARCH_mips64)
  aRegs->sp = TaggedUWord();
  aRegs->fp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#else
#  error "Unsupported arch"
#endif

  // This is generally useful.
  const TaggedUWord inval = TaggedUWord();

  // First, compute the CFA.  There is no "old CFA" at this point, hence
  // the invalid value passed for that argument.
  TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,
                                               aStackImg, aPfxInstrs);

  // If we didn't manage to compute the CFA, well .. that's ungood,
  // but keep going anyway. It'll be OK provided none of the register
  // value rules mention the CFA. In any case, compute the new values
  // for each register that we're tracking.

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp =
      aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xsp =
      aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xip =
      aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm)
  aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r11 =
      aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r12 =
      aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r13 =
      aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r14 =
      aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r15 =
      aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm64)
  aRegs->x29 =
      aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->x30 =
      aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_mips64)
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#else
#  error "Unsupported arch"
#endif

  // We're done. Any regs for which we didn't manage to compute a
  // new value will now be marked as invalid.
}
1189
// RUNS IN NO-MALLOC CONTEXT
// Unwind the stack whose innermost state is given by |aStartRegs| and
// whose memory is captured in |aStackImg|.  Writes up to |aFramesAvail|
// recovered frames into |aFramePCs| / |aFrameSPs|, sets *aFramesUsed to
// the number of frames produced, and increments
// *aFramePointerFramesAcquired once for each frame that had to be
// recovered by frame-pointer chasing rather than CFI.
void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,
                 /*OUT*/ uintptr_t* aFrameSPs,
                 /*OUT*/ size_t* aFramesUsed,
                 /*OUT*/ size_t* aFramePointerFramesAcquired,
                 size_t aFramesAvail, UnwindRegs* aStartRegs,
                 StackImage* aStackImg) {
  // Unwinding is only legal once admin mode (rule loading) has finished.
  MOZ_RELEASE_ASSERT(!mAdminMode);

  /////////////////////////////////////////////////////////
  // BEGIN UNWIND

  *aFramesUsed = 0;

  // Work on a local copy of the registers; it is updated in place as we
  // step outwards, one frame per loop iteration.
  UnwindRegs regs = *aStartRegs;
  TaggedUWord last_valid_sp = TaggedUWord();

  while (true) {
    if (DEBUG_MAIN) {
      // Dump the current register state for debugging.
      char buf[300];
      mLog("\n");
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
      SprintfLiteral(
          buf, "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n",
          (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
          (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
          (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm)
      SprintfLiteral(
          buf,
          "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx"
          " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n",
          (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
          (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
          (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
          (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
          (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
          (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm64)
      SprintfLiteral(
          buf,
          "LoopTop: pc %d/%llx x29 %d/%llx x30 %d/%llx"
          " sp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
          (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_mips64)
      SprintfLiteral(
          buf, "LoopTop: pc %d/%llx sp %d/%llx fp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
          (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#else
#  error "Unsupported arch"
#endif
    }

    // Pick out the instruction address (ia) and stack pointer (sp) for
    // this arch.  On arm/arm64, for all but the innermost frame the
    // "instruction address" is the return address held in the link
    // register (r14 / x30).
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    TaggedUWord ia = regs.xip;
    TaggedUWord sp = regs.xsp;
#elif defined(GP_ARCH_arm)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
    TaggedUWord sp = regs.r13;
#elif defined(GP_ARCH_arm64)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
    TaggedUWord sp = regs.sp;
#elif defined(GP_ARCH_mips64)
    TaggedUWord ia = regs.pc;
    TaggedUWord sp = regs.sp;
#else
#  error "Unsupported arch"
#endif

    // Stop if the output arrays are full.
    if (*aFramesUsed >= aFramesAvail) {
      break;
    }

    // If we don't have a valid value for the PC, give up.
    if (!ia.Valid()) {
      break;
    }

    // If this is the innermost frame, record the SP value, which
    // presumably is valid. If this isn't the innermost frame, and we
    // have a valid SP value, check that its SP value isn't less that
    // the one we've seen so far, so as to catch potential SP value
    // cycles.
    if (*aFramesUsed == 0) {
      last_valid_sp = sp;
    } else {
      MOZ_ASSERT(last_valid_sp.Valid());
      if (sp.Valid()) {
        if (sp.Value() < last_valid_sp.Value()) {
          // Hmm, SP going in the wrong direction. Let's stop.
          break;
        }
        // Remember where we got to.
        last_valid_sp = sp;
      }
    }

    // For the innermost frame, the IA value is what we need. For all
    // other frames, it's actually the return address, so back up one
    // byte so as to get it into the calling instruction.
    aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
    aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
    (*aFramesUsed)++;

    // Find the RuleSet for the current IA, if any. This will also
    // query the backing (secondary) maps if it isn't found in the
    // thread-local cache.

    // If this isn't the innermost frame, back up into the calling insn.
    if (*aFramesUsed > 1) {
      ia = ia + TaggedUWord((uintptr_t)(-1));
    }

    pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
        mPriMap->Lookup(ia.Value());
    const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
    const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;

    if (DEBUG_MAIN) {
      char buf[100];
      SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
                     (unsigned long long int)ia.Value(), ruleset);
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
    }

#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
    /////////////////////////////////////////////
    ////
    // On 32 bit x86-linux, syscalls are often done via the VDSO
    // function __kernel_vsyscall, which doesn't have a corresponding
    // object that we can read debuginfo from. That effectively kills
    // off all stack traces for threads blocked in syscalls. Hence
    // special-case by looking at the code surrounding the program
    // counter.
    //
    // 0xf7757420 <__kernel_vsyscall+0>:	push   %ecx
    // 0xf7757421 <__kernel_vsyscall+1>:	push   %edx
    // 0xf7757422 <__kernel_vsyscall+2>:	push   %ebp
    // 0xf7757423 <__kernel_vsyscall+3>:	mov    %esp,%ebp
    // 0xf7757425 <__kernel_vsyscall+5>:	sysenter
    // 0xf7757427 <__kernel_vsyscall+7>:	nop
    // 0xf7757428 <__kernel_vsyscall+8>:	nop
    // 0xf7757429 <__kernel_vsyscall+9>:	nop
    // 0xf775742a <__kernel_vsyscall+10>:	nop
    // 0xf775742b <__kernel_vsyscall+11>:	nop
    // 0xf775742c <__kernel_vsyscall+12>:	nop
    // 0xf775742d <__kernel_vsyscall+13>:	nop
    // 0xf775742e <__kernel_vsyscall+14>:	int    $0x80
    // 0xf7757430 <__kernel_vsyscall+16>:	pop    %ebp
    // 0xf7757431 <__kernel_vsyscall+17>:	pop    %edx
    // 0xf7757432 <__kernel_vsyscall+18>:	pop    %ecx
    // 0xf7757433 <__kernel_vsyscall+19>:	ret
    //
    // In cases where the sampled thread is blocked in a syscall, its
    // program counter will point at "pop %ebp". Hence we look for
    // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
    // the corresponding register-recovery actions are:
    //    new_ebp = *(old_esp + 0)
    //    new eip = *(old_esp + 12)
    //    new_esp = old_esp + 16
    //
    // It may also be the case that the program counter points two
    // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
    // the case where the syscall has been restarted but the thread
    // hasn't been rescheduled. The code below doesn't handle that;
    // it could easily be made to.
    //
    if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
      uintptr_t insns_min, insns_max;
      uintptr_t eip = ia.Value();
      // Only peek at the code if |eip| lies inside a known code segment,
      // with enough margin on both sides for the 6-byte pattern check.
      bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
      if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
        uint8_t* eipC = (uint8_t*)eip;
        // Match "int $0x80; pop %ebp; pop %edx; pop %ecx; ret" with
        // |eip| pointing at the "pop %ebp".
        if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
            eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
          TaggedUWord sp_plus_0 = sp;
          TaggedUWord sp_plus_12 = sp;
          TaggedUWord sp_plus_16 = sp;
          sp_plus_12 = sp_plus_12 + TaggedUWord(12);
          sp_plus_16 = sp_plus_16 + TaggedUWord(16);
          TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
          TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
          TaggedUWord new_esp = sp_plus_16;
          if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
            regs.xbp = new_ebp;
            regs.xip = new_eip;
            regs.xsp = new_esp;
            continue;
          }
        }
      }
    }
    ////
    /////////////////////////////////////////////
#endif  // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)

    // So, do we have a ruleset for this address? If so, use it now.
    if (ruleset) {
      if (DEBUG_MAIN) {
        ruleset->Print(mLog);
        mLog("\n");
      }
      // Use the RuleSet to compute the registers for the previous
      // frame. |regs| is modified in-place.
      UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
      continue;
    }

#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||     \
    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
    defined(GP_PLAT_amd64_freebsd)
    // There's no RuleSet for the specified address. On amd64/x86_linux, see if
    // it's possible to recover the caller's frame by using the frame pointer.

    // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
    // and assume the following layout:
    //
    //                 <--- new_SP
    //   +----------+
    //   | new_IP   |  (return address)
    //   +----------+
    //   | new_BP   |  <--- old_BP
    //   +----------+
    //   | ....     |
    //   | ....     |
    //   | ....     |
    //   +----------+  <---- old_SP (arbitrary, but must be <= old_BP)

    const size_t wordSzB = sizeof(uintptr_t);
    TaggedUWord old_xsp = regs.xsp;

    // points at new_BP ?
    TaggedUWord old_xbp = regs.xbp;
    // points at new_IP ?
    TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
    // is the new_SP ?
    TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);

    if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
        old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
      // We don't need to do any range, alignment or validity checks for
      // addresses passed to DerefTUW, since that performs them itself, and
      // returns an invalid value on failure. Any such value will poison
      // subsequent uses, and we do a final check for validity before putting
      // the computed values into |regs|.
      TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
      // Insist that the new BP is strictly above the old one, so the walk
      // makes progress and cannot cycle.
      if (new_xbp.Valid() && new_xbp.IsAligned() &&
          old_xbp.Value() < new_xbp.Value()) {
        TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
        TaggedUWord new_xsp = old_xbp_plus2;
        if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
          regs.xbp = new_xbp;
          regs.xip = new_xip;
          regs.xsp = new_xsp;
          (*aFramePointerFramesAcquired)++;
          continue;
        }
      }
    }
#elif defined(GP_ARCH_arm64)
    // Here is an example of generated code for prologue and epilogue..
    //
    // stp     x29, x30, [sp, #-16]!
    // mov     x29, sp
    // ...
    // ldp     x29, x30, [sp], #16
    // ret
    //
    // Next is another example of generated code.
    //
    // stp     x20, x19, [sp, #-32]!
    // stp     x29, x30, [sp, #16]
    // add     x29, sp, #0x10
    // ...
    // ldp     x29, x30, [sp, #16]
    // ldp     x20, x19, [sp], #32
    // ret
    //
    // Previous x29 and x30 register are stored in the address of x29 register.
    // But since sp register value depends on local variables, we cannot compute
    // previous sp register from current sp/fp/lr register and there is no
    // regular rule for sp register in prologue. But since return address is lr
    // register, if x29 is valid, we will get return address without sp
    // register.
    //
    // So we assume the following layout that if no rule set. x29 is frame
    // pointer, so we will be able to compute x29 and x30 .
    //
    //           +----------+  <--- new_sp (cannot compute)
    //           | ....     |
    //           +----------+
    //           | new_lr   |  (return address)
    //           +----------+
    //           | new_fp   |  <--- old_fp
    //           +----------+
    //           | ....     |
    //           | ....     |
    //           +----------+  <---- old_sp (arbitrary, but unused)

    TaggedUWord old_fp = regs.x29;
    if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
        last_valid_sp.Value() <= old_fp.Value()) {
      TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
      // As on x86, require strictly increasing FP values to guarantee
      // progress.
      if (new_fp.Valid() && new_fp.IsAligned() &&
          old_fp.Value() < new_fp.Value()) {
        TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
        TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
        if (new_lr.Valid()) {
          regs.x29 = new_fp;
          regs.x30 = new_lr;
          // When using frame pointer to walk stack, we cannot compute sp
          // register since we cannot compute sp register from fp/lr/sp
          // register, and there is no regular rule to compute previous sp
          // register. So mark as invalid.
          regs.sp = TaggedUWord();
          (*aFramePointerFramesAcquired)++;
          continue;
        }
      }
    }
#endif  // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||
        // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android)

    // We failed to recover a frame either using CFI or FP chasing, and we
    // have no other ways to recover the frame. So we have to give up.
    break;

  }  // top level unwind loop

  // END UNWIND
  /////////////////////////////////////////////////////////
}
1536
1537 ////////////////////////////////////////////////////////////////
1538 // LUL Unit Testing //
1539 ////////////////////////////////////////////////////////////////
1540
// Number of bytes of the innermost stack presented to LUL by the unit
// tests (see GetAndCheckStackTrace below).
static const int LUL_UNIT_TEST_STACK_SIZE = 32768;

#if defined(GP_ARCH_mips64)
// Returns the caller's return address by reading MIPS register $31
// ($ra).  Must not be inlined, else $31 would not hold the desired
// return address at the point the asm runs.
static __attribute__((noinline)) unsigned long __getpc(void) {
  unsigned long rtaddr;
  __asm__ volatile("move %0, $31" : "=r"(rtaddr));
  return rtaddr;
}
#endif
1550
// This function is innermost in the test call sequence. It uses LUL
// to unwind, and compares the result with the sequence specified in
// the director string. These need to agree in order for the test to
// pass. In order not to screw up the results, this function needs
// to have a not-very big stack frame, since we're only presenting
// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
// that chunk unavoidably includes the frame for this function.
//
// This function must not be inlined into its callers. Doing so will
// cause the expected-vs-actual backtrace consistency checking to
// fail. Prints summary results to |aLUL|'s logging sink and also
// returns a boolean indicating whether or not the test passed.
static __attribute__((noinline)) bool GetAndCheckStackTrace(
    LUL* aLUL, const char* dstring) {
  // Get hold of the current unwind-start registers.  Each per-arch
  // branch below captures PC/SP/FP (and, where relevant, LR and other
  // callee-saved registers) into |block| via inline asm, then copies
  // them into |startRegs|.
  UnwindRegs startRegs;
  memset(&startRegs, 0, sizeof(startRegs));
#if defined(GP_ARCH_amd64)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 24);
  // block[0] = rip, block[1] = rsp, block[2] = rbp.
  __asm__ __volatile__(
      "leaq 0(%%rip), %%r15"
      "\n\t"
      "movq %%r15, 0(%0)"
      "\n\t"
      "movq %%rsp, 8(%0)"
      "\n\t"
      "movq %%rbp, 16(%0)"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "r15");
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  // The SysV AMD64 ABI allows leaf code to use 128 bytes below rsp, so
  // include that region in the snapshot.
  const uintptr_t REDZONE_SIZE = 128;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 12);
  // block[0] = eip (via call/pop trick), block[1] = esp, block[2] = ebp.
  __asm__ __volatile__(
      ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
      "\n\t"
      "popl %%edi"
      "\n\t"
      "movl %%edi, 0(%0)"
      "\n\t"
      "movl %%esp, 4(%0)"
      "\n\t"
      "movl %%ebp, 8(%0)"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "edi");
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
  volatile uintptr_t block[6];
  MOZ_ASSERT(sizeof(block) == 24);
  // block[0]=r15(pc), block[1]=r14(lr), block[2]=r13(sp), block[3]=r12,
  // block[4]=r11, block[5]=r7.
  __asm__ __volatile__(
      "mov r0, r15"
      "\n\t"
      "str r0, [%0, #0]"
      "\n\t"
      "str r14, [%0, #4]"
      "\n\t"
      "str r13, [%0, #8]"
      "\n\t"
      "str r12, [%0, #12]"
      "\n\t"
      "str r11, [%0, #16]"
      "\n\t"
      "str r7, [%0, #20]"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "r0");
  startRegs.r15 = TaggedUWord(block[0]);
  startRegs.r14 = TaggedUWord(block[1]);
  startRegs.r13 = TaggedUWord(block[2]);
  startRegs.r12 = TaggedUWord(block[3]);
  startRegs.r11 = TaggedUWord(block[4]);
  startRegs.r7 = TaggedUWord(block[5]);
  const uintptr_t REDZONE_SIZE = 0;
  // NOTE(review): block[1] here is r14 (the link register), not r13
  // (the stack pointer, block[2]).  The other arches use the SP as the
  // base of the stack snapshot -- confirm whether block[2] was intended.
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_ARCH_arm64)
  volatile uintptr_t block[4];
  MOZ_ASSERT(sizeof(block) == 32);
  // block[0]=pc, block[1]=x29(fp), block[2]=x30(lr), block[3]=sp.
  __asm__ __volatile__(
      "adr x0, . \n\t"
      "str x0, [%0, #0] \n\t"
      "str x29, [%0, #8] \n\t"
      "str x30, [%0, #16] \n\t"
      "mov x0, sp \n\t"
      "str x0, [%0, #24] \n\t"
      :
      : "r"(&block[0])
      : "memory", "x0");
  startRegs.pc = TaggedUWord(block[0]);
  startRegs.x29 = TaggedUWord(block[1]);
  startRegs.x30 = TaggedUWord(block[2]);
  startRegs.sp = TaggedUWord(block[3]);
  const uintptr_t REDZONE_SIZE = 0;
  // NOTE(review): block[1] here is x29 (the frame pointer), not block[3]
  // (the stack pointer) -- confirm whether block[3] was intended as the
  // snapshot base, as on the x86 variants.
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_ARCH_mips64)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 24);
  // block[1]=sp ($29), block[2]=fp ($30); block[0] (pc) is filled in
  // below via __getpc().
  __asm__ __volatile__(
      "sd $29, 8(%0) \n"
      "sd $30, 16(%0) \n"
      :
      : "r"(block)
      : "memory");
  block[0] = __getpc();
  startRegs.pc = TaggedUWord(block[0]);
  startRegs.sp = TaggedUWord(block[1]);
  startRegs.fp = TaggedUWord(block[2]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#else
#  error "Unsupported platform"
#endif

  // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
  // stack.  Round both ends down to word boundaries and clamp the copy
  // length to LUL's maximum stack-image size.
  uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
  uintptr_t ws = sizeof(void*);
  start &= ~(ws - 1);
  end &= ~(ws - 1);
  uintptr_t nToCopy = end - start;
  if (nToCopy > lul::N_STACK_BYTES) {
    nToCopy = lul::N_STACK_BYTES;
  }
  MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
  StackImage* stackImg = new StackImage();
  stackImg->mLen = nToCopy;
  stackImg->mStartAvma = start;
  if (nToCopy > 0) {
    // Tell MSan/Valgrind the region is OK to read before copying it.
    MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
    memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
  }

  // Unwind it.
  const int MAX_TEST_FRAMES = 64;
  uintptr_t framePCs[MAX_TEST_FRAMES];
  uintptr_t frameSPs[MAX_TEST_FRAMES];
  size_t framesAvail = mozilla::ArrayLength(framePCs);
  size_t framesUsed = 0;
  size_t framePointerFramesAcquired = 0;
  aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,
               &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);

  delete stackImg;

  // if (0) {
  //  // Show what we have.
  //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
  //  for (size_t i = 0; i < framesUsed; i++) {
  //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
  //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
  //  }
  //  fprintf(stderr, "\n");
  //}

  // Check to see if there's a consistent binding between digits in
  // the director string ('1' .. '8') and the PC values acquired by
  // the unwind. If there isn't, the unwinding has failed somehow.
  uintptr_t binding[8];  // binding for '1' .. binding for '8'
  memset((void*)binding, 0, sizeof(binding));

  // The general plan is to work backwards along the director string
  // and forwards along the framePCs array. Doing so corresponds to
  // working outwards from the innermost frame of the recursive test set.
  const char* cursor = dstring;

  // Find the end. This leaves |cursor| two bytes past the first
  // character we want to look at -- see comment below.
  while (*cursor) cursor++;

  // Counts the number of consistent frames.
  size_t nConsistent = 0;

  // Iterate back to the start of the director string. The starting
  // points are a bit complex. We can't use framePCs[0] because that
  // contains the PC in this frame (above). We can't use framePCs[1]
  // because that will contain the PC at return point in the recursive
  // test group (TestFn[1-8]) for their call "out" to this function,
  // GetAndCheckStackTrace. Although LUL will compute a correct
  // return address, that will not be the same return address as for a
  // recursive call out of the the function to another function in the
  // group. Hence we can only start consistency checking at
  // framePCs[2].
  //
  // To be consistent, then, we must ignore the last element in the
  // director string as that corresponds to framePCs[1]. Hence the
  // start points are: framePCs[2] and the director string 2 bytes
  // before the terminating zero.
  //
  // Also as a result of this, the number of consistent frames counted
  // will always be one less than the length of the director string
  // (not including its terminating zero).
  size_t frameIx;
  for (cursor = cursor - 2, frameIx = 2;
       cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) {
    char c = *cursor;
    uintptr_t pc = framePCs[frameIx];
    // If this doesn't hold, the director string is ill-formed.
    MOZ_ASSERT(c >= '1' && c <= '8');
    int n = ((int)c) - ((int)'1');
    if (binding[n] == 0) {
      // There's no binding for |c| yet, so install |pc| and carry on.
      binding[n] = pc;
      nConsistent++;
      continue;
    }
    // There's a pre-existing binding for |c|. Check it's consistent.
    if (binding[n] != pc) {
      // Not consistent. Give up now.
      break;
    }
    // Consistent. Keep going.
    nConsistent++;
  }

  // So, did we succeed?  (See above: nConsistent can be at most
  // strlen(dstring) - 1.)
  bool passed = nConsistent + 1 == strlen(dstring);

  // Show the results.
  char buf[200];
  SprintfLiteral(buf, "LULUnitTest:   dstring = %s\n", dstring);
  buf[sizeof(buf) - 1] = 0;
  aLUL->mLog(buf);
  SprintfLiteral(buf, "LULUnitTest:   %d consistent, %d in dstring: %s\n",
                 (int)nConsistent, (int)strlen(dstring),
                 passed ? "PASS" : "FAIL");
  buf[sizeof(buf) - 1] = 0;
  aLUL->mLog(buf);

  return passed;
}
1794
// Macro magic to create a set of 8 mutually recursive functions with
// varying frame sizes. These will recurse amongst themselves as
// specified by |strP|, the directory string, and call
// GetAndCheckStackTrace when the string becomes empty, passing it the
// original value of the string. This checks the result, printing
// results on |aLUL|'s logging sink, and also returns a boolean
// indicating whether or not the results are acceptable (correct).

// Declares one test function; see GEN_TEST_FN for the matching definition.
#define DECL_TEST_FN(NAME) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);

// Defines one test function with a local array of (at least) FRAMESIZE
// bytes, so the eight generated functions have distinct frame sizes.
// Note: comments inside the macro body use /* */ form because the body
// lines end in backslash continuations.
#define GEN_TEST_FN(NAME, FRAMESIZE)                                          \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) {              \
    /* Create a frame of size (at least) FRAMESIZE, so that the */            \
    /* 8 functions created by this macro offer some variation in frame */     \
    /* sizes.  This isn't as simple as it might seem, since a clever */       \
    /* optimizing compiler (eg, clang-5) detects that the array is unused */  \
    /* and removes it.  We try to defeat this by passing it to a function */  \
    /* in a different compilation unit, and hoping that clang does not */     \
    /* notice that the call is a no-op. */                                    \
    char space[FRAMESIZE];                                                    \
    Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
                                                                              \
    if (*strP == '\0') {                                                      \
      /* We've come to the end of the director string. */                     \
      /* Take a stack snapshot. */                                            \
      return GetAndCheckStackTrace(aLUL, strPorig);                           \
    } else {                                                                  \
      /* Recurse onwards.  This is a bit subtle.  The obvious */              \
      /* thing to do here is call onwards directly, from within the */        \
      /* arms of the case statement.  That gives a problem in that */         \
      /* there will be multiple return points inside each function when */    \
      /* unwinding, so it will be difficult to check for consistency */       \
      /* against the director string.  Instead, we make an indirect */        \
      /* call, so as to guarantee that there is only one call site */         \
      /* within each function.  This does assume that the compiler */         \
      /* won't transform it back to the simple direct-call form. */           \
      /* To discourage it from doing so, the call is bracketed with */        \
      /* __asm__ __volatile__ sections so as to make it not-movable. */       \
      bool (*nextFn)(LUL*, const char*, const char*) = NULL;                  \
      switch (*strP) {                                                        \
        case '1':                                                             \
          nextFn = TestFn1;                                                   \
          break;                                                              \
        case '2':                                                             \
          nextFn = TestFn2;                                                   \
          break;                                                              \
        case '3':                                                             \
          nextFn = TestFn3;                                                   \
          break;                                                              \
        case '4':                                                             \
          nextFn = TestFn4;                                                   \
          break;                                                              \
        case '5':                                                             \
          nextFn = TestFn5;                                                   \
          break;                                                              \
        case '6':                                                             \
          nextFn = TestFn6;                                                   \
          break;                                                              \
        case '7':                                                             \
          nextFn = TestFn7;                                                   \
          break;                                                              \
        case '8':                                                             \
          nextFn = TestFn8;                                                   \
          break;                                                              \
        default:                                                              \
          nextFn = TestFn8;                                                   \
          break;                                                              \
      }                                                                       \
      /* "use" |space| immediately after the recursive call, */               \
      /* so as to dissuade clang from deallocating the space while */         \
      /* the call is active, or otherwise messing with the stack frame. */    \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      bool passed = nextFn(aLUL, strPorig, strP + 1);                         \
      Unused << write(1, space, 0);                                           \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      return passed;                                                          \
    }                                                                         \
  }
1874
// The test functions are mutually recursive, so it is necessary to
// declare them before defining them.  Each DECL_TEST_FN(TestFnN)
// expands to a forward declaration; judging by the function-pointer
// type used inside GEN_TEST_FN above, the signature is
// |bool TestFnN(LUL*, const char*, const char*)|.
DECL_TEST_FN(TestFn1)
DECL_TEST_FN(TestFn2)
DECL_TEST_FN(TestFn3)
DECL_TEST_FN(TestFn4)
DECL_TEST_FN(TestFn5)
DECL_TEST_FN(TestFn6)
DECL_TEST_FN(TestFn7)
DECL_TEST_FN(TestFn8)
1885
// Generate the bodies of the eight mutually-recursive test functions.
// The second argument is presumably a per-function local-buffer size
// (the macro body refers to a |space| array), chosen differently for
// each function so that every frame has a distinct size/layout for the
// unwinder to cope with -- TODO(review): confirm against the full
// GEN_TEST_FN definition above.
GEN_TEST_FN(TestFn1, 123)
GEN_TEST_FN(TestFn2, 456)
GEN_TEST_FN(TestFn3, 789)
GEN_TEST_FN(TestFn4, 23)
GEN_TEST_FN(TestFn5, 47)
GEN_TEST_FN(TestFn6, 117)
GEN_TEST_FN(TestFn7, 1)
GEN_TEST_FN(TestFn8, 99)
1894
1895 // This starts the test sequence going. Call here to generate a
1896 // sequence of calls as directed by the string |dstring|. The call
1897 // sequence will, from its innermost frame, finish by calling
1898 // GetAndCheckStackTrace() and passing it |dstring|.
1899 // GetAndCheckStackTrace() will unwind the stack, check consistency
1900 // of those results against |dstring|, and print a pass/fail message
1901 // to aLUL's logging sink. It also updates the counters in *aNTests
1902 // and aNTestsPassed.
__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
                                       /*OUT*/ int* aNTestsPassed, LUL* aLUL,
                                       const char* dstring) {
  // Ensure that the stack has at least this much space on it.  This
  // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
  // and hand it to LUL.  Safe in the sense that no segfault can
  // happen because the stack is at least this big.  This is all
  // somewhat dubious in the sense that a sufficiently clever compiler
  // (clang, for one) can figure out that space[] is unused and delete
  // it from the frame.  Hence the somewhat elaborate hoop jumping to
  // fill it up before the call and to at least appear to use the
  // value afterwards.
  int i;
  volatile char space[LUL_UNIT_TEST_STACK_SIZE];
  // Fill the array with a non-constant pattern; |volatile| forces the
  // stores to actually happen, so the frame really occupies the space.
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    space[i] = (char)(i & 0x7F);
  }

  // Really run the test: TestFn1 recurses per |dstring| and the
  // innermost frame performs the unwind-and-check (see comment above).
  bool passed = TestFn1(aLUL, dstring, dstring);

  // Appear to use space[], by visiting the value to compute some kind
  // of checksum, and then (apparently) using the checksum.
  int sum = 0;
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    // If this doesn't fool LLVM, I don't know what will.
    sum += space[i] - 3 * i;
  }
  // Feed |sum| into an empty asm as an input operand, making it live
  // so the checksum loop (and hence space[]) cannot be deleted.
  __asm__ __volatile__("" : : "r"(sum));

  // Update the counters.
  (*aNTests)++;
  if (passed) {
    (*aNTestsPassed)++;
  }
}
1939
RunLulUnitTests(int * aNTests,int * aNTestsPassed,LUL * aLUL)1940 void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
1941 LUL* aLUL) {
1942 aLUL->mLog(":\n");
1943 aLUL->mLog("LULUnitTest: BEGIN\n");
1944 *aNTests = *aNTestsPassed = 0;
1945 TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
1946 TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
1947 TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
1948 TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
1949 TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
1950 TestUnw(aNTests, aNTestsPassed, aLUL,
1951 "123456781122334455667788777777777777777777777");
1952 aLUL->mLog("LULUnitTest: END\n");
1953 aLUL->mLog(":\n");
1954 }
1955
1956 } // namespace lul
1957