//===-- tsan_rtl.h ----------------------------------------------*- C++ -*-===//
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Main internal TSan header file.
//
// Ground rules:
//   - C++ run-time should not be used (static CTORs, RTTI, exceptions, static
//     function-scope locals)
//   - All functions/classes/etc reside in namespace __tsan, except for those
//     declared in tsan_interface.h.
//   - Platform-specific files should be used instead of ifdefs (*).
//   - No system headers included in header files (*).
//   - Platform-specific headers are included only in platform-specific files (*).
//
//  (*) Except when inlining is critical for performance.
//===----------------------------------------------------------------------===//

#ifndef TSAN_RTL_H
#define TSAN_RTL_H

#include "sanitizer_common/sanitizer_allocator.h"
#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_asm.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_deadlock_detector_interface.h"
#include "sanitizer_common/sanitizer_libignore.h"
#include "sanitizer_common/sanitizer_suppressions.h"
#include "sanitizer_common/sanitizer_thread_registry.h"
#include "tsan_clock.h"
#include "tsan_defs.h"
#include "tsan_flags.h"
#include "tsan_sync.h"
#include "tsan_trace.h"
#include "tsan_vector.h"
#include "tsan_report.h"
#include "tsan_platform.h"
#include "tsan_mutexset.h"
#include "tsan_ignoreset.h"
#include "tsan_stack_trace.h"

#if SANITIZER_WORDSIZE != 64
# error "ThreadSanitizer is supported only on 64-bit platforms"
#endif

namespace __tsan {

#if !SANITIZER_GO
struct MapUnmapCallback;
#if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__)
static const uptr kAllocatorRegionSizeLog = 20;
static const uptr kAllocatorNumRegions =
    SANITIZER_MMAP_RANGE_SIZE >> kAllocatorRegionSizeLog;
typedef TwoLevelByteMap<(kAllocatorNumRegions >> 12), 1 << 12,
    MapUnmapCallback> ByteMap;
struct AP32 {
  static const uptr kSpaceBeg = 0;
  static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE;
  static const uptr kMetadataSize = 0;
  typedef __sanitizer::CompactSizeClassMap SizeClassMap;
  static const uptr kRegionSizeLog = kAllocatorRegionSizeLog;
  typedef __tsan::ByteMap ByteMap;
  typedef __tsan::MapUnmapCallback MapUnmapCallback;
  static const uptr kFlags = 0;
};
typedef SizeClassAllocator32<AP32> PrimaryAllocator;
#else
struct AP64 {  // Allocator64 parameters. Deliberately using a short name.
  static const uptr kSpaceBeg = Mapping::kHeapMemBeg;
  static const uptr kSpaceSize = Mapping::kHeapMemEnd - Mapping::kHeapMemBeg;
  static const uptr kMetadataSize = 0;
  typedef DefaultSizeClassMap SizeClassMap;
  typedef __tsan::MapUnmapCallback MapUnmapCallback;
  static const uptr kFlags = 0;
};
typedef SizeClassAllocator64<AP64> PrimaryAllocator;
#endif
typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
typedef LargeMmapAllocator<MapUnmapCallback> SecondaryAllocator;
typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
    SecondaryAllocator> Allocator;
Allocator *allocator();
#endif

void TsanCheckFailed(const char *file, int line, const char *cond,
                     u64 v1, u64 v2);

const u64 kShadowRodata = (u64)-1;  // .rodata shadow marker

// FastState (from most significant bit):
//   ignore          : 1
//   tid             : kTidBits
//   unused          : -
//   history_size    : 3
//   epoch           : kClkBits
class FastState {
 public:
  FastState(u64 tid, u64 epoch) {
    x_ = tid << kTidShift;
    x_ |= epoch;
    DCHECK_EQ(tid, this->tid());
    DCHECK_EQ(epoch, this->epoch());
    DCHECK_EQ(GetIgnoreBit(), false);
  }

  explicit FastState(u64 x)
      : x_(x) {
  }

  u64 raw() const {
    return x_;
  }

  u64 tid() const {
    u64 res = (x_ & ~kIgnoreBit) >> kTidShift;
    return res;
  }

  u64 TidWithIgnore() const {
    u64 res = x_ >> kTidShift;
    return res;
  }

  u64 epoch() const {
    u64 res = x_ & ((1ull << kClkBits) - 1);
    return res;
  }

  void IncrementEpoch() {
    u64 old_epoch = epoch();
    x_ += 1;
    DCHECK_EQ(old_epoch + 1, epoch());
    (void)old_epoch;
  }

  void SetIgnoreBit() { x_ |= kIgnoreBit; }
  void ClearIgnoreBit() { x_ &= ~kIgnoreBit; }
  bool GetIgnoreBit() const { return (s64)x_ < 0; }

  void SetHistorySize(int hs) {
    CHECK_GE(hs, 0);
    CHECK_LE(hs, 7);
    x_ = (x_ & ~(kHistoryMask << kHistoryShift)) | (u64(hs) << kHistoryShift);
  }

  ALWAYS_INLINE
  int GetHistorySize() const {
    return (int)((x_ >> kHistoryShift) & kHistoryMask);
  }

  void ClearHistorySize() {
    SetHistorySize(0);
  }

  ALWAYS_INLINE
  u64 GetTracePos() const {
    const int hs = GetHistorySize();
    // When hs == 0, the trace consists of 2 parts.
    const u64 mask = (1ull << (kTracePartSizeBits + hs + 1)) - 1;
    return epoch() & mask;
  }

 private:
  friend class Shadow;
  static const int kTidShift = 64 - kTidBits - 1;
  static const u64 kIgnoreBit = 1ull << 63;
  static const u64 kFreedBit = 1ull << 63;
  static const u64 kHistoryShift = kClkBits;
  static const u64 kHistoryMask = 7;
  u64 x_;
};
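
// Illustrative sketch (not part of the runtime): how a FastState is typically
// manipulated via the accessors above. The real call sites live in the .cc
// files; flags()->history_size is the runtime flag used there and is shown
// here only for illustration.
//
//   FastState fs(/*tid=*/1, /*epoch=*/0);
//   fs.SetHistorySize(flags()->history_size);  // selects trace size, 0..7
//   fs.IncrementEpoch();                       // advanced on traced events
//   u64 pos = fs.GetTracePos();                // index into the thread trace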

// Shadow (from most significant bit):
//   freed           : 1
//   tid             : kTidBits
//   is_atomic       : 1
//   is_read         : 1
//   size_log        : 2
//   addr0           : 3
//   epoch           : kClkBits
class Shadow : public FastState {
 public:
  explicit Shadow(u64 x)
      : FastState(x) {
  }

  explicit Shadow(const FastState &s)
      : FastState(s.x_) {
    ClearHistorySize();
  }

  void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) {
    DCHECK_EQ((x_ >> kClkBits) & 31, 0);
    DCHECK_LE(addr0, 7);
    DCHECK_LE(kAccessSizeLog, 3);
    x_ |= ((kAccessSizeLog << 3) | addr0) << kClkBits;
    DCHECK_EQ(kAccessSizeLog, size_log());
    DCHECK_EQ(addr0, this->addr0());
  }

  void SetWrite(unsigned kAccessIsWrite) {
    DCHECK_EQ(x_ & kReadBit, 0);
    if (!kAccessIsWrite)
      x_ |= kReadBit;
    DCHECK_EQ(kAccessIsWrite, IsWrite());
  }

  void SetAtomic(bool kIsAtomic) {
    DCHECK(!IsAtomic());
    if (kIsAtomic)
      x_ |= kAtomicBit;
    DCHECK_EQ(IsAtomic(), kIsAtomic);
  }

  bool IsAtomic() const {
    return x_ & kAtomicBit;
  }

  bool IsZero() const {
    return x_ == 0;
  }

  static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) {
    u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift;
    DCHECK_EQ(shifted_xor == 0, s1.TidWithIgnore() == s2.TidWithIgnore());
    return shifted_xor == 0;
  }

  static ALWAYS_INLINE
  bool Addr0AndSizeAreEqual(const Shadow s1, const Shadow s2) {
    u64 masked_xor = ((s1.x_ ^ s2.x_) >> kClkBits) & 31;
    return masked_xor == 0;
  }

  static ALWAYS_INLINE bool TwoRangesIntersect(Shadow s1, Shadow s2,
      unsigned kS2AccessSize) {
    bool res = false;
    u64 diff = s1.addr0() - s2.addr0();
    if ((s64)diff < 0) {  // s1.addr0 < s2.addr0  // NOLINT
      // if (s1.addr0() + size1 > s2.addr0()) return true;
      if (s1.size() > -diff)
        res = true;
    } else {
      // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true;
      if (kS2AccessSize > diff)
        res = true;
    }
    DCHECK_EQ(res, TwoRangesIntersectSlow(s1, s2));
    DCHECK_EQ(res, TwoRangesIntersectSlow(s2, s1));
    return res;
  }

  u64 ALWAYS_INLINE addr0() const { return (x_ >> kClkBits) & 7; }
  u64 ALWAYS_INLINE size() const { return 1ull << size_log(); }
  bool ALWAYS_INLINE IsWrite() const { return !IsRead(); }
  bool ALWAYS_INLINE IsRead() const { return x_ & kReadBit; }
  // The idea behind the freed bit is as follows.
  // When the memory is freed (or otherwise becomes inaccessible) we write
  // shadow values with the tid/epoch of the free and with the freed bit set.
  // During memory access processing the freed bit is treated as the msb of
  // the tid, so any access races with a shadow value that has the freed bit
  // set (it is as if it races with a write from a thread with which we have
  // never synchronized). This allows us to detect accesses to freed memory
  // without additional overhead in memory access processing, and at the same
  // time to restore the tid/epoch of the free.
  void MarkAsFreed() {
    x_ |= kFreedBit;
  }

  bool IsFreed() const {
    return x_ & kFreedBit;
  }

  bool GetFreedAndReset() {
    bool res = x_ & kFreedBit;
    x_ &= ~kFreedBit;
    return res;
  }

  bool ALWAYS_INLINE IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const {
    bool v = x_ & ((u64(kIsWrite ^ 1) << kReadShift)
        | (u64(kIsAtomic) << kAtomicShift));
    DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic));
    return v;
  }

  bool ALWAYS_INLINE IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const {
    bool v = ((x_ >> kReadShift) & 3)
        <= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
    DCHECK_EQ(v, (IsAtomic() < kIsAtomic) ||
        (IsAtomic() == kIsAtomic && !IsWrite() <= !kIsWrite));
    return v;
  }

  bool ALWAYS_INLINE IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const {
    bool v = ((x_ >> kReadShift) & 3)
        >= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
    DCHECK_EQ(v, (IsAtomic() > kIsAtomic) ||
        (IsAtomic() == kIsAtomic && !IsWrite() >= !kIsWrite));
    return v;
  }

 private:
  static const u64 kReadShift   = 5 + kClkBits;
  static const u64 kReadBit     = 1ull << kReadShift;
  static const u64 kAtomicShift = 6 + kClkBits;
  static const u64 kAtomicBit   = 1ull << kAtomicShift;

  u64 size_log() const { return (x_ >> (3 + kClkBits)) & 3; }

  static bool TwoRangesIntersectSlow(const Shadow s1, const Shadow s2) {
    if (s1.addr0() == s2.addr0()) return true;
    if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0())
      return true;
    if (s2.addr0() < s1.addr0() && s2.addr0() + s2.size() > s1.addr0())
      return true;
    return false;
  }
};
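
// Illustrative sketch (not part of the runtime): roughly how the memory access
// path composes a Shadow value from the thread's FastState using the setters
// above, here for a 4-byte plain write at offset 2 within an 8-byte shadow
// cell. The actual logic lives in MemoryAccess() in tsan_rtl.cc.
//
//   Shadow cur(thr->fast_state);
//   cur.SetAddr0AndSizeLog(/*addr0=*/2, /*kAccessSizeLog=*/kSizeLog4);
//   cur.SetWrite(/*kAccessIsWrite=*/true);
//   cur.SetAtomic(/*kIsAtomic=*/false);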

struct ThreadSignalContext;

struct JmpBuf {
  uptr sp;
  uptr mangled_sp;
  int int_signal_send;
  bool in_blocking_func;
  uptr in_signal_handler;
  uptr *shadow_stack_pos;
};

// A Processor represents a physical thread, or a P for Go.
// It is used to store internal resources like the allocator cache, and it does
// not participate in race-detection logic (it is invisible to the end user).
// In C++ it is tied to an OS thread just like ThreadState, but ideally it
// should be tied to a CPU (this way we would have fewer allocator caches).
// In Go it is tied to a P, so there are significantly fewer Processors than
// ThreadStates (which are tied to Gs).
// A ThreadState must be wired with a Processor to handle events.
struct Processor {
  ThreadState *thr; // currently wired thread, or nullptr
#if !SANITIZER_GO
  AllocatorCache alloc_cache;
  InternalAllocatorCache internal_alloc_cache;
#endif
  DenseSlabAllocCache block_cache;
  DenseSlabAllocCache sync_cache;
  DenseSlabAllocCache clock_cache;
  DDPhysicalThread *dd_pt;
};

#if !SANITIZER_GO
// ScopedGlobalProcessor temporarily sets up a global processor for the current
// thread, if it does not have one. Intended for interceptors that can run at
// the very end of a thread, when we have already destroyed the thread's
// processor.
struct ScopedGlobalProcessor {
  ScopedGlobalProcessor();
  ~ScopedGlobalProcessor();
};
#endif

// This struct is stored in TLS.
struct ThreadState {
  FastState fast_state;
  // The synch epoch represents the thread's epoch before the last
  // synchronization action. It allows us to reduce the number of shadow state
  // updates. For example, if fast_synch_epoch=100 and the last write to addr X
  // was at epoch=150, then when we process a write to X from the same thread
  // at epoch=200 we do nothing, because both writes happen in the same 'synch
  // epoch'. That is, if another memory access does not race with the former
  // write, it does not race with the latter as well.
  // QUESTION: can we squeeze this into ThreadState::Fast?
  // E.g. ThreadState::Fast is 44 bits, 32 are taken by synch_epoch and 12 are
  // taken by epoch between synchs.
  // This way we can save one load from TLS.
  u64 fast_synch_epoch;
  // This is a slow path flag. On the fast path, fast_state.GetIgnoreBit()
  // is read instead.
  // We do not distinguish between ignoring reads and writes
  // for better performance.
  int ignore_reads_and_writes;
  int ignore_sync;
  int suppress_reports;
  // Go does not support ignores.
#if !SANITIZER_GO
  IgnoreSet mop_ignore_set;
  IgnoreSet sync_ignore_set;
#endif
  // C/C++ uses a fixed-size shadow stack embedded into the Trace.
  // Go uses a malloc-allocated shadow stack with dynamic size.
  uptr *shadow_stack;
  uptr *shadow_stack_end;
  uptr *shadow_stack_pos;
  u64 *racy_shadow_addr;
  u64 racy_state[2];
  MutexSet mset;
  ThreadClock clock;
#if !SANITIZER_GO
  Vector<JmpBuf> jmp_bufs;
  int ignore_interceptors;
#endif
#if TSAN_COLLECT_STATS
  u64 stat[StatCnt];
#endif
  const int tid;
  const int unique_id;
  bool in_symbolizer;
  bool in_ignored_lib;
  bool is_inited;
  bool is_dead;
  bool is_freeing;
  bool is_vptr_access;
  const uptr stk_addr;
  const uptr stk_size;
  const uptr tls_addr;
  const uptr tls_size;
  ThreadContext *tctx;

#if SANITIZER_DEBUG && !SANITIZER_GO
  InternalDeadlockDetector internal_deadlock_detector;
#endif
  DDLogicalThread *dd_lt;

  // Current wired Processor, or nullptr. Required to handle any events.
  Processor *proc1;
#if !SANITIZER_GO
  Processor *proc() { return proc1; }
#else
  Processor *proc();
#endif

  atomic_uintptr_t in_signal_handler;
  ThreadSignalContext *signal_ctx;

#if !SANITIZER_GO
  u32 last_sleep_stack_id;
  ThreadClock last_sleep_clock;
#endif

  // Set in regions of runtime that must be signal-safe and fork-safe.
  // If set, malloc must not be called.
  int nomalloc;

  const ReportDesc *current_report;

  explicit ThreadState(Context *ctx, int tid, int unique_id, u64 epoch,
                       unsigned reuse_count,
                       uptr stk_addr, uptr stk_size,
                       uptr tls_addr, uptr tls_size);
};

#if !SANITIZER_GO
#if SANITIZER_MAC || SANITIZER_ANDROID
ThreadState *cur_thread();
void cur_thread_finalize();
#else
__attribute__((tls_model("initial-exec")))
extern THREADLOCAL char cur_thread_placeholder[];
INLINE ThreadState *cur_thread() {
  return reinterpret_cast<ThreadState *>(&cur_thread_placeholder);
}
INLINE void cur_thread_finalize() { }
#endif  // SANITIZER_MAC || SANITIZER_ANDROID
#endif  // SANITIZER_GO

class ThreadContext : public ThreadContextBase {
 public:
  explicit ThreadContext(int tid);
  ~ThreadContext();
  ThreadState *thr;
  u32 creation_stack_id;
  SyncClock sync;
  // Epoch at which the thread had started.
  // If we see an event from the thread stamped by an older epoch,
  // the event is from a dead thread that shared tid with this thread.
  u64 epoch0;
  u64 epoch1;

  // Override superclass callbacks.
  void OnDead() override;
  void OnJoined(void *arg) override;
  void OnFinished() override;
  void OnStarted(void *arg) override;
  void OnCreated(void *arg) override;
  void OnReset() override;
  void OnDetached(void *arg) override;
};

struct RacyStacks {
  MD5Hash hash[2];
  bool operator==(const RacyStacks &other) const {
    if (hash[0] == other.hash[0] && hash[1] == other.hash[1])
      return true;
    if (hash[0] == other.hash[1] && hash[1] == other.hash[0])
      return true;
    return false;
  }
};

struct RacyAddress {
  uptr addr_min;
  uptr addr_max;
};

struct FiredSuppression {
  ReportType type;
  uptr pc_or_addr;
  Suppression *supp;
};

struct Context {
  Context();

  bool initialized;
  bool after_multithreaded_fork;

  MetaMap metamap;

  Mutex report_mtx;
  int nreported;
  int nmissed_expected;
  atomic_uint64_t last_symbolize_time_ns;

  void *background_thread;
  atomic_uint32_t stop_background_thread;

  ThreadRegistry *thread_registry;

  Mutex racy_mtx;
  Vector<RacyStacks> racy_stacks;
  Vector<RacyAddress> racy_addresses;
  // The number of fired suppressions may be large.
  Mutex fired_suppressions_mtx;
  InternalMmapVector<FiredSuppression> fired_suppressions;
  DDetector *dd;

  ClockAlloc clock_alloc;

  Flags flags;

  u64 stat[StatCnt];
  u64 int_alloc_cnt[MBlockTypeCount];
  u64 int_alloc_siz[MBlockTypeCount];
};

extern Context *ctx;  // The one and the only global runtime context.

ALWAYS_INLINE Flags *flags() {
  return &ctx->flags;
}

struct ScopedIgnoreInterceptors {
  ScopedIgnoreInterceptors() {
#if !SANITIZER_GO
    cur_thread()->ignore_interceptors++;
#endif
  }

  ~ScopedIgnoreInterceptors() {
#if !SANITIZER_GO
    cur_thread()->ignore_interceptors--;
#endif
  }
};
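
// Illustrative sketch (not part of the runtime): ScopedIgnoreInterceptors is
// an RAII helper. Internal code that must not recurse into interception
// (e.g. while symbolizing, see ScopedReport below) can disable it for the
// current scope:
//
//   {
//     ScopedIgnoreInterceptors ignore;
//     // calls made here are not processed by interceptors for this thread
//   }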

const char *GetObjectTypeFromTag(uptr tag);
const char *GetReportHeaderFromTag(uptr tag);
uptr TagFromShadowStackFrame(uptr pc);

class ScopedReport {
 public:
  explicit ScopedReport(ReportType typ, uptr tag = kExternalTagNone);
  ~ScopedReport();

  void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, StackTrace stack,
                       const MutexSet *mset);
  void AddStack(StackTrace stack, bool suppressable = false);
  void AddThread(const ThreadContext *tctx, bool suppressable = false);
  void AddThread(int unique_tid, bool suppressable = false);
  void AddUniqueTid(int unique_tid);
  void AddMutex(const SyncVar *s);
  u64 AddMutex(u64 id);
  void AddLocation(uptr addr, uptr size);
  void AddSleep(u32 stack_id);
  void SetCount(int count);

  const ReportDesc *GetReport() const;

 private:
  ReportDesc *rep_;
  // Symbolizer makes lots of intercepted calls. If we try to process them,
  // at best it will cause deadlocks on internal mutexes.
  ScopedIgnoreInterceptors ignore_interceptors_;

  void AddDeadMutex(u64 id);

  ScopedReport(const ScopedReport&);
  void operator = (const ScopedReport&);
};

ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack);
void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
                  MutexSet *mset, uptr *tag = nullptr);

// The stack could look like:
//   <start> | <main> | <foo> | tag | <bar>
// This will extract the tag and keep:
//   <start> | <main> | <foo> | <bar>
template<typename StackTraceTy>
void ExtractTagFromStack(StackTraceTy *stack, uptr *tag = nullptr) {
  if (stack->size < 2) return;
  uptr possible_tag_pc = stack->trace[stack->size - 2];
  uptr possible_tag = TagFromShadowStackFrame(possible_tag_pc);
  if (possible_tag == kExternalTagNone) return;
  stack->trace_buffer[stack->size - 2] = stack->trace_buffer[stack->size - 1];
  stack->size -= 1;
  if (tag) *tag = possible_tag;
}

template<typename StackTraceTy>
void ObtainCurrentStack(ThreadState *thr, uptr toppc, StackTraceTy *stack,
                        uptr *tag = nullptr) {
  uptr size = thr->shadow_stack_pos - thr->shadow_stack;
  uptr start = 0;
  if (size + !!toppc > kStackTraceMax) {
    start = size + !!toppc - kStackTraceMax;
    size = kStackTraceMax - !!toppc;
  }
  stack->Init(&thr->shadow_stack[start], size, toppc);
  ExtractTagFromStack(stack, tag);
}
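
// Illustrative sketch (not part of the runtime): typical use of
// ObtainCurrentStack() when collecting a stack for a report. VarSizeStackTrace
// is declared in tsan_stack_trace.h; the surrounding report logic is elided.
//
//   VarSizeStackTrace trace;
//   uptr tag = kExternalTagNone;
//   ObtainCurrentStack(thr, pc, &trace, &tag);  // also strips an external tag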


#if TSAN_COLLECT_STATS
void StatAggregate(u64 *dst, u64 *src);
void StatOutput(u64 *stat);
#endif

void ALWAYS_INLINE StatInc(ThreadState *thr, StatType typ, u64 n = 1) {
#if TSAN_COLLECT_STATS
  thr->stat[typ] += n;
#endif
}
void ALWAYS_INLINE StatSet(ThreadState *thr, StatType typ, u64 n) {
#if TSAN_COLLECT_STATS
  thr->stat[typ] = n;
#endif
}

void MapShadow(uptr addr, uptr size);
void MapThreadTrace(uptr addr, uptr size, const char *name);
void DontNeedShadowFor(uptr addr, uptr size);
void InitializeShadowMemory();
void InitializeInterceptors();
void InitializeLibIgnore();
void InitializeDynamicAnnotations();

void ForkBefore(ThreadState *thr, uptr pc);
void ForkParentAfter(ThreadState *thr, uptr pc);
void ForkChildAfter(ThreadState *thr, uptr pc);

void ReportRace(ThreadState *thr);
bool OutputReport(ThreadState *thr, const ScopedReport &srep);
bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace);
bool IsExpectedReport(uptr addr, uptr size);
void PrintMatchedBenignRaces();

#if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 1
# define DPrintf Printf
#else
# define DPrintf(...)
#endif

#if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 2
# define DPrintf2 Printf
#else
# define DPrintf2(...)
#endif

u32 CurrentStackId(ThreadState *thr, uptr pc);
ReportStack *SymbolizeStackId(u32 stack_id);
void PrintCurrentStack(ThreadState *thr, uptr pc);
void PrintCurrentStackSlow(uptr pc);  // uses libunwind

void Initialize(ThreadState *thr);
int Finalize(ThreadState *thr);

void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write);
void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write);

void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic);
void MemoryAccessImpl(ThreadState *thr, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
    u64 *shadow_mem, Shadow cur);
void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
    uptr size, bool is_write);
void MemoryAccessRangeStep(ThreadState *thr, uptr pc, uptr addr,
    uptr size, uptr step, bool is_write);
void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr,
    int size, bool kAccessIsWrite, bool kIsAtomic);

const int kSizeLog1 = 0;
const int kSizeLog2 = 1;
const int kSizeLog4 = 2;
const int kSizeLog8 = 3;

void ALWAYS_INLINE MemoryRead(ThreadState *thr, uptr pc,
                              uptr addr, int kAccessSizeLog) {
  MemoryAccess(thr, pc, addr, kAccessSizeLog, false, false);
}

void ALWAYS_INLINE MemoryWrite(ThreadState *thr, uptr pc,
                               uptr addr, int kAccessSizeLog) {
  MemoryAccess(thr, pc, addr, kAccessSizeLog, true, false);
}

void ALWAYS_INLINE MemoryReadAtomic(ThreadState *thr, uptr pc,
                                    uptr addr, int kAccessSizeLog) {
  MemoryAccess(thr, pc, addr, kAccessSizeLog, false, true);
}

void ALWAYS_INLINE MemoryWriteAtomic(ThreadState *thr, uptr pc,
                                     uptr addr, int kAccessSizeLog) {
  MemoryAccess(thr, pc, addr, kAccessSizeLog, true, true);
}
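
// Illustrative sketch (not part of the runtime): how callers report accesses
// through the helpers above, e.g. an 8-byte plain read followed by a 4-byte
// atomic write. The addresses (obj, flag) are hypothetical.
//
//   MemoryRead(thr, pc, (uptr)&obj, kSizeLog8);
//   MemoryWriteAtomic(thr, pc, (uptr)&flag, kSizeLog4);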

void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size);
void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size);
void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size);

void ThreadIgnoreBegin(ThreadState *thr, uptr pc, bool save_stack = true);
void ThreadIgnoreEnd(ThreadState *thr, uptr pc);
void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc, bool save_stack = true);
void ThreadIgnoreSyncEnd(ThreadState *thr, uptr pc);

void FuncEntry(ThreadState *thr, uptr pc);
void FuncExit(ThreadState *thr);

int ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached);
void ThreadStart(ThreadState *thr, int tid, tid_t os_id, bool workerthread);
void ThreadFinish(ThreadState *thr);
int ThreadTid(ThreadState *thr, uptr pc, uptr uid);
void ThreadJoin(ThreadState *thr, uptr pc, int tid);
void ThreadDetach(ThreadState *thr, uptr pc, int tid);
void ThreadFinalize(ThreadState *thr);
void ThreadSetName(ThreadState *thr, const char *name);
int ThreadCount(ThreadState *thr);
void ProcessPendingSignals(ThreadState *thr);

Processor *ProcCreate();
void ProcDestroy(Processor *proc);
void ProcWire(Processor *proc, ThreadState *thr);
void ProcUnwire(Processor *proc, ThreadState *thr);
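
// Illustrative sketch (not part of the runtime): the expected Processor
// lifecycle around a thread, per the comment on struct Processor above. The
// actual call sites live in the platform/runtime code, not in this header.
//
//   Processor *proc = ProcCreate();
//   ProcWire(proc, thr);    // thr can now handle events
//   ...
//   ProcUnwire(proc, thr);
//   ProcDestroy(proc);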

// Note: the parameter is called flagz, because flags is already taken
// by the global function that returns flags.
void MutexCreate(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
void MutexPreLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0,
    int rec = 1);
int  MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
void MutexPreReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr);
void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr);
void MutexRepair(ThreadState *thr, uptr pc, uptr addr);  // call on EOWNERDEAD
void MutexInvalidAccess(ThreadState *thr, uptr pc, uptr addr);

void Acquire(ThreadState *thr, uptr pc, uptr addr);
// AcquireGlobal synchronizes the current thread with all other threads.
// In terms of the happens-before relation, it draws a HB edge from all threads
// (where they happen to execute right now) to the current thread. We use it to
// handle Go finalizers. Namely, the finalizer goroutine executes AcquireGlobal
// right before executing the finalizers. This provides a coarse, but simple
// approximation of the actual required synchronization.
void AcquireGlobal(ThreadState *thr, uptr pc);
void Release(ThreadState *thr, uptr pc, uptr addr);
void ReleaseStore(ThreadState *thr, uptr pc, uptr addr);
void AfterSleep(ThreadState *thr, uptr pc);
void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c);
void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c);
void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c);
void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c);

// The hacky call uses a custom calling convention and an assembly thunk.
// It is considerably faster than a normal call for the caller
// if it is not executed (it is intended for slow paths from hot functions).
// The trick is that the call preserves all registers and the compiler
// does not treat it as a call.
// If it does not work for you, use a normal call.
#if !SANITIZER_DEBUG && defined(__x86_64__) && !SANITIZER_MAC
// The caller may not create a stack frame for itself at all,
// so we create a reserve stack frame for it (1024 bytes must be enough).
#define HACKY_CALL(f) \
  __asm__ __volatile__("sub $1024, %%rsp;" \
                       CFI_INL_ADJUST_CFA_OFFSET(1024) \
                       ".hidden " #f "_thunk;" \
                       "call " #f "_thunk;" \
                       "add $1024, %%rsp;" \
                       CFI_INL_ADJUST_CFA_OFFSET(-1024) \
                       ::: "memory", "cc");
#else
#define HACKY_CALL(f) f()
#endif

void TraceSwitch(ThreadState *thr);
uptr TraceTopPC(ThreadState *thr);
uptr TraceSize();
uptr TraceParts();
Trace *ThreadTrace(int tid);

extern "C" void __tsan_trace_switch();
void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs,
                                 EventType typ, u64 addr) {
  if (!kCollectHistory)
    return;
  DCHECK_GE((int)typ, 0);
  DCHECK_LE((int)typ, 7);
  DCHECK_EQ(GetLsb(addr, kEventPCBits), addr);
  StatInc(thr, StatEvents);
  u64 pos = fs.GetTracePos();
  if (UNLIKELY((pos % kTracePartSize) == 0)) {
#if !SANITIZER_GO
    HACKY_CALL(__tsan_trace_switch);
#else
    TraceSwitch(thr);
#endif
  }
  Event *trace = (Event*)GetThreadTrace(fs.tid());
  Event *evp = &trace[pos];
  Event ev = (u64)addr | ((u64)typ << kEventPCBits);
  *evp = ev;
}
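
// Illustrative sketch (not part of the runtime): recording a memory-access
// event in the thread trace. EventTypeMop comes from tsan_trace.h; the real
// call sites are in the .cc files.
//
//   TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);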

#if !SANITIZER_GO
uptr ALWAYS_INLINE HeapEnd() {
  return HeapMemEnd() + PrimaryAllocator::AdditionalSize();
}
#endif

}  // namespace __tsan

#endif  // TSAN_RTL_H