//===-- tsan_clock.cc -----------------------------------------------------===//
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
// ThreadClock contains a fixed-size vector clock for the maximum number of
// threads.
// SyncClock contains a growable vector clock for the currently necessary
// number of threads.
// Together they implement a very simple model of operations, namely:
//
// void ThreadClock::acquire(const SyncClock *src) {
//   for (int i = 0; i < kMaxThreads; i++)
//     clock[i] = max(clock[i], src->clock[i]);
// }
//
// void ThreadClock::release(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = max(dst->clock[i], clock[i]);
// }
//
// void ThreadClock::ReleaseStore(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = clock[i];
// }
//
// void ThreadClock::acq_rel(SyncClock *dst) {
//   acquire(dst);
//   release(dst);
// }
//
// Conformance to this model is extensively verified in tsan_clock_test.cc.
// However, the implementation is significantly more complex. The complexity
// allows implementing important classes of use cases in O(1) instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by
// switching to O(N).
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold "acquired" flag (the actual value is thread's
//   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty elements).
// dirty_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of the "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.
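//
// A minimal usage sketch (illustrative only, not part of the runtime logic;
// assumes a ClockCache *c obtained from the runtime and a SyncClock embedded
// in a sync object):
//
//   ThreadClock thr1(/*tid=*/1, /*reused=*/0);
//   ThreadClock thr2(/*tid=*/2, /*reused=*/0);
//   SyncClock sync;
//   thr1.release(c, &sync);  // e.g. thread 1 unlocks a mutex
//   thr2.acquire(c, &sync);  // e.g. thread 2 locks the same mutex
//   sync.Reset(c);           // SyncClock requires Reset before destruction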

// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
#if !SANITIZER_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
  return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
}

// Drop reference to the first level block idx.
static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  atomic_uint32_t *ref = ref_ptr(cb);
  u32 v = atomic_load(ref, memory_order_acquire);
  for (;;) {
    CHECK_GT(v, 0);
    if (v == 1)
      break;
    if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
      return;
  }
  // First level block owns second level blocks, so free them as well.
  for (uptr i = 0; i < blocks; i++)
    ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
  ctx->clock_alloc.Free(c, idx);
}

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1)  // 0 has special meaning
    , cached_idx_()
    , cached_size_()
    , cached_blocks_() {
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
}

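// Drops this thread's reference to its cached clock block (if any)
// and clears the cached state.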
void ThreadClock::ResetCached(ClockCache *c) {
  if (cached_idx_) {
    UnrefClockBlock(c, cached_idx_, cached_blocks_);
    cached_idx_ = 0;
    cached_size_ = 0;
    cached_blocks_ = 0;
  }
}

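// Acquires src into this thread's clock, i.e. clk_[i] = max(clk_[i], src[i]).
// Dirty entries are merged first; the full O(N) walk is skipped when this
// thread has already acquired src since its last release (tracked via the
// per-element 'reused' counter).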
void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  bool acquired = false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    unsigned tid = dirty.tid;
    if (tid != kInvalidTid) {
      if (clk_[tid] < dirty.epoch) {
        clk_[tid] = dirty.epoch;
        acquired = true;
      }
    }
  }

  // Check if we've already acquired src after the last release operation on src
  if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
    // O(N) acquire.
    CPP_STAT_INC(StatClockAcquireFull);
    nclk_ = max(nclk_, nclk);
    u64 *dst_pos = &clk_[0];
    for (ClockElem &src_elem : *src) {
      u64 epoch = src_elem.epoch;
      if (*dst_pos < epoch) {
        *dst_pos = epoch;
        acquired = true;
      }
      dst_pos++;
    }

    // Remember that this thread has acquired this clock.
    if (nclk > tid_)
      src->elem(tid_).reused = reused_;
  }

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_];
    ResetCached(c);
  }
}

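// Releases this thread's clock into dst, i.e. dst[i] = max(dst[i], clk_[i]).
// If nothing was acquired from other threads since the last release on dst,
// only the current thread's element (a dirty slot) needs to be updated.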
void ThreadClock::release(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // Check if we had not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->elem(tid_).
  if (dst->elem(tid_).epoch > last_acquire_) {
    UpdateCurrentThread(c, dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  dst->Unshare(c);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  dst->FlushDirty();
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = max(ce.epoch, clk_[i]);
    ce.reused = 0;
    i++;
  }
  // Clear 'acquired' flag in the remaining elements.
  if (nclk_ < dst->size_)
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

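// Release-stores this thread's clock into dst, i.e. dst[i] = clk_[i].
// When dst is empty and this thread has a cached (immutable) clock block,
// the cached block is reused instead of copying the clock element-wise.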
void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);
  CPP_STAT_INC(StatClockStore);

  if (dst->size_ == 0 && cached_idx_ != 0) {
    // Reuse the cached clock.
    // Note: we could reuse/cache the cached clock in more cases:
    // we could update the existing clock and cache it, or replace it with the
    // currently cached clock and release the old one. And for a shared
    // existing clock, we could replace it with the currently cached;
    // or unshare, update and cache. But, for simplicity, we currently reuse
    // the cached clock only when the target clock is empty.
    dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
    dst->tab_idx_ = cached_idx_;
    dst->size_ = cached_size_;
    dst->blocks_ = cached_blocks_;
    CHECK_EQ(dst->dirty_[0].tid, kInvalidTid);
    // The cached clock is shared (immutable),
    // so this is where we store the current clock.
    dst->dirty_[0].tid = tid_;
    dst->dirty_[0].epoch = clk_[tid_];
    dst->release_store_tid_ = tid_;
    dst->release_store_reused_ = reused_;
    // Remember that we don't need to acquire it in the future.
    dst->elem(tid_).reused = reused_;
    // Grab a reference.
    atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    return;
  }

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->elem(tid_).epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(c, dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  dst->Unshare(c);
  // Note: dst can be larger than this ThreadClock.
  // This is fine since clk_ beyond size is all zeros.
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = clk_[i];
    ce.reused = 0;
    i++;
  }
  for (uptr i = 0; i < kDirtyTids; i++)
    dst->dirty_[i].tid = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in the future.
  dst->elem(tid_).reused = reused_;

  // If the resulting clock is cachable, cache it for future release operations.
  // The clock is always cachable if we released to an empty sync object.
  if (cached_idx_ == 0 && dst->Cachable()) {
    // Grab a reference to the ClockBlock.
    atomic_uint32_t *ref = ref_ptr(dst->tab_);
    if (atomic_load(ref, memory_order_acquire) == 1)
      atomic_store_relaxed(ref, 2);
    else
      atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    cached_idx_ = dst->tab_idx_;
    cached_size_ = dst->size_;
    cached_blocks_ = dst->blocks_;
  }
}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(c, dst);
  ReleaseStore(c, dst);
}

// Updates only the single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty *dirty = &dst->dirty_[i];
    const unsigned tid = dirty->tid;
    if (tid == tid_ || tid == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast);
      dirty->tid = tid_;
      dirty->epoch = clk_[tid_];
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  // We are going to touch dst elements, so we need to unshare it.
  dst->Unshare(c);
  CPP_STAT_INC(StatClockReleaseSlow);
  dst->elem(tid_).epoch = clk_[tid_];
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  dst->FlushDirty();
}

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->elem(tid_).reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    if (dirty.tid != kInvalidTid) {
      if (clk_[dirty.tid] < dirty.epoch)
        return false;
    }
  }
  return true;
}

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid]);
  clk_[tid] = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_];
  ResetCached(c);
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
  printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
}

SyncClock::SyncClock() {
  ResetImpl();
}

SyncClock::~SyncClock() {
  // Reset must be called before dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(blocks_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

void SyncClock::Reset(ClockCache *c) {
  if (size_)
    UnrefClockBlock(c, tab_idx_, blocks_);
  ResetImpl();
}

void SyncClock::ResetImpl() {
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  blocks_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_[i].tid = kInvalidTid;
}

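// Grows the clock so that it can hold at least nclk elements, allocating the
// first-level table and additional second-level blocks as needed.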
void SyncClock::Resize(ClockCache *c, uptr nclk) {
  CPP_STAT_INC(StatClockReleaseResize);
  Unshare(c);
  if (nclk <= capacity()) {
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (size_ == 0) {
    // Grow from 0 to one-level table.
    CHECK_EQ(size_, 0);
    CHECK_EQ(blocks_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    atomic_store_relaxed(ref_ptr(tab_), 1);
    size_ = 1;
  } else if (size_ > blocks_ * ClockBlock::kClockCount) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
    uptr top = size_ - blocks_ * ClockBlock::kClockCount;
    CHECK_LT(top, ClockBlock::kClockCount);
    const uptr move = top * sizeof(tab_->clock[0]);
    internal_memcpy(&new_cb->clock[0], tab_->clock, move);
    internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
    internal_memset(tab_->clock, 0, move);
    append_block(idx);
  }
  // At this point we have the first level table allocated and all clock
  // elements are evacuated from it to a second level block.
  // Add second level tables as necessary.
  while (nclk > capacity()) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    append_block(idx);
  }
  size_ = nclk;
}

// Flushes all dirty elements into the main clock array.
void SyncClock::FlushDirty() {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty *dirty = &dirty_[i];
    if (dirty->tid != kInvalidTid) {
      CHECK_LT(dirty->tid, size_);
      elem(dirty->tid).epoch = dirty->epoch;
      dirty->tid = kInvalidTid;
    }
  }
}

bool SyncClock::IsShared() const {
  if (size_ == 0)
    return false;
  atomic_uint32_t *ref = ref_ptr(tab_);
  u32 v = atomic_load(ref, memory_order_acquire);
  CHECK_GT(v, 0);
  return v > 1;
}

// Unshares the current clock if it's shared.
// Shared clocks are immutable, so they need to be unshared before any updates.
// Note: this does not apply to dirty entries as they are not shared.
void SyncClock::Unshare(ClockCache *c) {
  if (!IsShared())
    return;
  // First, copy current state into old.
  SyncClock old;
  old.tab_ = tab_;
  old.tab_idx_ = tab_idx_;
  old.size_ = size_;
  old.blocks_ = blocks_;
  old.release_store_tid_ = release_store_tid_;
  old.release_store_reused_ = release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    old.dirty_[i] = dirty_[i];
  // Then, clear current object.
  ResetImpl();
  // Allocate brand new clock in the current object.
  Resize(c, old.size_);
  // Now copy state back into this object.
  Iter old_iter(&old);
  for (ClockElem &ce : *this) {
    ce = *old_iter;
    ++old_iter;
  }
  release_store_tid_ = old.release_store_tid_;
  release_store_reused_ = old.release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dirty_[i] = old.dirty_[i];
  // Drop reference to old and delete if necessary.
  old.Reset(c);
}

// Can we cache this clock for future release operations?
ALWAYS_INLINE bool SyncClock::Cachable() const {
  if (size_ == 0)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dirty_[i].tid != kInvalidTid)
      return false;
  }
  return atomic_load_relaxed(ref_ptr(tab_)) == 1;
}

// elem linearizes the two-level structure into a linear array.
// Note: this is used only for one time accesses, vector operations use
// the iterator as it is much faster.
ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  const uptr block = tid / ClockBlock::kClockCount;
  DCHECK_LE(block, blocks_);
  tid %= ClockBlock::kClockCount;
  if (block == blocks_)
    return tab_->clock[tid];
  u32 idx = get_block(block);
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid];
}

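// Returns the number of clock elements that fit into the currently
// allocated first- and second-level blocks.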
ALWAYS_INLINE uptr SyncClock::capacity() const {
  if (size_ == 0)
    return 0;
  uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
  // How many clock elements we can fit into the first level block.
  // +1 for ref counter.
  uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
  return blocks_ * ClockBlock::kClockCount + top;
}

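// Returns the allocator index of the bi-th second-level block,
// stored at the end of the first-level table.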
ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
  DCHECK(size_);
  DCHECK_LT(bi, blocks_);
  return tab_->table[ClockBlock::kBlockIdx - bi];
}

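// Records the index of a newly allocated second-level block in the
// first-level table and increments the block count.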
ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
  uptr bi = blocks_++;
  CHECK_EQ(get_block(bi), 0);
  tab_->table[ClockBlock::kBlockIdx - bi] = idx;
}

// Used only by tests.
u64 SyncClock::get(unsigned tid) const {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty dirty = dirty_[i];
    if (dirty.tid == tid)
      return dirty.epoch;
  }
  return elem(tid).epoch;
}

// Used only by Iter test.
u64 SyncClock::get_clean(unsigned tid) const {
  return elem(tid).epoch;
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
      release_store_tid_, release_store_reused_,
      dirty_[0].tid, dirty_[0].epoch,
      dirty_[1].tid, dirty_[1].epoch);
}

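// Advances the iterator to the next range of clock elements: second-level
// blocks first, then the tail stored in the first-level block; parent_ is
// set to nullptr once the end is reached.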
void SyncClock::Iter::Next() {
  // Finished with the current block, move on to the next one.
  block_++;
  if (block_ < parent_->blocks_) {
    // Iterate over the next second level block.
    u32 idx = parent_->get_block(block_);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    pos_ = &cb->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  if (block_ == parent_->blocks_ &&
      parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
    // Iterate over elements in the first level block.
    pos_ = &parent_->tab_->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  parent_ = nullptr;  // denotes end
}
}  // namespace __tsan