//===-- tsan_sync.cc ------------------------------------------------------===//
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_placement_new.h"
#include "tsan_sync.h"
#include "tsan_rtl.h"
#include "tsan_mman.h"

namespace __tsan {

void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s);

SyncVar::SyncVar()
    : mtx(MutexTypeSyncVar, StatMtxSyncVar) {
  Reset(0);
}
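// Initializes the sync object for the given address and unique id, records
// the creation stack (Go does not use creation stacks) and registers the
// mutex with the deadlock detector when deadlock detection is enabled.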
void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid) {
  this->addr = addr;
  this->uid = uid;
  this->next = 0;

  creation_stack_id = 0;
  if (!SANITIZER_GO)  // Go does not use them
    creation_stack_id = CurrentStackId(thr, pc);
  if (common_flags()->detect_deadlocks)
    DDMutexInit(thr, pc, this);
}
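// Returns the sync object to its initial state. With a null proc the vector
// clocks are expected to be empty already; otherwise they are released back
// into the processor's clock cache.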
void SyncVar::Reset(Processor *proc) {
  uid = 0;
  creation_stack_id = 0;
  owner_tid = kInvalidTid;
  last_lock = 0;
  recursion = 0;
  atomic_store_relaxed(&flags, 0);

  if (proc == 0) {
    CHECK_EQ(clock.size(), 0);
    CHECK_EQ(read_clock.size(), 0);
  } else {
    clock.Reset(&proc->clock_cache);
    read_clock.Reset(&proc->clock_cache);
  }
}

MetaMap::MetaMap()
    : block_alloc_("heap block allocator")
    , sync_alloc_("sync allocator") {
  atomic_store(&uid_gen_, 0, memory_order_relaxed);
}
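// Attaches a heap block descriptor (size, owner thread, allocation stack)
// to the meta shadow cell of address p. The cell is expected to be empty.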
void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) {
  u32 idx = block_alloc_.Alloc(&thr->proc()->block_cache);
  MBlock *b = block_alloc_.Map(idx);
  b->siz = sz;
  b->tag = 0;
  b->tid = thr->tid;
  b->stk = CurrentStackId(thr, pc);
  u32 *meta = MemToMeta(p);
  DCHECK_EQ(*meta, 0);
  *meta = idx | kFlagBlock;
}
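// Frees the block descriptor attached to address p together with all sync
// objects in the same range. Returns the block size rounded up to the meta
// shadow cell size, or 0 if no block is attached to p.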
uptr MetaMap::FreeBlock(Processor *proc, uptr p) {
  MBlock* b = GetBlock(p);
  if (b == 0)
    return 0;
  uptr sz = RoundUpTo(b->siz, kMetaShadowCell);
  FreeRange(proc, p, sz);
  return sz;
}
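// Walks the meta cells covering [p, p + sz) and releases every block
// descriptor and sync object linked from them. Returns true if at least one
// meta object was found; ResetRange uses this to decide when to stop probing.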
bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) {
  bool has_something = false;
  u32 *meta = MemToMeta(p);
  u32 *end = MemToMeta(p + sz);
  if (end == meta)
    end++;
  for (; meta < end; meta++) {
    u32 idx = *meta;
    if (idx == 0) {
      // Note: don't write to meta in this case -- the block can be huge.
      continue;
    }
    *meta = 0;
    has_something = true;
    while (idx != 0) {
      if (idx & kFlagBlock) {
        block_alloc_.Free(&proc->block_cache, idx & ~kFlagMask);
        break;
      } else if (idx & kFlagSync) {
        DCHECK(idx & kFlagSync);
        SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask);
        u32 next = s->next;
        s->Reset(proc);
        sync_alloc_.Free(&proc->sync_cache, idx & ~kFlagMask);
        idx = next;
      } else {
        CHECK(0);
      }
    }
  }
  return has_something;
}

// ResetRange removes all meta objects from the range.
// It is called for large mmap-ed regions. The function is best-effort with
// respect to freeing of meta objects, because we don't want to page in the
// whole range, which can be huge. The function probes pages one-by-one until
// it finds a page without meta objects; at that point it stops freeing meta
// objects. Because thread stacks grow top-down, we do the same starting from
// the end of the range as well.
void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
  if (SANITIZER_GO) {
    // UnmapOrDie/MmapFixedNoReserve does not work on Windows,
    // so we do the optimization only for C/C++.
    FreeRange(proc, p, sz);
    return;
  }
  const uptr kMetaRatio = kMetaShadowCell / kMetaShadowSize;
  const uptr kPageSize = GetPageSizeCached() * kMetaRatio;
  if (sz <= 4 * kPageSize) {
    // If the range is small, just do the normal free procedure.
    FreeRange(proc, p, sz);
    return;
  }
  // First, round both ends of the range to page size.
  uptr diff = RoundUp(p, kPageSize) - p;
  if (diff != 0) {
    FreeRange(proc, p, diff);
    p += diff;
    sz -= diff;
  }
  diff = p + sz - RoundDown(p + sz, kPageSize);
  if (diff != 0) {
    FreeRange(proc, p + sz - diff, diff);
    sz -= diff;
  }
  // Now we must have a non-empty page-aligned range.
  CHECK_GT(sz, 0);
  CHECK_EQ(p, RoundUp(p, kPageSize));
  CHECK_EQ(sz, RoundUp(sz, kPageSize));
  const uptr p0 = p;
  const uptr sz0 = sz;
  // Probe start of the range.
  for (uptr checked = 0; sz > 0; checked += kPageSize) {
    bool has_something = FreeRange(proc, p, kPageSize);
    p += kPageSize;
    sz -= kPageSize;
    if (!has_something && checked > (128 << 10))
      break;
  }
  // Probe end of the range.
  for (uptr checked = 0; sz > 0; checked += kPageSize) {
    bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize);
    sz -= kPageSize;
    // Stacks grow down, so sync objects are most likely at the end of the
    // region (if it is a stack). The very end of the stack is TLS and tsan
    // increases TLS by at least 256K, so check at least 512K.
    if (!has_something && checked > (512 << 10))
      break;
  }
  // Finally, page out the whole range (including the parts that we've just
  // freed). Note: we can't simply madvise, because we need to leave a zeroed
  // range (otherwise __tsan_java_move can crash if it encounters left-over
  // meta objects in the java heap).
  uptr metap = (uptr)MemToMeta(p0);
  uptr metasz = sz0 / kMetaRatio;
  UnmapOrDie((void*)metap, metasz);
  if (!MmapFixedNoReserve(metap, metasz))
    Die();
}
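// Returns the heap block descriptor attached to address p, or null if the
// meta cell contains no block. Sync objects chained before the block are
// skipped.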
MBlock* MetaMap::GetBlock(uptr p) {
  u32 *meta = MemToMeta(p);
  u32 idx = *meta;
  for (;;) {
    if (idx == 0)
      return 0;
    if (idx & kFlagBlock)
      return block_alloc_.Map(idx & ~kFlagMask);
    DCHECK(idx & kFlagSync);
    SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
    idx = s->next;
  }
}
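// Returns the sync object for addr, creating it if it does not exist yet,
// with its mutex acquired for writing or reading as requested.
// Illustrative call pattern (a sketch only; the real call sites live in the
// mutex/atomic handling code, e.g. tsan_rtl_mutex.cc):
//   SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
//   // ... read or update s->owner_tid, s->recursion, s->clock ...
//   s->mtx.Unlock();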
SyncVar* MetaMap::GetOrCreateAndLock(ThreadState *thr, uptr pc,
                              uptr addr, bool write_lock) {
  return GetAndLock(thr, pc, addr, write_lock, true);
}
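// Same as GetOrCreateAndLock, but returns null instead of creating a new
// sync object when none is attached to addr.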
SyncVar* MetaMap::GetIfExistsAndLock(uptr addr, bool write_lock) {
  return GetAndLock(0, 0, addr, write_lock, false);
}
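// Looks up the sync object for addr in the chain hanging off its meta cell.
// If none is found and create is true, a new object is allocated and
// published at the head of the chain with a release CAS; if the CAS fails,
// the lookup is retried and the speculatively allocated object is reused or
// returned to the allocator. The resulting object is locked as requested.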
SyncVar* MetaMap::GetAndLock(ThreadState *thr, uptr pc,
                             uptr addr, bool write_lock, bool create) {
  u32 *meta = MemToMeta(addr);
  u32 idx0 = *meta;
  u32 myidx = 0;
  SyncVar *mys = 0;
  for (;;) {
    u32 idx = idx0;
    for (;;) {
      if (idx == 0)
        break;
      if (idx & kFlagBlock)
        break;
      DCHECK(idx & kFlagSync);
      SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
      if (s->addr == addr) {
        if (myidx != 0) {
          mys->Reset(thr->proc());
          sync_alloc_.Free(&thr->proc()->sync_cache, myidx);
        }
        if (write_lock)
          s->mtx.Lock();
        else
          s->mtx.ReadLock();
        return s;
      }
      idx = s->next;
    }
    if (!create)
      return 0;
    if (*meta != idx0) {
      idx0 = *meta;
      continue;
    }

    if (myidx == 0) {
      const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
      myidx = sync_alloc_.Alloc(&thr->proc()->sync_cache);
      mys = sync_alloc_.Map(myidx);
      mys->Init(thr, pc, addr, uid);
    }
    mys->next = idx0;
    if (atomic_compare_exchange_strong((atomic_uint32_t*)meta, &idx0,
        myidx | kFlagSync, memory_order_release)) {
      if (write_lock)
        mys->mtx.Lock();
      else
        mys->mtx.ReadLock();
      return mys;
    }
  }
}
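// Relinks all meta objects attached to [src, src + sz) onto the corresponding
// meta cells at dst and patches the addresses stored in the sync objects
// (used, e.g., when a region of the java heap is moved; cf. the
// __tsan_java_move note in ResetRange above).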
void MetaMap::MoveMemory(uptr src, uptr dst, uptr sz) {
  // src and dst can overlap; there are no concurrent accesses to the regions
  // (e.g. stop-the-world).
  CHECK_NE(src, dst);
  CHECK_NE(sz, 0);
  uptr diff = dst - src;
  u32 *src_meta = MemToMeta(src);
  u32 *dst_meta = MemToMeta(dst);
  u32 *src_meta_end = MemToMeta(src + sz);
  uptr inc = 1;
  if (dst > src) {
    src_meta = MemToMeta(src + sz) - 1;
    dst_meta = MemToMeta(dst + sz) - 1;
    src_meta_end = MemToMeta(src) - 1;
    inc = -1;
  }
  for (; src_meta != src_meta_end; src_meta += inc, dst_meta += inc) {
    CHECK_EQ(*dst_meta, 0);
    u32 idx = *src_meta;
    *src_meta = 0;
    *dst_meta = idx;
    // Patch the addresses in sync objects.
    while (idx != 0) {
      if (idx & kFlagBlock)
        break;
      CHECK(idx & kFlagSync);
      SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask);
      s->addr += diff;
      idx = s->next;
    }
  }
}
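// Returns the processor's allocator caches to the global block and sync
// allocators when the processor goes idle.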
void MetaMap::OnProcIdle(Processor *proc) {
  block_alloc_.FlushCache(&proc->block_cache);
  sync_alloc_.FlushCache(&proc->sync_cache);
}

}  // namespace __tsan