/*-
 * Copyright (c) 2014-2018 MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */
8
9 #include "wt_internal.h"
10
#ifdef HAVE_DIAGNOSTIC
/*
 * Forward declaration: __hazard_dump is defined later in this file, after
 * __wt_hazard_close, which calls it. Only present in diagnostic builds.
 */
static void __hazard_dump(WT_SESSION_IMPL *);
#endif
14
15 /*
16 * hazard_grow --
17 * Grow a hazard pointer array.
18 */
19 static int
hazard_grow(WT_SESSION_IMPL * session)20 hazard_grow(WT_SESSION_IMPL *session)
21 {
22 WT_HAZARD *nhazard;
23 size_t size;
24 uint64_t hazard_gen;
25 void *ohazard;
26
27 /*
28 * Allocate a new, larger hazard pointer array and copy the contents of
29 * the original into place.
30 */
31 size = session->hazard_size;
32 WT_RET(__wt_calloc_def(session, size * 2, &nhazard));
33 memcpy(nhazard, session->hazard, size * sizeof(WT_HAZARD));
34
35 /*
36 * Swap the new hazard pointer array into place after initialization
37 * is complete (initialization must complete before eviction can see
38 * the new hazard pointer array), then schedule the original to be
39 * freed.
40 */
41 ohazard = session->hazard;
42 WT_PUBLISH(session->hazard, nhazard);
43
44 /*
45 * Increase the size of the session's pointer array after swapping it
46 * into place (the session's reference must be updated before eviction
47 * can see the new size).
48 */
49 WT_PUBLISH(session->hazard_size, (uint32_t)(size * 2));
50
51 /*
52 * Threads using the hazard pointer array from now on will use the new
53 * one. Increment the hazard pointer generation number, and schedule a
54 * future free of the old memory. Ignore any failure, leak the memory.
55 */
56 hazard_gen = __wt_gen_next(session, WT_GEN_HAZARD);
57 WT_IGNORE_RET(
58 __wt_stash_add(session, WT_GEN_HAZARD, hazard_gen, ohazard, 0));
59
60 return (0);
61 }
62
63 /*
64 * __wt_hazard_set --
65 * Set a hazard pointer.
66 */
67 int
__wt_hazard_set(WT_SESSION_IMPL * session,WT_REF * ref,bool * busyp,const char * func,int line)68 __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
69 #ifdef HAVE_DIAGNOSTIC
70 , const char *func, int line
71 #endif
72 )
73 {
74 WT_HAZARD *hp;
75 uint32_t current_state;
76
77 *busyp = false;
78
79 /* If a file can never be evicted, hazard pointers aren't required. */
80 if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
81 return (0);
82
83 /*
84 * If there isn't a valid page, we're done. This read can race with
85 * eviction and splits, we re-check it after a barrier to make sure
86 * we have a valid reference.
87 */
88 current_state = ref->state;
89 if (current_state != WT_REF_LIMBO && current_state != WT_REF_MEM) {
90 *busyp = true;
91 return (0);
92 }
93
94 /* If we have filled the current hazard pointer array, grow it. */
95 if (session->nhazard >= session->hazard_size) {
96 WT_ASSERT(session,
97 session->nhazard == session->hazard_size &&
98 session->hazard_inuse == session->hazard_size);
99 WT_RET(hazard_grow(session));
100 }
101
102 /*
103 * If there are no available hazard pointer slots, make another one
104 * visible.
105 */
106 if (session->nhazard >= session->hazard_inuse) {
107 WT_ASSERT(session,
108 session->nhazard == session->hazard_inuse &&
109 session->hazard_inuse < session->hazard_size);
110 hp = &session->hazard[session->hazard_inuse++];
111 } else {
112 WT_ASSERT(session,
113 session->nhazard < session->hazard_inuse &&
114 session->hazard_inuse <= session->hazard_size);
115
116 /*
117 * There must be an empty slot in the array, find it. Skip most
118 * of the active slots by starting after the active count slot;
119 * there may be a free slot before there, but checking is
120 * expensive. If we reach the end of the array, continue the
121 * search from the beginning of the array.
122 */
123 for (hp = session->hazard + session->nhazard;; ++hp) {
124 if (hp >= session->hazard + session->hazard_inuse)
125 hp = session->hazard;
126 if (hp->ref == NULL)
127 break;
128 }
129 }
130
131 WT_ASSERT(session, hp->ref == NULL);
132
133 /*
134 * Do the dance:
135 *
136 * The memory location which makes a page "real" is the WT_REF's state
137 * of WT_REF_LIMBO or WT_REF_MEM, which can be set to WT_REF_LOCKED
138 * at any time by the page eviction server.
139 *
140 * Add the WT_REF reference to the session's hazard list and flush the
141 * write, then see if the page's state is still valid. If so, we can
142 * use the page because the page eviction server will see our hazard
143 * pointer before it discards the page (the eviction server sets the
144 * state to WT_REF_LOCKED, then flushes memory and checks the hazard
145 * pointers).
146 */
147 hp->ref = ref;
148 #ifdef HAVE_DIAGNOSTIC
149 hp->func = func;
150 hp->line = line;
151 #endif
152 /* Publish the hazard pointer before reading page's state. */
153 WT_FULL_BARRIER();
154
155 /*
156 * Check if the page state is still valid, where valid means a
157 * state of WT_REF_LIMBO or WT_REF_MEM.
158 */
159 current_state = ref->state;
160 if (current_state == WT_REF_LIMBO || current_state == WT_REF_MEM) {
161 ++session->nhazard;
162
163 /*
164 * Callers require a barrier here so operations holding
165 * the hazard pointer see consistent data.
166 */
167 WT_READ_BARRIER();
168 return (0);
169 }
170
171 /*
172 * The page isn't available, it's being considered for eviction
173 * (or being evicted, for all we know). If the eviction server
174 * sees our hazard pointer before evicting the page, it will
175 * return the page to use, no harm done, if it doesn't, it will
176 * go ahead and complete the eviction.
177 *
178 * We don't bother publishing this update: the worst case is we
179 * prevent some random page from being evicted.
180 */
181 hp->ref = NULL;
182 *busyp = true;
183 return (0);
184 }
185
186 /*
187 * __wt_hazard_clear --
188 * Clear a hazard pointer.
189 */
190 int
__wt_hazard_clear(WT_SESSION_IMPL * session,WT_REF * ref)191 __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref)
192 {
193 WT_HAZARD *hp;
194
195 /* If a file can never be evicted, hazard pointers aren't required. */
196 if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
197 return (0);
198
199 /*
200 * Clear the caller's hazard pointer.
201 * The common pattern is LIFO, so do a reverse search.
202 */
203 for (hp = session->hazard + session->hazard_inuse - 1;
204 hp >= session->hazard;
205 --hp)
206 if (hp->ref == ref) {
207 /*
208 * We don't publish the hazard pointer clear in the
209 * general case. It's not required for correctness;
210 * it gives an eviction thread faster access to the
211 * page were the page selected for eviction, but the
212 * generation number was just set, it's unlikely the
213 * page will be selected for eviction.
214 */
215 hp->ref = NULL;
216
217 /*
218 * If this was the last hazard pointer in the session,
219 * reset the size so that checks can skip this session.
220 *
221 * A write-barrier() is necessary before the change to
222 * the in-use value, the number of active references
223 * can never be less than the number of in-use slots.
224 */
225 if (--session->nhazard == 0)
226 WT_PUBLISH(session->hazard_inuse, 0);
227 return (0);
228 }
229
230 /*
231 * A serious error, we should always find the hazard pointer. Panic,
232 * because using a page we didn't have pinned down implies corruption.
233 */
234 WT_PANIC_RET(session, EINVAL,
235 "session %p: clear hazard pointer: %p: not found",
236 (void *)session, (void *)ref);
237 }
238
239 /*
240 * __wt_hazard_close --
241 * Verify that no hazard pointers are set.
242 */
243 void
__wt_hazard_close(WT_SESSION_IMPL * session)244 __wt_hazard_close(WT_SESSION_IMPL *session)
245 {
246 WT_HAZARD *hp;
247 bool found;
248
249 /*
250 * Check for a set hazard pointer and complain if we find one. We could
251 * just check the session's hazard pointer count, but this is a useful
252 * diagnostic.
253 */
254 for (found = false, hp = session->hazard;
255 hp < session->hazard + session->hazard_inuse; ++hp)
256 if (hp->ref != NULL) {
257 found = true;
258 break;
259 }
260 if (session->nhazard == 0 && !found)
261 return;
262
263 __wt_errx(session,
264 "session %p: close hazard pointer table: table not empty",
265 (void *)session);
266
267 #ifdef HAVE_DIAGNOSTIC
268 __hazard_dump(session);
269 #endif
270
271 /*
272 * Clear any hazard pointers because it's not a correctness problem
273 * (any hazard pointer we find can't be real because the session is
274 * being closed when we're called). We do this work because session
275 * close isn't that common that it's an expensive check, and we don't
276 * want to let a hazard pointer lie around, keeping a page from being
277 * evicted.
278 *
279 * We don't panic: this shouldn't be a correctness issue (at least, I
280 * can't think of a reason it would be).
281 */
282 for (hp = session->hazard;
283 hp < session->hazard + session->hazard_inuse; ++hp)
284 if (hp->ref != NULL) {
285 hp->ref = NULL;
286 --session->nhazard;
287 }
288
289 if (session->nhazard != 0)
290 __wt_errx(session,
291 "session %p: close hazard pointer table: count didn't "
292 "match entries",
293 (void *)session);
294 }
295
/*
 * hazard_get_reference --
 *	Return a consistent reference to a hazard pointer array.
 *
 *	session: session whose hazard array is being examined.
 *	hazardp: receives the session's hazard array pointer.
 *	hazard_inusep: receives the session's in-use slot count.
 */
static inline void
hazard_get_reference(
    WT_SESSION_IMPL *session, WT_HAZARD **hazardp, uint32_t *hazard_inusep)
{
	/*
	 * Hazard pointer arrays can be swapped out from under us if they grow.
	 * The order of the two reads below is the whole point of this
	 * function and must not be changed.
	 *
	 * First, read the current in-use value. The read must precede the read
	 * of the hazard pointer itself (so the in-use value is pessimistic
	 * should the hazard array grow), and additionally ensure we only read
	 * the in-use value once. Then, read the hazard pointer, also ensuring
	 * we only read it once.
	 *
	 * Use a barrier instead of marking the fields volatile because we don't
	 * want to slow down the rest of the hazard pointer functions that don't
	 * need special treatment.
	 */
	WT_ORDERED_READ(*hazard_inusep, session->hazard_inuse);
	WT_ORDERED_READ(*hazardp, session->hazard);
}
319
320 /*
321 * __wt_hazard_check --
322 * Return if there's a hazard pointer to the page in the system.
323 */
324 WT_HAZARD *
__wt_hazard_check(WT_SESSION_IMPL * session,WT_REF * ref)325 __wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref)
326 {
327 WT_CONNECTION_IMPL *conn;
328 WT_HAZARD *hp;
329 WT_SESSION_IMPL *s;
330 uint32_t i, j, hazard_inuse, max, session_cnt, walk_cnt;
331
332 /* If a file can never be evicted, hazard pointers aren't required. */
333 if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
334 return (NULL);
335
336 conn = S2C(session);
337
338 WT_STAT_CONN_INCR(session, cache_hazard_checks);
339
340 /*
341 * Hazard pointer arrays might grow and be freed underneath us; enter
342 * the current hazard resource generation for the duration of the walk
343 * to ensure that doesn't happen.
344 */
345 __wt_session_gen_enter(session, WT_GEN_HAZARD);
346
347 /*
348 * No lock is required because the session array is fixed size, but it
349 * may contain inactive entries. We must review any active session
350 * that might contain a hazard pointer, so insert a read barrier after
351 * reading the active session count. That way, no matter what sessions
352 * come or go, we'll check the slots for all of the sessions that could
353 * have been active when we started our check.
354 */
355 WT_ORDERED_READ(session_cnt, conn->session_cnt);
356 for (s = conn->sessions,
357 i = j = max = walk_cnt = 0; i < session_cnt; ++s, ++i) {
358 if (!s->active)
359 continue;
360
361 hazard_get_reference(s, &hp, &hazard_inuse);
362
363 if (hazard_inuse > max) {
364 max = hazard_inuse;
365 WT_STAT_CONN_SET(session, cache_hazard_max, max);
366 }
367
368 for (j = 0; j < hazard_inuse; ++hp, ++j) {
369 ++walk_cnt;
370 if (hp->ref == ref) {
371 WT_STAT_CONN_INCRV(session,
372 cache_hazard_walks, walk_cnt);
373 goto done;
374 }
375 }
376 }
377 WT_STAT_CONN_INCRV(session, cache_hazard_walks, walk_cnt);
378 hp = NULL;
379
380 done: /* Leave the current resource generation. */
381 __wt_session_gen_leave(session, WT_GEN_HAZARD);
382
383 return (hp);
384 }
385
386 /*
387 * __wt_hazard_count --
388 * Count how many hazard pointers this session has on the given page.
389 */
390 u_int
__wt_hazard_count(WT_SESSION_IMPL * session,WT_REF * ref)391 __wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref)
392 {
393 WT_HAZARD *hp;
394 uint32_t i, hazard_inuse;
395 u_int count;
396
397 hazard_get_reference(session, &hp, &hazard_inuse);
398
399 for (count = 0, i = 0; i < hazard_inuse; ++hp, ++i)
400 if (hp->ref == ref)
401 ++count;
402
403 return (count);
404 }
405
406 #ifdef HAVE_DIAGNOSTIC
407 /*
408 * __wt_hazard_check_assert --
409 * Assert there's no hazard pointer to the page.
410 */
411 bool
__wt_hazard_check_assert(WT_SESSION_IMPL * session,void * ref,bool waitfor)412 __wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor)
413 {
414 WT_HAZARD *hp;
415 int i;
416
417 for (i = 0;;) {
418 if ((hp = __wt_hazard_check(session, ref)) == NULL)
419 return (true);
420 if (!waitfor || ++i > 100)
421 break;
422 __wt_sleep(0, 10000);
423 }
424 __wt_errx(session,
425 "hazard pointer reference to discarded object: (%p: %s, line %d)",
426 (void *)hp->ref, hp->func, hp->line);
427 return (false);
428 }
429
430 /*
431 * __hazard_dump --
432 * Display the list of hazard pointers.
433 */
434 static void
__hazard_dump(WT_SESSION_IMPL * session)435 __hazard_dump(WT_SESSION_IMPL *session)
436 {
437 WT_HAZARD *hp;
438
439 for (hp = session->hazard;
440 hp < session->hazard + session->hazard_inuse; ++hp)
441 if (hp->ref != NULL)
442 __wt_errx(session,
443 "session %p: hazard pointer %p: %s, line %d",
444 (void *)session,
445 (void *)hp->ref, hp->func, hp->line);
446 }
447 #endif
448