1 /*-
2  * Copyright (c) 2014-2018 MongoDB, Inc.
3  * Copyright (c) 2008-2014 WiredTiger, Inc.
4  *	All rights reserved.
5  *
6  * See the file LICENSE for redistribution information.
7  */
8 
9 #include "wt_internal.h"
10 
#ifdef HAVE_DIAGNOSTIC
/*
 * Forward declaration: __hazard_dump is defined at the end of this file and is
 * called from __wt_hazard_close, which appears before the definition.
 */
static void __hazard_dump(WT_SESSION_IMPL *);
#endif
14 
15 /*
16  * hazard_grow --
17  *	Grow a hazard pointer array.
18  */
19 static int
hazard_grow(WT_SESSION_IMPL * session)20 hazard_grow(WT_SESSION_IMPL *session)
21 {
22 	WT_HAZARD *nhazard;
23 	size_t size;
24 	uint64_t hazard_gen;
25 	void *ohazard;
26 
27 	/*
28 	 * Allocate a new, larger hazard pointer array and copy the contents of
29 	 * the original into place.
30 	 */
31 	size = session->hazard_size;
32 	WT_RET(__wt_calloc_def(session, size * 2, &nhazard));
33 	memcpy(nhazard, session->hazard, size * sizeof(WT_HAZARD));
34 
35 	/*
36 	 * Swap the new hazard pointer array into place after initialization
37 	 * is complete (initialization must complete before eviction can see
38 	 * the new hazard pointer array), then schedule the original to be
39 	 * freed.
40 	 */
41 	ohazard = session->hazard;
42 	WT_PUBLISH(session->hazard, nhazard);
43 
44 	/*
45 	 * Increase the size of the session's pointer array after swapping it
46 	 * into place (the session's reference must be updated before eviction
47 	 * can see the new size).
48 	 */
49 	WT_PUBLISH(session->hazard_size, (uint32_t)(size * 2));
50 
51 	/*
52 	 * Threads using the hazard pointer array from now on will use the new
53 	 * one. Increment the hazard pointer generation number, and schedule a
54 	 * future free of the old memory. Ignore any failure, leak the memory.
55 	 */
56 	hazard_gen = __wt_gen_next(session, WT_GEN_HAZARD);
57 	WT_IGNORE_RET(
58 	    __wt_stash_add(session, WT_GEN_HAZARD, hazard_gen, ohazard, 0));
59 
60 	return (0);
61 }
62 
63 /*
64  * __wt_hazard_set --
65  *	Set a hazard pointer.
66  */
67 int
__wt_hazard_set(WT_SESSION_IMPL * session,WT_REF * ref,bool * busyp,const char * func,int line)68 __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
69 #ifdef HAVE_DIAGNOSTIC
70     , const char *func, int line
71 #endif
72     )
73 {
74 	WT_HAZARD *hp;
75 	uint32_t current_state;
76 
77 	*busyp = false;
78 
79 	/* If a file can never be evicted, hazard pointers aren't required. */
80 	if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
81 		return (0);
82 
83 	/*
84 	 * If there isn't a valid page, we're done. This read can race with
85 	 * eviction and splits, we re-check it after a barrier to make sure
86 	 * we have a valid reference.
87 	 */
88 	current_state = ref->state;
89 	if (current_state != WT_REF_LIMBO && current_state != WT_REF_MEM) {
90 		*busyp = true;
91 		return (0);
92 	}
93 
94 	/* If we have filled the current hazard pointer array, grow it. */
95 	if (session->nhazard >= session->hazard_size) {
96 		WT_ASSERT(session,
97 		    session->nhazard == session->hazard_size &&
98 		    session->hazard_inuse == session->hazard_size);
99 		WT_RET(hazard_grow(session));
100 	}
101 
102 	/*
103 	 * If there are no available hazard pointer slots, make another one
104 	 * visible.
105 	 */
106 	if (session->nhazard >= session->hazard_inuse) {
107 		WT_ASSERT(session,
108 		    session->nhazard == session->hazard_inuse &&
109 		    session->hazard_inuse < session->hazard_size);
110 		hp = &session->hazard[session->hazard_inuse++];
111 	} else {
112 		WT_ASSERT(session,
113 		    session->nhazard < session->hazard_inuse &&
114 		    session->hazard_inuse <= session->hazard_size);
115 
116 		/*
117 		 * There must be an empty slot in the array, find it. Skip most
118 		 * of the active slots by starting after the active count slot;
119 		 * there may be a free slot before there, but checking is
120 		 * expensive. If we reach the end of the array, continue the
121 		 * search from the beginning of the array.
122 		 */
123 		for (hp = session->hazard + session->nhazard;; ++hp) {
124 			if (hp >= session->hazard + session->hazard_inuse)
125 				hp = session->hazard;
126 			if (hp->ref == NULL)
127 				break;
128 		}
129 	}
130 
131 	WT_ASSERT(session, hp->ref == NULL);
132 
133 	/*
134 	 * Do the dance:
135 	 *
136 	 * The memory location which makes a page "real" is the WT_REF's state
137 	 * of WT_REF_LIMBO or WT_REF_MEM, which can be set to WT_REF_LOCKED
138 	 * at any time by the page eviction server.
139 	 *
140 	 * Add the WT_REF reference to the session's hazard list and flush the
141 	 * write, then see if the page's state is still valid.  If so, we can
142 	 * use the page because the page eviction server will see our hazard
143 	 * pointer before it discards the page (the eviction server sets the
144 	 * state to WT_REF_LOCKED, then flushes memory and checks the hazard
145 	 * pointers).
146 	 */
147 	hp->ref = ref;
148 #ifdef HAVE_DIAGNOSTIC
149 	hp->func = func;
150 	hp->line = line;
151 #endif
152 	/* Publish the hazard pointer before reading page's state. */
153 	WT_FULL_BARRIER();
154 
155 	/*
156 	 * Check if the page state is still valid, where valid means a
157 	 * state of WT_REF_LIMBO or WT_REF_MEM.
158 	 */
159 	current_state = ref->state;
160 	if (current_state == WT_REF_LIMBO || current_state == WT_REF_MEM) {
161 		++session->nhazard;
162 
163 		/*
164 		 * Callers require a barrier here so operations holding
165 		 * the hazard pointer see consistent data.
166 		 */
167 		WT_READ_BARRIER();
168 		return (0);
169 	}
170 
171 	/*
172 	 * The page isn't available, it's being considered for eviction
173 	 * (or being evicted, for all we know).  If the eviction server
174 	 * sees our hazard pointer before evicting the page, it will
175 	 * return the page to use, no harm done, if it doesn't, it will
176 	 * go ahead and complete the eviction.
177 	 *
178 	 * We don't bother publishing this update: the worst case is we
179 	 * prevent some random page from being evicted.
180 	 */
181 	hp->ref = NULL;
182 	*busyp = true;
183 	return (0);
184 }
185 
186 /*
187  * __wt_hazard_clear --
188  *	Clear a hazard pointer.
189  */
190 int
__wt_hazard_clear(WT_SESSION_IMPL * session,WT_REF * ref)191 __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref)
192 {
193 	WT_HAZARD *hp;
194 
195 	/* If a file can never be evicted, hazard pointers aren't required. */
196 	if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
197 		return (0);
198 
199 	/*
200 	 * Clear the caller's hazard pointer.
201 	 * The common pattern is LIFO, so do a reverse search.
202 	 */
203 	for (hp = session->hazard + session->hazard_inuse - 1;
204 	    hp >= session->hazard;
205 	    --hp)
206 		if (hp->ref == ref) {
207 			/*
208 			 * We don't publish the hazard pointer clear in the
209 			 * general case.  It's not required for correctness;
210 			 * it gives an eviction thread faster access to the
211 			 * page were the page selected for eviction, but the
212 			 * generation number was just set, it's unlikely the
213 			 * page will be selected for eviction.
214 			 */
215 			hp->ref = NULL;
216 
217 			/*
218 			 * If this was the last hazard pointer in the session,
219 			 * reset the size so that checks can skip this session.
220 			 *
221 			 * A write-barrier() is necessary before the change to
222 			 * the in-use value, the number of active references
223 			 * can never be less than the number of in-use slots.
224 			 */
225 			if (--session->nhazard == 0)
226 				WT_PUBLISH(session->hazard_inuse, 0);
227 			return (0);
228 		}
229 
230 	/*
231 	 * A serious error, we should always find the hazard pointer.  Panic,
232 	 * because using a page we didn't have pinned down implies corruption.
233 	 */
234 	WT_PANIC_RET(session, EINVAL,
235 	    "session %p: clear hazard pointer: %p: not found",
236 	    (void *)session, (void *)ref);
237 }
238 
239 /*
240  * __wt_hazard_close --
241  *	Verify that no hazard pointers are set.
242  */
243 void
__wt_hazard_close(WT_SESSION_IMPL * session)244 __wt_hazard_close(WT_SESSION_IMPL *session)
245 {
246 	WT_HAZARD *hp;
247 	bool found;
248 
249 	/*
250 	 * Check for a set hazard pointer and complain if we find one.  We could
251 	 * just check the session's hazard pointer count, but this is a useful
252 	 * diagnostic.
253 	 */
254 	for (found = false, hp = session->hazard;
255 	    hp < session->hazard + session->hazard_inuse; ++hp)
256 		if (hp->ref != NULL) {
257 			found = true;
258 			break;
259 		}
260 	if (session->nhazard == 0 && !found)
261 		return;
262 
263 	__wt_errx(session,
264 	    "session %p: close hazard pointer table: table not empty",
265 	    (void *)session);
266 
267 #ifdef HAVE_DIAGNOSTIC
268 	__hazard_dump(session);
269 #endif
270 
271 	/*
272 	 * Clear any hazard pointers because it's not a correctness problem
273 	 * (any hazard pointer we find can't be real because the session is
274 	 * being closed when we're called). We do this work because session
275 	 * close isn't that common that it's an expensive check, and we don't
276 	 * want to let a hazard pointer lie around, keeping a page from being
277 	 * evicted.
278 	 *
279 	 * We don't panic: this shouldn't be a correctness issue (at least, I
280 	 * can't think of a reason it would be).
281 	 */
282 	for (hp = session->hazard;
283 	    hp < session->hazard + session->hazard_inuse; ++hp)
284 		if (hp->ref != NULL) {
285 			hp->ref = NULL;
286 			--session->nhazard;
287 		}
288 
289 	if (session->nhazard != 0)
290 		__wt_errx(session,
291 		    "session %p: close hazard pointer table: count didn't "
292 		    "match entries",
293 		    (void *)session);
294 }
295 
296 /*
297  * hazard_get_reference --
298  *	Return a consistent reference to a hazard pointer array.
299  */
300 static inline void
hazard_get_reference(WT_SESSION_IMPL * session,WT_HAZARD ** hazardp,uint32_t * hazard_inusep)301 hazard_get_reference(
302     WT_SESSION_IMPL *session, WT_HAZARD **hazardp, uint32_t *hazard_inusep)
303 {
304 	/*
305 	 * Hazard pointer arrays can be swapped out from under us if they grow.
306 	 * First, read the current in-use value. The read must precede the read
307 	 * of the hazard pointer itself (so the in-use value is pessimistic
308 	 * should the hazard array grow), and additionally ensure we only read
309 	 * the in-use value once. Then, read the hazard pointer, also ensuring
310 	 * we only read it once.
311 	 *
312 	 * Use a barrier instead of marking the fields volatile because we don't
313 	 * want to slow down the rest of the hazard pointer functions that don't
314 	 * need special treatment.
315 	 */
316 	WT_ORDERED_READ(*hazard_inusep, session->hazard_inuse);
317 	WT_ORDERED_READ(*hazardp, session->hazard);
318 }
319 
320 /*
321  * __wt_hazard_check --
322  *	Return if there's a hazard pointer to the page in the system.
323  */
324 WT_HAZARD *
__wt_hazard_check(WT_SESSION_IMPL * session,WT_REF * ref)325 __wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref)
326 {
327 	WT_CONNECTION_IMPL *conn;
328 	WT_HAZARD *hp;
329 	WT_SESSION_IMPL *s;
330 	uint32_t i, j, hazard_inuse, max, session_cnt, walk_cnt;
331 
332 	/* If a file can never be evicted, hazard pointers aren't required. */
333 	if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
334 		return (NULL);
335 
336 	conn = S2C(session);
337 
338 	WT_STAT_CONN_INCR(session, cache_hazard_checks);
339 
340 	/*
341 	 * Hazard pointer arrays might grow and be freed underneath us; enter
342 	 * the current hazard resource generation for the duration of the walk
343 	 * to ensure that doesn't happen.
344 	 */
345 	__wt_session_gen_enter(session, WT_GEN_HAZARD);
346 
347 	/*
348 	 * No lock is required because the session array is fixed size, but it
349 	 * may contain inactive entries.  We must review any active session
350 	 * that might contain a hazard pointer, so insert a read barrier after
351 	 * reading the active session count.  That way, no matter what sessions
352 	 * come or go, we'll check the slots for all of the sessions that could
353 	 * have been active when we started our check.
354 	 */
355 	WT_ORDERED_READ(session_cnt, conn->session_cnt);
356 	for (s = conn->sessions,
357 	    i = j = max = walk_cnt = 0; i < session_cnt; ++s, ++i) {
358 		if (!s->active)
359 			continue;
360 
361 		hazard_get_reference(s, &hp, &hazard_inuse);
362 
363 		if (hazard_inuse > max) {
364 			max = hazard_inuse;
365 			WT_STAT_CONN_SET(session, cache_hazard_max, max);
366 		}
367 
368 		for (j = 0; j < hazard_inuse; ++hp, ++j) {
369 			++walk_cnt;
370 			if (hp->ref == ref) {
371 				WT_STAT_CONN_INCRV(session,
372 				    cache_hazard_walks, walk_cnt);
373 				goto done;
374 			}
375 		}
376 	}
377 	WT_STAT_CONN_INCRV(session, cache_hazard_walks, walk_cnt);
378 	hp = NULL;
379 
380 done:	/* Leave the current resource generation. */
381 	__wt_session_gen_leave(session, WT_GEN_HAZARD);
382 
383 	return (hp);
384 }
385 
386 /*
387  * __wt_hazard_count --
388  *	Count how many hazard pointers this session has on the given page.
389  */
390 u_int
__wt_hazard_count(WT_SESSION_IMPL * session,WT_REF * ref)391 __wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref)
392 {
393 	WT_HAZARD *hp;
394 	uint32_t i, hazard_inuse;
395 	u_int count;
396 
397 	hazard_get_reference(session, &hp, &hazard_inuse);
398 
399 	for (count = 0, i = 0; i < hazard_inuse; ++hp, ++i)
400 		if (hp->ref == ref)
401 			++count;
402 
403 	return (count);
404 }
405 
406 #ifdef HAVE_DIAGNOSTIC
407 /*
408  * __wt_hazard_check_assert --
409  *	Assert there's no hazard pointer to the page.
410  */
411 bool
__wt_hazard_check_assert(WT_SESSION_IMPL * session,void * ref,bool waitfor)412 __wt_hazard_check_assert(WT_SESSION_IMPL *session, void *ref, bool waitfor)
413 {
414 	WT_HAZARD *hp;
415 	int i;
416 
417 	for (i = 0;;) {
418 		if ((hp = __wt_hazard_check(session, ref)) == NULL)
419 			return (true);
420 		if (!waitfor || ++i > 100)
421 			break;
422 		__wt_sleep(0, 10000);
423 	}
424 	__wt_errx(session,
425 	    "hazard pointer reference to discarded object: (%p: %s, line %d)",
426 	    (void *)hp->ref, hp->func, hp->line);
427 	return (false);
428 }
429 
430 /*
431  * __hazard_dump --
432  *	Display the list of hazard pointers.
433  */
434 static void
__hazard_dump(WT_SESSION_IMPL * session)435 __hazard_dump(WT_SESSION_IMPL *session)
436 {
437 	WT_HAZARD *hp;
438 
439 	for (hp = session->hazard;
440 	    hp < session->hazard + session->hazard_inuse; ++hp)
441 		if (hp->ref != NULL)
442 			__wt_errx(session,
443 			    "session %p: hazard pointer %p: %s, line %d",
444 			    (void *)session,
445 			    (void *)hp->ref, hp->func, hp->line);
446 }
447 #endif
448