/*-
 * Copyright (c) 2014-2018 MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

9 #include "wt_internal.h"
10 
11 /*
12  * __session_add_dhandle --
13  *	Add a handle to the session's cache.
14  */
15 static int
__session_add_dhandle(WT_SESSION_IMPL * session)16 __session_add_dhandle(WT_SESSION_IMPL *session)
17 {
18 	WT_DATA_HANDLE_CACHE *dhandle_cache;
19 	uint64_t bucket;
20 
21 	/* Allocate a handle cache entry. */
22 	WT_RET(__wt_calloc_one(session, &dhandle_cache));
23 
24 	dhandle_cache->dhandle = session->dhandle;
25 
26 	bucket = dhandle_cache->dhandle->name_hash % WT_HASH_ARRAY_SIZE;
27 	TAILQ_INSERT_HEAD(&session->dhandles, dhandle_cache, q);
28 	TAILQ_INSERT_HEAD(&session->dhhash[bucket], dhandle_cache, hashq);
29 
30 	return (0);
31 }
32 
33 /*
34  * __session_discard_dhandle --
35  *	Remove a data handle from the session cache.
36  */
37 static void
__session_discard_dhandle(WT_SESSION_IMPL * session,WT_DATA_HANDLE_CACHE * dhandle_cache)38 __session_discard_dhandle(
39     WT_SESSION_IMPL *session, WT_DATA_HANDLE_CACHE *dhandle_cache)
40 {
41 	uint64_t bucket;
42 
43 	bucket = dhandle_cache->dhandle->name_hash % WT_HASH_ARRAY_SIZE;
44 	TAILQ_REMOVE(&session->dhandles, dhandle_cache, q);
45 	TAILQ_REMOVE(&session->dhhash[bucket], dhandle_cache, hashq);
46 
47 	WT_DHANDLE_RELEASE(dhandle_cache->dhandle);
48 	__wt_overwrite_and_free(session, dhandle_cache);
49 }
50 
51 /*
52  * __session_find_dhandle --
53  *	Search for a data handle in the session cache.
54  */
55 static void
__session_find_dhandle(WT_SESSION_IMPL * session,const char * uri,const char * checkpoint,WT_DATA_HANDLE_CACHE ** dhandle_cachep)56 __session_find_dhandle(WT_SESSION_IMPL *session,
57     const char *uri, const char *checkpoint,
58     WT_DATA_HANDLE_CACHE **dhandle_cachep)
59 {
60 	WT_DATA_HANDLE *dhandle;
61 	WT_DATA_HANDLE_CACHE *dhandle_cache;
62 	uint64_t bucket;
63 
64 	dhandle = NULL;
65 
66 	bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
67 retry:	TAILQ_FOREACH(dhandle_cache, &session->dhhash[bucket], hashq) {
68 		dhandle = dhandle_cache->dhandle;
69 		if (WT_DHANDLE_INACTIVE(dhandle) &&
70 		    !WT_IS_METADATA(dhandle)) {
71 			__session_discard_dhandle(session, dhandle_cache);
72 			/* We deleted our entry, retry from the start. */
73 			goto retry;
74 		}
75 
76 		if (strcmp(uri, dhandle->name) != 0)
77 			continue;
78 		if (checkpoint == NULL && dhandle->checkpoint == NULL)
79 			break;
80 		if (checkpoint != NULL && dhandle->checkpoint != NULL &&
81 		    strcmp(checkpoint, dhandle->checkpoint) == 0)
82 			break;
83 	}
84 
85 	*dhandle_cachep = dhandle_cache;
86 }
87 
88 /*
89  * __wt_session_lock_dhandle --
90  *	Return when the current data handle is either (a) open with the
91  *	requested lock mode; or (b) closed and write locked.  If exclusive
92  *	access is requested and cannot be granted immediately because the
93  *	handle is in use, fail with EBUSY.
94  *
95  *	Here is a brief summary of how different operations synchronize using
96  *	either the schema lock, handle locks or handle flags:
97  *
98  *	open -- one thread gets the handle exclusive, reverts to a shared
99  *		handle lock once the handle is open;
100  *	bulk load -- sets bulk and exclusive;
101  *	salvage, truncate, update, verify -- hold the schema lock,
102  *		get the handle exclusive, set a "special" flag;
103  *	sweep -- gets a write lock on the handle, doesn't set exclusive
104  *
105  *	The principle is that some application operations can cause other
106  *	application operations to fail (so attempting to open a cursor on a
107  *	file while it is being bulk-loaded will fail), but internal or
108  *	database-wide operations should not prevent application-initiated
109  *	operations.  For example, attempting to verify a file should not fail
110  *	because the sweep server happens to be in the process of closing that
111  *	file.
112  */
113 int
__wt_session_lock_dhandle(WT_SESSION_IMPL * session,uint32_t flags,bool * is_deadp)114 __wt_session_lock_dhandle(
115     WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp)
116 {
117 	WT_BTREE *btree;
118 	WT_DATA_HANDLE *dhandle;
119 	WT_DECL_RET;
120 	bool is_open, lock_busy, want_exclusive;
121 
122 	*is_deadp = false;
123 
124 	dhandle = session->dhandle;
125 	btree = dhandle->handle;
126 	lock_busy = false;
127 	want_exclusive = LF_ISSET(WT_DHANDLE_EXCLUSIVE);
128 
129 	/*
130 	 * If this session already has exclusive access to the handle, there is
131 	 * no point trying to lock it again.
132 	 *
133 	 * This should only happen if a checkpoint handle is locked multiple
134 	 * times during a checkpoint operation, or the handle is already open
135 	 * without any special flags.  In particular, it must fail if
136 	 * attempting to checkpoint a handle opened for a bulk load, even in
137 	 * the same session.
138 	 */
139 	if (dhandle->excl_session == session) {
140 		if (!LF_ISSET(WT_DHANDLE_LOCK_ONLY) &&
141 		    (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
142 		    (btree != NULL && F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS))))
143 			return (__wt_set_return(session, EBUSY));
144 		++dhandle->excl_ref;
145 		return (0);
146 	}
147 
148 	/*
149 	 * Check that the handle is open.  We've already incremented
150 	 * the reference count, so once the handle is open it won't be
151 	 * closed by another thread.
152 	 *
153 	 * If we can see the WT_DHANDLE_OPEN flag set while holding a
154 	 * lock on the handle, then it's really open and we can start
155 	 * using it.  Alternatively, if we can get an exclusive lock
156 	 * and WT_DHANDLE_OPEN is still not set, we need to do the open.
157 	 */
158 	for (;;) {
159 		/* If the handle is dead, give up. */
160 		if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
161 			*is_deadp = true;
162 			return (0);
163 		}
164 
165 		/*
166 		 * If the handle is already open for a special operation,
167 		 * give up.
168 		 */
169 		if (btree != NULL && F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS))
170 			return (__wt_set_return(session, EBUSY));
171 
172 		/*
173 		 * If the handle is open, get a read lock and recheck.
174 		 *
175 		 * Wait for a read lock if we want exclusive access and failed
176 		 * to get it: the sweep server may be closing this handle, and
177 		 * we need to wait for it to release its lock.  If we want
178 		 * exclusive access and find the handle open once we get the
179 		 * read lock, give up: some other thread has it locked for real.
180 		 */
181 		if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
182 		    (!want_exclusive || lock_busy)) {
183 			__wt_readlock(session, &dhandle->rwlock);
184 			if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
185 				*is_deadp = true;
186 				__wt_readunlock(session, &dhandle->rwlock);
187 				return (0);
188 			}
189 
190 			is_open = F_ISSET(dhandle, WT_DHANDLE_OPEN);
191 			if (is_open && !want_exclusive)
192 				return (0);
193 			__wt_readunlock(session, &dhandle->rwlock);
194 		} else
195 			is_open = false;
196 
197 		/*
198 		 * It isn't open or we want it exclusive: try to get an
199 		 * exclusive lock.  There is some subtlety here: if we race
200 		 * with another thread that successfully opens the file, we
201 		 * don't want to block waiting to get exclusive access.
202 		 */
203 		if ((ret =
204 		    __wt_try_writelock(session, &dhandle->rwlock)) == 0) {
205 			if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
206 				*is_deadp = true;
207 				__wt_writeunlock(session, &dhandle->rwlock);
208 				return (0);
209 			}
210 
211 			/*
212 			 * If it was opened while we waited, drop the write
213 			 * lock and get a read lock instead.
214 			 */
215 			if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
216 			    !want_exclusive) {
217 				lock_busy = false;
218 				__wt_writeunlock(session, &dhandle->rwlock);
219 				continue;
220 			}
221 
222 			/* We have an exclusive lock, we're done. */
223 			F_SET(dhandle, WT_DHANDLE_EXCLUSIVE);
224 			WT_ASSERT(session,
225 			    dhandle->excl_session == NULL &&
226 			    dhandle->excl_ref == 0);
227 			dhandle->excl_session = session;
228 			dhandle->excl_ref = 1;
229 			WT_ASSERT(session, !F_ISSET(dhandle, WT_DHANDLE_DEAD));
230 			return (0);
231 		}
232 		if (ret != EBUSY || (is_open && want_exclusive) ||
233 		    LF_ISSET(WT_DHANDLE_LOCK_ONLY))
234 			return (ret);
235 		lock_busy = true;
236 
237 		/* Give other threads a chance to make progress. */
238 		WT_STAT_CONN_INCR(session, dhandle_lock_blocked);
239 		__wt_yield();
240 	}
241 }
242 
243 /*
244  * __wt_session_release_dhandle --
245  *	Unlock a data handle.
246  */
247 int
__wt_session_release_dhandle(WT_SESSION_IMPL * session)248 __wt_session_release_dhandle(WT_SESSION_IMPL *session)
249 {
250 	WT_BTREE *btree;
251 	WT_DATA_HANDLE *dhandle;
252 	WT_DATA_HANDLE_CACHE *dhandle_cache;
253 	WT_DECL_RET;
254 	bool locked, write_locked;
255 
256 	dhandle = session->dhandle;
257 	btree = dhandle->handle;
258 	write_locked = F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE);
259 	locked = true;
260 
261 	/*
262 	 * If we had special flags set, close the handle so that future access
263 	 * can get a handle without special flags.
264 	 */
265 	if (F_ISSET(dhandle, WT_DHANDLE_DISCARD | WT_DHANDLE_DISCARD_KILL)) {
266 		WT_SAVE_DHANDLE(session, __session_find_dhandle(session,
267 		    dhandle->name, dhandle->checkpoint, &dhandle_cache));
268 		if (dhandle_cache != NULL)
269 			__session_discard_dhandle(session, dhandle_cache);
270 	}
271 
272 	/*
273 	 * Close the handle if we are finishing a bulk load or if the handle is
274 	 * set to discard on release.
275 	 */
276 	if (btree != NULL && F_ISSET(btree, WT_BTREE_BULK)) {
277 		WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) &&
278 		    !F_ISSET(dhandle, WT_DHANDLE_DISCARD));
279 		/*
280 		 * Acquire the schema lock while completing a bulk load.  This
281 		 * avoids racing with a checkpoint while it gathers a set
282 		 * of handles.
283 		 */
284 		WT_WITH_SCHEMA_LOCK(session, ret =
285 		    __wt_conn_dhandle_close(session, false, false));
286 	} else if ((btree != NULL && F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)) ||
287 	    F_ISSET(dhandle, WT_DHANDLE_DISCARD | WT_DHANDLE_DISCARD_KILL)) {
288 		WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
289 
290 		ret = __wt_conn_dhandle_close(session, false,
291 		    F_ISSET(dhandle, WT_DHANDLE_DISCARD_KILL));
292 		F_CLR(dhandle, WT_DHANDLE_DISCARD | WT_DHANDLE_DISCARD_KILL);
293 	}
294 
295 	if (session == dhandle->excl_session) {
296 		if (--dhandle->excl_ref == 0)
297 			dhandle->excl_session = NULL;
298 		else
299 			locked = false;
300 	}
301 	if (locked) {
302 		if (write_locked) {
303 			F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
304 			__wt_writeunlock(session, &dhandle->rwlock);
305 		} else
306 			__wt_readunlock(session, &dhandle->rwlock);
307 	}
308 
309 	session->dhandle = NULL;
310 	return (ret);
311 }
312 
313 /*
314  * __wt_session_get_btree_ckpt --
315  *	Check the configuration strings for a checkpoint name, get a btree
316  * handle for the given name, set session->dhandle.
317  */
318 int
__wt_session_get_btree_ckpt(WT_SESSION_IMPL * session,const char * uri,const char * cfg[],uint32_t flags)319 __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session,
320     const char *uri, const char *cfg[], uint32_t flags)
321 {
322 	WT_CONFIG_ITEM cval;
323 	WT_DECL_RET;
324 	const char *checkpoint;
325 	bool last_ckpt;
326 
327 	last_ckpt = false;
328 	checkpoint = NULL;
329 
330 	/*
331 	 * This function exists to handle checkpoint configuration.  Callers
332 	 * that never open a checkpoint call the underlying function directly.
333 	 */
334 	WT_RET_NOTFOUND_OK(
335 	    __wt_config_gets_def(session, cfg, "checkpoint", 0, &cval));
336 	if (cval.len != 0) {
337 		/*
338 		 * The internal checkpoint name is special, find the last
339 		 * unnamed checkpoint of the object.
340 		 */
341 		if (WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
342 			last_ckpt = true;
343 retry:			WT_RET(__wt_meta_checkpoint_last_name(
344 			    session, uri, &checkpoint));
345 		} else
346 			WT_RET(__wt_strndup(
347 			    session, cval.str, cval.len, &checkpoint));
348 	}
349 
350 	ret = __wt_session_get_dhandle(session, uri, checkpoint, cfg, flags);
351 	__wt_free(session, checkpoint);
352 
353 	/*
354 	 * There's a potential race: we get the name of the most recent unnamed
355 	 * checkpoint, but if it's discarded (or locked so it can be discarded)
356 	 * by the time we try to open it, we'll fail the open.  Retry in those
357 	 * cases, a new "last" checkpoint should surface, and we can't return an
358 	 * error, the application will be justifiably upset if we can't open the
359 	 * last checkpoint instance of an object.
360 	 *
361 	 * The check against WT_NOTFOUND is correct: if there was no checkpoint
362 	 * for the object (that is, the object has never been in a checkpoint),
363 	 * we returned immediately after the call to search for that name.
364 	 */
365 	if (last_ckpt && (ret == WT_NOTFOUND || ret == EBUSY))
366 		goto retry;
367 	return (ret);
368 }
369 
370 /*
371  * __wt_session_close_cache --
372  *	Close any cached handles in a session.
373  */
374 void
__wt_session_close_cache(WT_SESSION_IMPL * session)375 __wt_session_close_cache(WT_SESSION_IMPL *session)
376 {
377 	WT_DATA_HANDLE_CACHE *dhandle_cache, *dhandle_cache_tmp;
378 
379 	WT_TAILQ_SAFE_REMOVE_BEGIN(dhandle_cache,
380 	    &session->dhandles, q, dhandle_cache_tmp) {
381 		__session_discard_dhandle(session, dhandle_cache);
382 	} WT_TAILQ_SAFE_REMOVE_END
383 }
384 
385 /*
386  * __session_dhandle_sweep --
387  *	Discard any session dhandles that are not open.
388  */
389 static void
__session_dhandle_sweep(WT_SESSION_IMPL * session)390 __session_dhandle_sweep(WT_SESSION_IMPL *session)
391 {
392 	WT_CONNECTION_IMPL *conn;
393 	WT_DATA_HANDLE *dhandle;
394 	WT_DATA_HANDLE_CACHE *dhandle_cache, *dhandle_cache_tmp;
395 	time_t now;
396 
397 	conn = S2C(session);
398 
399 	/*
400 	 * Periodically sweep for dead handles; if we've swept recently, don't
401 	 * do it again.
402 	 */
403 	__wt_seconds(session, &now);
404 	if (difftime(now, session->last_sweep) < conn->sweep_interval)
405 		return;
406 	session->last_sweep = now;
407 
408 	WT_STAT_CONN_INCR(session, dh_session_sweeps);
409 
410 	TAILQ_FOREACH_SAFE(dhandle_cache,
411 	    &session->dhandles, q, dhandle_cache_tmp) {
412 		dhandle = dhandle_cache->dhandle;
413 		if (dhandle != session->dhandle &&
414 		    dhandle->session_inuse == 0 &&
415 		    (WT_DHANDLE_INACTIVE(dhandle) ||
416 		    (dhandle->timeofdeath != 0 &&
417 		    difftime(now, dhandle->timeofdeath) >
418 		    conn->sweep_idle_time))) {
419 			WT_STAT_CONN_INCR(session, dh_session_handles);
420 			WT_ASSERT(session, !WT_IS_METADATA(dhandle));
421 			__session_discard_dhandle(session, dhandle_cache);
422 		}
423 	}
424 }
425 
426 /*
427  * __session_find_shared_dhandle --
428  *	Search for a data handle in the connection and add it to a session's
429  *	cache.  We must increment the handle's reference count while holding
430  *	the handle list lock.
431  */
432 static int
__session_find_shared_dhandle(WT_SESSION_IMPL * session,const char * uri,const char * checkpoint)433 __session_find_shared_dhandle(
434     WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
435 {
436 	WT_DECL_RET;
437 
438 	WT_WITH_HANDLE_LIST_READ_LOCK(session,
439 	    if ((ret = __wt_conn_dhandle_find(session, uri, checkpoint)) == 0)
440 		    WT_DHANDLE_ACQUIRE(session->dhandle));
441 
442 	if (ret != WT_NOTFOUND)
443 		return (ret);
444 
445 	WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
446 	    if ((ret = __wt_conn_dhandle_alloc(session, uri, checkpoint)) == 0)
447 		    WT_DHANDLE_ACQUIRE(session->dhandle));
448 
449 	return (ret);
450 }
451 
452 /*
453  * __session_get_dhandle --
454  *	Search for a data handle, first in the session cache, then in the
455  *	connection.
456  */
457 static int
__session_get_dhandle(WT_SESSION_IMPL * session,const char * uri,const char * checkpoint)458 __session_get_dhandle(
459     WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
460 {
461 	WT_DATA_HANDLE_CACHE *dhandle_cache;
462 	WT_DECL_RET;
463 
464 	__session_find_dhandle(session, uri, checkpoint, &dhandle_cache);
465 	if (dhandle_cache != NULL) {
466 		session->dhandle = dhandle_cache->dhandle;
467 		return (0);
468 	}
469 
470 	/* Sweep the handle list to remove any dead handles. */
471 	__session_dhandle_sweep(session);
472 
473 	/*
474 	 * We didn't find a match in the session cache, search the shared
475 	 * handle list and cache the handle we find.
476 	 */
477 	WT_RET(__session_find_shared_dhandle(session, uri, checkpoint));
478 
479 	/*
480 	 * Fixup the reference count on failure (we incremented the reference
481 	 * count while holding the handle-list lock).
482 	 */
483 	if ((ret = __session_add_dhandle(session)) != 0) {
484 		WT_DHANDLE_RELEASE(session->dhandle);
485 		session->dhandle = NULL;
486 	}
487 
488 	return (ret);
489 }
490 
491 /*
492  * __wt_session_get_dhandle --
493  *	Get a data handle for the given name, set session->dhandle.
494  */
495 int
__wt_session_get_dhandle(WT_SESSION_IMPL * session,const char * uri,const char * checkpoint,const char * cfg[],uint32_t flags)496 __wt_session_get_dhandle(WT_SESSION_IMPL *session,
497     const char *uri, const char *checkpoint, const char *cfg[], uint32_t flags)
498 {
499 	WT_DATA_HANDLE *dhandle;
500 	WT_DECL_RET;
501 	bool is_dead;
502 
503 	WT_ASSERT(session, !F_ISSET(session, WT_SESSION_NO_DATA_HANDLES));
504 
505 	for (;;) {
506 		WT_RET(__session_get_dhandle(session, uri, checkpoint));
507 		dhandle = session->dhandle;
508 
509 		/* Try to lock the handle. */
510 		WT_RET(__wt_session_lock_dhandle(session, flags, &is_dead));
511 		if (is_dead)
512 			continue;
513 
514 		/* If the handle is open in the mode we want, we're done. */
515 		if (LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
516 		    (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
517 		    !LF_ISSET(WT_BTREE_SPECIAL_FLAGS)))
518 			break;
519 
520 		WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
521 
522 		/*
523 		 * For now, we need the schema lock and handle list locks to
524 		 * open a file for real.
525 		 *
526 		 * Code needing exclusive access (such as drop or verify)
527 		 * assumes that it can close all open handles, then open an
528 		 * exclusive handle on the active tree and no other threads can
529 		 * reopen handles in the meantime.  A combination of the schema
530 		 * and handle list locks are used to enforce this.
531 		 */
532 		if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
533 			dhandle->excl_session = NULL;
534 			dhandle->excl_ref = 0;
535 			F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
536 			__wt_writeunlock(session, &dhandle->rwlock);
537 
538 			WT_WITH_SCHEMA_LOCK(session,
539 			    ret = __wt_session_get_dhandle(
540 				session, uri, checkpoint, cfg, flags));
541 
542 			return (ret);
543 		}
544 
545 		/* Open the handle. */
546 		if ((ret = __wt_conn_dhandle_open(session, cfg, flags)) == 0 &&
547 		    LF_ISSET(WT_DHANDLE_EXCLUSIVE))
548 			break;
549 
550 		/*
551 		 * If we got the handle exclusive to open it but only want
552 		 * ordinary access, drop our lock and retry the open.
553 		 */
554 		dhandle->excl_session = NULL;
555 		dhandle->excl_ref = 0;
556 		F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
557 		__wt_writeunlock(session, &dhandle->rwlock);
558 		WT_RET(ret);
559 	}
560 
561 	WT_ASSERT(session, !F_ISSET(dhandle, WT_DHANDLE_DEAD));
562 	WT_ASSERT(session, LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
563 	    F_ISSET(dhandle, WT_DHANDLE_OPEN));
564 
565 	WT_ASSERT(session, LF_ISSET(WT_DHANDLE_EXCLUSIVE) ==
566 	    F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) || dhandle->excl_ref > 1);
567 
568 	return (0);
569 }
570 
571 /*
572  * __wt_session_lock_checkpoint --
573  *	Lock the btree handle for the given checkpoint name.
574  */
575 int
__wt_session_lock_checkpoint(WT_SESSION_IMPL * session,const char * checkpoint)576 __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint)
577 {
578 	WT_DATA_HANDLE *saved_dhandle;
579 	WT_DECL_RET;
580 
581 	WT_ASSERT(session, WT_META_TRACKING(session));
582 	saved_dhandle = session->dhandle;
583 
584 	/*
585 	 * Get the checkpoint handle exclusive, so no one else can access it
586 	 * while we are creating the new checkpoint.  Hold the lock until the
587 	 * checkpoint completes.
588 	 */
589 	WT_ERR(__wt_session_get_dhandle(session, saved_dhandle->name,
590 	    checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));
591 	if ((ret = __wt_meta_track_handle_lock(session, false)) != 0) {
592 		WT_TRET(__wt_session_release_dhandle(session));
593 		goto err;
594 	}
595 
596 	/*
597 	 * Get exclusive access to the handle and then flush any pages in this
598 	 * checkpoint from the cache (we are about to re-write the checkpoint
599 	 * which will mean cached pages no longer have valid contents). This
600 	 * is especially noticeable with memory mapped files, since changes to
601 	 * the underlying file are visible to the in-memory pages.
602 	 */
603 	WT_ERR(__wt_evict_file_exclusive_on(session));
604 	ret = __wt_cache_op(session, WT_SYNC_DISCARD);
605 	__wt_evict_file_exclusive_off(session);
606 	WT_ERR(ret);
607 
608 	/*
609 	 * We lock checkpoint handles that we are overwriting, so the handle
610 	 * must be closed when we release it.
611 	 */
612 	F_SET(session->dhandle, WT_DHANDLE_DISCARD);
613 
614 	/* Restore the original data handle in the session. */
615 err:	session->dhandle = saved_dhandle;
616 	return (ret);
617 }
618