1 /*-
2  * Copyright (c) 2014-2018 MongoDB, Inc.
3  * Copyright (c) 2008-2014 WiredTiger, Inc.
4  *	All rights reserved.
5  *
6  * See the file LICENSE for redistribution information.
7  */
8 
9 #include "wt_internal.h"
10 
11 /*
12  * __conn_dhandle_config_clear --
13  *	Clear the underlying object's configuration information.
14  */
15 static void
__conn_dhandle_config_clear(WT_SESSION_IMPL * session)16 __conn_dhandle_config_clear(WT_SESSION_IMPL *session)
17 {
18 	WT_DATA_HANDLE *dhandle;
19 	const char **a;
20 
21 	dhandle = session->dhandle;
22 
23 	if (dhandle->cfg == NULL)
24 		return;
25 	for (a = dhandle->cfg; *a != NULL; ++a)
26 		__wt_free(session, *a);
27 	__wt_free(session, dhandle->cfg);
28 	__wt_free(session, dhandle->meta_base);
29 }
30 
31 /*
32  * __conn_dhandle_config_set --
33  *	Set up a btree handle's configuration information.
34  */
35 static int
__conn_dhandle_config_set(WT_SESSION_IMPL * session)36 __conn_dhandle_config_set(WT_SESSION_IMPL *session)
37 {
38 	WT_DATA_HANDLE *dhandle;
39 	WT_DECL_RET;
40 	const char *base, *cfg[3];
41 	char *metaconf, *tmp;
42 
43 	dhandle = session->dhandle;
44 	base = NULL;
45 	tmp = NULL;
46 
47 	/*
48 	 * Read the object's entry from the metadata file, we're done if we
49 	 * don't find one.
50 	 */
51 	if ((ret =
52 	    __wt_metadata_search(session, dhandle->name, &metaconf)) != 0) {
53 		if (ret == WT_NOTFOUND)
54 			ret = __wt_set_return(session, ENOENT);
55 		WT_RET(ret);
56 	}
57 
58 	/*
59 	 * The defaults are included because persistent configuration
60 	 * information is stored in the metadata file and it may be from an
61 	 * earlier version of WiredTiger.  If defaults are included in the
62 	 * configuration, we can add new configuration strings without
63 	 * upgrading the metadata file or writing special code in case a
64 	 * configuration string isn't initialized, as long as the new
65 	 * configuration string has an appropriate default value.
66 	 *
67 	 * The error handling is a little odd, but be careful: we're holding a
68 	 * chunk of allocated memory in metaconf.  If we fail before we copy a
69 	 * reference to it into the object's configuration array, we must free
70 	 * it, after the copy, we don't want to free it.
71 	 */
72 	WT_ERR(__wt_calloc_def(session, 3, &dhandle->cfg));
73 	switch (dhandle->type) {
74 	case WT_DHANDLE_TYPE_BTREE:
75 		/*
76 		 * We are stripping out the checkpoint and checkpoint_lsn
77 		 * information from the config string. We save the rest of
78 		 * the metadata string, that is essentially static and
79 		 * unchanging and then concatenate the new checkpoint and
80 		 * LSN information on each checkpoint. The reason is
81 		 * performance and avoiding a lot of calls to the config
82 		 * parsing functions during a checkpoint for information
83 		 * that changes in a very well known way.
84 		 */
85 		cfg[0] = metaconf;
86 		cfg[1] = "checkpoint=()";
87 		cfg[2] = NULL;
88 		WT_ERR(__wt_strdup(session,
89 		    WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0]));
90 		WT_ASSERT(session, dhandle->meta_base == NULL);
91 		/*
92 		 * First collapse and overwrite any checkpoint information
93 		 * because we do not know the name or how many checkpoints
94 		 * may be in this metadata. So first we have to set the string
95 		 * to the empty checkpoint string and call collapse to
96 		 * overwrite anything existing.
97 		 */
98 		WT_ERR(__wt_config_collapse(session, cfg, &tmp));
99 		/*
100 		 * Now strip out the checkpoint and checkpoint LSN items
101 		 * from the configuration string and that is now our
102 		 * base metadata string.
103 		 */
104 		cfg[0] = tmp;
105 		cfg[1] = NULL;
106 		WT_ERR(__wt_config_merge(session,
107 		    cfg, "checkpoint=,checkpoint_lsn=", &base));
108 		__wt_free(session, tmp);
109 		break;
110 	case WT_DHANDLE_TYPE_TABLE:
111 		WT_ERR(__wt_strdup(session,
112 		    WT_CONFIG_BASE(session, table_meta), &dhandle->cfg[0]));
113 		break;
114 	}
115 	dhandle->cfg[1] = metaconf;
116 	dhandle->meta_base = base;
117 	return (0);
118 
119 err:	__wt_free(session, base);
120 	__wt_free(session, metaconf);
121 	__wt_free(session, tmp);
122 	return (ret);
123 }
124 
125 /*
126  * __conn_dhandle_destroy --
127  *	Destroy a data handle.
128  */
129 static int
__conn_dhandle_destroy(WT_SESSION_IMPL * session,WT_DATA_HANDLE * dhandle)130 __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle)
131 {
132 	WT_DECL_RET;
133 
134 	switch (dhandle->type) {
135 	case WT_DHANDLE_TYPE_BTREE:
136 		WT_WITH_DHANDLE(session, dhandle,
137 		    ret = __wt_btree_discard(session));
138 		break;
139 	case WT_DHANDLE_TYPE_TABLE:
140 		ret = __wt_schema_close_table(session, (WT_TABLE *)dhandle);
141 		break;
142 	}
143 
144 	__wt_rwlock_destroy(session, &dhandle->rwlock);
145 	__wt_free(session, dhandle->name);
146 	__wt_free(session, dhandle->checkpoint);
147 	__conn_dhandle_config_clear(session);
148 	__wt_spin_destroy(session, &dhandle->close_lock);
149 	__wt_stat_dsrc_discard(session, dhandle);
150 	__wt_overwrite_and_free(session, dhandle);
151 	return (ret);
152 }
153 
154 /*
155  * __wt_conn_dhandle_alloc --
156  *	Allocate a new data handle and return it linked into the connection's
157  *	list.
158  */
159 int
__wt_conn_dhandle_alloc(WT_SESSION_IMPL * session,const char * uri,const char * checkpoint)160 __wt_conn_dhandle_alloc(
161     WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
162 {
163 	WT_BTREE *btree;
164 	WT_DATA_HANDLE *dhandle;
165 	WT_DECL_RET;
166 	WT_TABLE *table;
167 	uint64_t bucket;
168 
169 	/*
170 	 * Ensure no one beat us to creating the handle now that we hold the
171 	 * write lock.
172 	 */
173 	if ((ret =
174 	     __wt_conn_dhandle_find(session, uri, checkpoint)) != WT_NOTFOUND)
175 		return (ret);
176 
177 	if (WT_PREFIX_MATCH(uri, "file:")) {
178 		WT_RET(__wt_calloc_one(session, &dhandle));
179 		dhandle->type = WT_DHANDLE_TYPE_BTREE;
180 	} else if (WT_PREFIX_MATCH(uri, "table:")) {
181 		WT_RET(__wt_calloc_one(session, &table));
182 		dhandle = (WT_DATA_HANDLE *)table;
183 		dhandle->type = WT_DHANDLE_TYPE_TABLE;
184 	} else
185 		WT_PANIC_RET(session, EINVAL,
186 		    "illegal handle allocation URI %s", uri);
187 
188 	/* Btree handles keep their data separate from the interface. */
189 	if (dhandle->type == WT_DHANDLE_TYPE_BTREE) {
190 		WT_ERR(__wt_calloc_one(session, &btree));
191 		dhandle->handle = btree;
192 		btree->dhandle = dhandle;
193 	}
194 
195 	if (strcmp(uri, WT_METAFILE_URI) == 0)
196 		F_SET(dhandle, WT_DHANDLE_IS_METADATA);
197 
198 	WT_ERR(__wt_rwlock_init(session, &dhandle->rwlock));
199 	dhandle->name_hash = __wt_hash_city64(uri, strlen(uri));
200 	WT_ERR(__wt_strdup(session, uri, &dhandle->name));
201 	WT_ERR(__wt_strdup(session, checkpoint, &dhandle->checkpoint));
202 
203 	WT_ERR(__wt_spin_init(
204 	    session, &dhandle->close_lock, "data handle close"));
205 
206 	/*
207 	 * We are holding the data handle list lock, which protects most
208 	 * threads from seeing the new handle until that lock is released.
209 	 *
210 	 * However, the sweep server scans the list of handles without holding
211 	 * that lock, so we need a write barrier here to ensure the sweep
212 	 * server doesn't see a partially filled in structure.
213 	 */
214 	WT_WRITE_BARRIER();
215 
216 	/*
217 	 * Prepend the handle to the connection list, assuming we're likely to
218 	 * need new files again soon, until they are cached by all sessions.
219 	 */
220 	bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
221 	WT_CONN_DHANDLE_INSERT(S2C(session), dhandle, bucket);
222 
223 	session->dhandle = dhandle;
224 	return (0);
225 
226 err:	WT_TRET(__conn_dhandle_destroy(session, dhandle));
227 	return (ret);
228 }
229 
230 /*
231  * __wt_conn_dhandle_find --
232  *	Find a previously opened data handle.
233  */
234 int
__wt_conn_dhandle_find(WT_SESSION_IMPL * session,const char * uri,const char * checkpoint)235 __wt_conn_dhandle_find(
236     WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
237 {
238 	WT_CONNECTION_IMPL *conn;
239 	WT_DATA_HANDLE *dhandle;
240 	uint64_t bucket;
241 
242 	conn = S2C(session);
243 
244 	/* We must be holding the handle list lock at a higher level. */
245 	WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
246 
247 	bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
248 	if (checkpoint == NULL) {
249 		TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) {
250 			if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
251 				continue;
252 			if (dhandle->checkpoint == NULL &&
253 			    strcmp(uri, dhandle->name) == 0) {
254 				session->dhandle = dhandle;
255 				return (0);
256 			}
257 		}
258 	} else
259 		TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) {
260 			if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
261 				continue;
262 			if (dhandle->checkpoint != NULL &&
263 			    strcmp(uri, dhandle->name) == 0 &&
264 			    strcmp(checkpoint, dhandle->checkpoint) == 0) {
265 				session->dhandle = dhandle;
266 				return (0);
267 			}
268 		}
269 
270 	return (WT_NOTFOUND);
271 }
272 
273 /*
274  * __wt_conn_dhandle_close --
275  *	Sync and close the underlying btree handle.
276  */
277 int
__wt_conn_dhandle_close(WT_SESSION_IMPL * session,bool final,bool mark_dead)278 __wt_conn_dhandle_close(
279     WT_SESSION_IMPL *session, bool final, bool mark_dead)
280 {
281 	WT_BM *bm;
282 	WT_BTREE *btree;
283 	WT_CONNECTION_IMPL *conn;
284 	WT_DATA_HANDLE *dhandle;
285 	WT_DECL_RET;
286 	bool discard, is_btree, marked_dead, no_schema_lock;
287 
288 	conn = S2C(session);
289 	dhandle = session->dhandle;
290 
291 	if (!F_ISSET(dhandle, WT_DHANDLE_OPEN))
292 		return (0);
293 
294 	/*
295 	 * The only data handle type that uses the "handle" field is btree.
296 	 * For other data handle types, it should be NULL.
297 	 */
298 	is_btree = dhandle->type == WT_DHANDLE_TYPE_BTREE;
299 	btree = is_btree ? dhandle->handle : NULL;
300 
301 	if (is_btree) {
302 		/* Turn off eviction. */
303 		WT_RET(__wt_evict_file_exclusive_on(session));
304 
305 		/* Reset the tree's eviction priority (if any). */
306 		__wt_evict_priority_clear(session);
307 	}
308 
309 	/*
310 	 * If we don't already have the schema lock, make it an error to try to
311 	 * acquire it.  The problem is that we are holding an exclusive lock on
312 	 * the handle, and if we attempt to acquire the schema lock we might
313 	 * deadlock with a thread that has the schema lock and wants a handle
314 	 * lock.
315 	 */
316 	no_schema_lock = false;
317 	if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
318 		no_schema_lock = true;
319 		F_SET(session, WT_SESSION_NO_SCHEMA_LOCK);
320 	}
321 
322 	/*
323 	 * We may not be holding the schema lock, and threads may be walking
324 	 * the list of open handles (for example, checkpoint).  Acquire the
325 	 * handle's close lock. We don't have the sweep server acquire the
326 	 * handle's rwlock so we have to prevent races through the close code.
327 	 */
328 	__wt_spin_lock(session, &dhandle->close_lock);
329 
330 	discard = marked_dead = false;
331 	if (is_btree && !F_ISSET(btree,
332 	    WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) {
333 		/*
334 		 * If the handle is already marked dead, we're just here to
335 		 * discard it.
336 		 */
337 		if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
338 			discard = true;
339 
340 		/*
341 		 * Mark the handle dead (letting the tree be discarded later) if
342 		 * it's not already marked dead, and it's not a memory-mapped
343 		 * tree. (We can't mark memory-mapped tree handles dead because
344 		 * we close the underlying file handle to allow the file to be
345 		 * removed and memory-mapped trees contain pointers into memory
346 		 * that become invalid if the mapping is closed.)
347 		 */
348 		bm = btree->bm;
349 		if (!discard && mark_dead &&
350 		    (bm == NULL || !bm->is_mapped(bm, session)))
351 			marked_dead = true;
352 
353 		/*
354 		 * Flush dirty data from any durable trees we couldn't mark
355 		 * dead.  That involves writing a checkpoint, which can fail if
356 		 * an update cannot be written, causing the close to fail: if
357 		 * not the final close, return the EBUSY error to our caller
358 		 * for eventual retry.
359 		 *
360 		 * We can't discard non-durable trees yet: first we have to
361 		 * close the underlying btree handle, then we can mark the
362 		 * data handle dead.
363 		 *
364 		 * If we are closing with timestamps enforced, then we have
365 		 * already checkpointed as of the timestamp as needed and any
366 		 * remaining dirty data should be discarded.
367 		 */
368 		if (!discard && !marked_dead) {
369 			if (F_ISSET(conn, WT_CONN_CLOSING_TIMESTAMP) ||
370 			    F_ISSET(conn, WT_CONN_IN_MEMORY) ||
371 			    F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
372 				discard = true;
373 			else {
374 				WT_TRET(__wt_checkpoint_close(session, final));
375 				if (!final && ret == EBUSY)
376 					WT_ERR(ret);
377 			}
378 		}
379 	}
380 
381 	/* Close the underlying handle. */
382 	switch (dhandle->type) {
383 	case WT_DHANDLE_TYPE_BTREE:
384 		WT_TRET(__wt_btree_close(session));
385 		F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
386 		break;
387 	case WT_DHANDLE_TYPE_TABLE:
388 		WT_TRET(__wt_schema_close_table(session, (WT_TABLE *)dhandle));
389 		break;
390 	}
391 
392 	/*
393 	 * If marking the handle dead, do so after closing the underlying btree.
394 	 * (Don't do it before that, the block manager asserts there are never
395 	 * two references to a block manager object, and re-opening the handle
396 	 * can succeed once we mark this handle dead.)
397 	 *
398 	 * Check discard too, code we call to clear the cache expects the data
399 	 * handle dead flag to be set when discarding modified pages.
400 	 */
401 	if (marked_dead || discard)
402 		F_SET(dhandle, WT_DHANDLE_DEAD);
403 
404 	/*
405 	 * Discard from cache any trees not marked dead in this call (that is,
406 	 * including trees previously marked dead). Done after marking the data
407 	 * handle dead for a couple reasons: first, we don't need to hold an
408 	 * exclusive handle to do it, second, code we call to clear the cache
409 	 * expects the data handle dead flag to be set when discarding modified
410 	 * pages.
411 	 */
412 	if (discard)
413 		WT_TRET(__wt_cache_op(session, WT_SYNC_DISCARD));
414 
415 	/*
416 	 * If we marked a handle dead it will be closed by sweep, via another
417 	 * call to this function. Otherwise, we're done with this handle.
418 	 */
419 	if (!marked_dead) {
420 		F_CLR(dhandle, WT_DHANDLE_OPEN);
421 		if (dhandle->checkpoint == NULL)
422 			--conn->open_btree_count;
423 	}
424 	WT_ASSERT(session,
425 	    F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
426 	    !F_ISSET(dhandle, WT_DHANDLE_OPEN));
427 
428 err:	__wt_spin_unlock(session, &dhandle->close_lock);
429 
430 	if (no_schema_lock)
431 		F_CLR(session, WT_SESSION_NO_SCHEMA_LOCK);
432 
433 	if (is_btree)
434 		__wt_evict_file_exclusive_off(session);
435 
436 	return (ret);
437 }
438 
439 /*
440  * __wt_conn_dhandle_open --
441  *	Open the current data handle.
442  */
443 int
__wt_conn_dhandle_open(WT_SESSION_IMPL * session,const char * cfg[],uint32_t flags)444 __wt_conn_dhandle_open(
445     WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags)
446 {
447 	WT_BTREE *btree;
448 	WT_DATA_HANDLE *dhandle;
449 	WT_DECL_RET;
450 
451 	dhandle = session->dhandle;
452 	btree = dhandle->handle;
453 
454 	WT_ASSERT(session,
455 	    F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) &&
456 	    !LF_ISSET(WT_DHANDLE_LOCK_ONLY));
457 
458 	WT_ASSERT(session,
459 	     !F_ISSET(S2C(session), WT_CONN_CLOSING_NO_MORE_OPENS));
460 
461 	/* Turn off eviction. */
462 	if (dhandle->type == WT_DHANDLE_TYPE_BTREE)
463 		WT_RET(__wt_evict_file_exclusive_on(session));
464 
465 	/*
466 	 * If the handle is already open, it has to be closed so it can be
467 	 * reopened with a new configuration.
468 	 *
469 	 * This call can return EBUSY if there's an update in the tree that's
470 	 * not yet globally visible. That's not a problem because it can only
471 	 * happen when we're switching from a normal handle to a "special" one,
472 	 * so we're returning EBUSY to an attempt to verify or do other special
473 	 * operations. The reverse won't happen because when the handle from a
474 	 * verify or other special operation is closed, there won't be updates
475 	 * in the tree that can block the close.
476 	 */
477 	if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
478 		WT_ERR(__wt_conn_dhandle_close(session, false, false));
479 
480 	/* Discard any previous configuration, set up the new configuration. */
481 	__conn_dhandle_config_clear(session);
482 	WT_ERR(__conn_dhandle_config_set(session));
483 
484 	switch (dhandle->type) {
485 	case WT_DHANDLE_TYPE_BTREE:
486 		/* Set any special flags on the btree handle. */
487 		F_SET(btree, LF_MASK(WT_BTREE_SPECIAL_FLAGS));
488 
489 		/*
490 		 * Allocate data-source statistics memory. We don't allocate
491 		 * that memory when allocating the data handle because not all
492 		 * data handles need statistics (for example, handles used for
493 		 * checkpoint locking).  If we are reopening the handle, then
494 		 * it may already have statistics memory, check to avoid the
495 		 * leak.
496 		 */
497 		if (dhandle->stat_array == NULL)
498 			WT_ERR(__wt_stat_dsrc_init(session, dhandle));
499 
500 		WT_ERR(__wt_btree_open(session, cfg));
501 		break;
502 	case WT_DHANDLE_TYPE_TABLE:
503 		WT_ERR(__wt_schema_open_table(session, cfg));
504 		break;
505 	}
506 
507 	/*
508 	 * Bulk handles require true exclusive access, otherwise, handles
509 	 * marked as exclusive are allowed to be relocked by the same
510 	 * session.
511 	 */
512 	if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) &&
513 	    !LF_ISSET(WT_BTREE_BULK)) {
514 		dhandle->excl_session = session;
515 		dhandle->excl_ref = 1;
516 	}
517 	F_SET(dhandle, WT_DHANDLE_OPEN);
518 
519 	/*
520 	 * Checkpoint handles are read-only, so eviction calculations based on
521 	 * the number of btrees are better to ignore them.
522 	 */
523 	if (dhandle->checkpoint == NULL)
524 		++S2C(session)->open_btree_count;
525 
526 	if (0) {
527 err:		if (btree != NULL)
528 			F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
529 	}
530 
531 	if (dhandle->type == WT_DHANDLE_TYPE_BTREE)
532 		__wt_evict_file_exclusive_off(session);
533 
534 	return (ret);
535 }
536 
537 /*
538  * __conn_btree_apply_internal --
539  *	Apply a function to an open data handle.
540  */
541 static int
__conn_btree_apply_internal(WT_SESSION_IMPL * session,WT_DATA_HANDLE * dhandle,int (* file_func)(WT_SESSION_IMPL *,const char * []),int (* name_func)(WT_SESSION_IMPL *,const char *,bool *),const char * cfg[])542 __conn_btree_apply_internal(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle,
543     int (*file_func)(WT_SESSION_IMPL *, const char *[]),
544     int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
545     const char *cfg[])
546 {
547 	WT_DECL_RET;
548 	bool skip;
549 
550 	/* Always apply the name function, if supplied. */
551 	skip = false;
552 	if (name_func != NULL)
553 		WT_RET(name_func(session, dhandle->name, &skip));
554 
555 	/* If there is no file function, don't bother locking the handle */
556 	if (file_func == NULL || skip)
557 		return (0);
558 
559 	/*
560 	 * We need to pull the handle into the session handle cache and make
561 	 * sure it's referenced to stop other internal code dropping the handle
562 	 * (e.g in LSM when cleaning up obsolete chunks).
563 	 */
564 	if ((ret = __wt_session_get_dhandle(session,
565 	    dhandle->name, dhandle->checkpoint, NULL, 0)) != 0)
566 		return (ret == EBUSY ? 0 : ret);
567 
568 	WT_SAVE_DHANDLE(session, ret = file_func(session, cfg));
569 	WT_TRET(__wt_session_release_dhandle(session));
570 	return (ret);
571 }
572 
573 /*
574  * __wt_conn_btree_apply --
575  *	Apply a function to all open btree handles with the given URI.
576  */
577 int
__wt_conn_btree_apply(WT_SESSION_IMPL * session,const char * uri,int (* file_func)(WT_SESSION_IMPL *,const char * []),int (* name_func)(WT_SESSION_IMPL *,const char *,bool *),const char * cfg[])578 __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri,
579     int (*file_func)(WT_SESSION_IMPL *, const char *[]),
580     int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
581     const char *cfg[])
582 {
583 	WT_CONNECTION_IMPL *conn;
584 	WT_DATA_HANDLE *dhandle;
585 	WT_DECL_RET;
586 	uint64_t bucket;
587 
588 	conn = S2C(session);
589 
590 	/*
591 	 * If we're given a URI, then we walk only the hash list for that
592 	 * name.  If we don't have a URI we walk the entire dhandle list.
593 	 */
594 	if (uri != NULL) {
595 		bucket =
596 		    __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
597 
598 		for (dhandle = NULL;;) {
599 			WT_WITH_HANDLE_LIST_READ_LOCK(session,
600 			    WT_DHANDLE_NEXT(session, dhandle,
601 			    &conn->dhhash[bucket], hashq));
602 			if (dhandle == NULL)
603 				return (0);
604 
605 			if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
606 			    F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
607 			    dhandle->checkpoint != NULL ||
608 			    strcmp(uri, dhandle->name) != 0)
609 				continue;
610 			WT_ERR(__conn_btree_apply_internal(session,
611 			    dhandle, file_func, name_func, cfg));
612 		}
613 	} else {
614 		for (dhandle = NULL;;) {
615 			WT_WITH_HANDLE_LIST_READ_LOCK(session,
616 			    WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
617 			if (dhandle == NULL)
618 				return (0);
619 
620 			if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
621 			    F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
622 			    dhandle->type != WT_DHANDLE_TYPE_BTREE ||
623 			    dhandle->checkpoint != NULL ||
624 			    WT_IS_METADATA(dhandle))
625 				continue;
626 			WT_ERR(__conn_btree_apply_internal(session,
627 			    dhandle, file_func, name_func, cfg));
628 		}
629 	}
630 
631 err:	WT_DHANDLE_RELEASE(dhandle);
632 	return (ret);
633 }
634 
635 /*
636  * __conn_dhandle_close_one --
637  *	Lock and, if necessary, close a data handle.
638  */
639 static int
__conn_dhandle_close_one(WT_SESSION_IMPL * session,const char * uri,const char * checkpoint,bool removed,bool mark_dead)640 __conn_dhandle_close_one(WT_SESSION_IMPL *session,
641     const char *uri, const char *checkpoint, bool removed, bool mark_dead)
642 {
643 	WT_DECL_RET;
644 
645 	/*
646 	 * Lock the handle exclusively.  If this is part of schema-changing
647 	 * operation (indicated by metadata tracking being enabled), hold the
648 	 * lock for the duration of the operation.
649 	 */
650 	WT_RET(__wt_session_get_dhandle(session, uri, checkpoint,
651 	    NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));
652 	if (WT_META_TRACKING(session))
653 		WT_RET(__wt_meta_track_handle_lock(session, false));
654 
655 	/*
656 	 * We have an exclusive lock, which means there are no cursors open at
657 	 * this point.  Close the handle, if necessary.
658 	 */
659 	if (F_ISSET(session->dhandle, WT_DHANDLE_OPEN)) {
660 		__wt_meta_track_sub_on(session);
661 		ret = __wt_conn_dhandle_close(session, false, mark_dead);
662 
663 		/*
664 		 * If the close succeeded, drop any locks it acquired.  If
665 		 * there was a failure, this function will fail and the whole
666 		 * transaction will be rolled back.
667 		 */
668 		if (ret == 0)
669 			ret = __wt_meta_track_sub_off(session);
670 	}
671 	if (removed)
672 		F_SET(session->dhandle, WT_DHANDLE_DROPPED);
673 
674 	if (!WT_META_TRACKING(session))
675 		WT_TRET(__wt_session_release_dhandle(session));
676 
677 	return (ret);
678 }
679 
680 /*
681  * __wt_conn_dhandle_close_all --
682  *	Close all data handles handles with matching name (including all
683  *	checkpoint handles).
684  */
685 int
__wt_conn_dhandle_close_all(WT_SESSION_IMPL * session,const char * uri,bool removed,bool mark_dead)686 __wt_conn_dhandle_close_all(
687     WT_SESSION_IMPL *session, const char *uri, bool removed, bool mark_dead)
688 {
689 	WT_CONNECTION_IMPL *conn;
690 	WT_DATA_HANDLE *dhandle;
691 	WT_DECL_RET;
692 	uint64_t bucket;
693 
694 	conn = S2C(session);
695 
696 	WT_ASSERT(session,
697 	    F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
698 	WT_ASSERT(session, session->dhandle == NULL);
699 
700 	/*
701 	 * Lock the live handle first.  This ordering is important: we rely on
702 	 * locking the live handle to fail fast if the tree is busy (e.g., with
703 	 * cursors open or in a checkpoint).
704 	 */
705 	WT_ERR(__conn_dhandle_close_one(
706 	    session, uri, NULL, removed, mark_dead));
707 
708 	bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
709 	TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) {
710 		if (strcmp(dhandle->name, uri) != 0 ||
711 		    dhandle->checkpoint == NULL ||
712 		    F_ISSET(dhandle, WT_DHANDLE_DEAD))
713 			continue;
714 
715 		WT_ERR(__conn_dhandle_close_one(
716 		    session, dhandle->name, dhandle->checkpoint, removed,
717 		    mark_dead));
718 	}
719 
720 err:	session->dhandle = NULL;
721 	return (ret);
722 }
723 
724 /*
725  * __conn_dhandle_remove --
726  *	Remove a handle from the shared list.
727  */
728 static int
__conn_dhandle_remove(WT_SESSION_IMPL * session,bool final)729 __conn_dhandle_remove(WT_SESSION_IMPL *session, bool final)
730 {
731 	WT_CONNECTION_IMPL *conn;
732 	WT_DATA_HANDLE *dhandle;
733 	uint64_t bucket;
734 
735 	conn = S2C(session);
736 	dhandle = session->dhandle;
737 	bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
738 
739 	WT_ASSERT(session,
740 	    F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
741 	WT_ASSERT(session, dhandle != conn->cache->walk_tree);
742 
743 	/* Check if the handle was reacquired by a session while we waited. */
744 	if (!final &&
745 	    (dhandle->session_inuse != 0 || dhandle->session_ref != 0))
746 		return (__wt_set_return(session, EBUSY));
747 
748 	WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket);
749 	return (0);
750 
751 }
752 
753 /*
754  * __wt_conn_dhandle_discard_single --
755  *	Close/discard a single data handle.
756  */
757 int
__wt_conn_dhandle_discard_single(WT_SESSION_IMPL * session,bool final,bool mark_dead)758 __wt_conn_dhandle_discard_single(
759     WT_SESSION_IMPL *session, bool final, bool mark_dead)
760 {
761 	WT_DATA_HANDLE *dhandle;
762 	WT_DECL_RET;
763 	int tret;
764 	bool set_pass_intr;
765 
766 	dhandle = session->dhandle;
767 
768 	if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
769 		tret = __wt_conn_dhandle_close(session, final, mark_dead);
770 		if (final && tret != 0) {
771 			__wt_err(session, tret,
772 			    "Final close of %s failed", dhandle->name);
773 			WT_TRET(tret);
774 		} else if (!final)
775 			WT_RET(tret);
776 	}
777 
778 	/*
779 	 * Kludge: interrupt the eviction server in case it is holding the
780 	 * handle list lock.
781 	 */
782 	set_pass_intr = false;
783 	if (!F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) {
784 		set_pass_intr = true;
785 		(void)__wt_atomic_addv32(&S2C(session)->cache->pass_intr, 1);
786 	}
787 
788 	/* Try to remove the handle, protected by the data handle lock. */
789 	WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
790 	    tret = __conn_dhandle_remove(session, final));
791 	if (set_pass_intr)
792 		(void)__wt_atomic_subv32(&S2C(session)->cache->pass_intr, 1);
793 	WT_TRET(tret);
794 
795 	/*
796 	 * After successfully removing the handle, clean it up.
797 	 */
798 	if (ret == 0 || final) {
799 		WT_TRET(__conn_dhandle_destroy(session, dhandle));
800 		session->dhandle = NULL;
801 	}
802 
803 	return (ret);
804 }
805 
806 /*
807  * __wt_conn_dhandle_discard --
808  *	Close/discard all data handles.
809  */
810 int
__wt_conn_dhandle_discard(WT_SESSION_IMPL * session)811 __wt_conn_dhandle_discard(WT_SESSION_IMPL *session)
812 {
813 	WT_CONNECTION_IMPL *conn;
814 	WT_DATA_HANDLE *dhandle, *dhandle_tmp;
815 	WT_DECL_RET;
816 
817 	conn = S2C(session);
818 
819 	/*
820 	 * Empty the session cache: any data handles created in a connection
821 	 * method may be cached here, and we're about to close them.
822 	 */
823 	__wt_session_close_cache(session);
824 
825 	/*
826 	 * Close open data handles: first, everything apart from metadata and
827 	 * lookaside (as closing a normal file may write metadata and read
828 	 * lookaside entries).  Then close whatever is left open.
829 	 */
830 restart:
831 	TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
832 		if (WT_IS_METADATA(dhandle) ||
833 		    strcmp(dhandle->name, WT_LAS_URI) == 0 ||
834 		    WT_PREFIX_MATCH(dhandle->name, WT_SYSTEM_PREFIX))
835 			continue;
836 
837 		WT_WITH_DHANDLE(session, dhandle,
838 		    WT_TRET(__wt_conn_dhandle_discard_single(
839 		    session, true, F_ISSET(conn, WT_CONN_PANIC))));
840 		goto restart;
841 	}
842 
843 	/* Shut down the lookaside table after all eviction is complete. */
844 	WT_TRET(__wt_las_destroy(session));
845 
846 	/*
847 	 * Closing the files may have resulted in entries on our default
848 	 * session's list of open data handles, specifically, we added the
849 	 * metadata file if any of the files were dirty.  Clean up that list
850 	 * before we shut down the metadata entry, for good.
851 	 */
852 	__wt_session_close_cache(session);
853 	F_SET(session, WT_SESSION_NO_DATA_HANDLES);
854 
855 	/*
856 	 * The connection may have an open metadata cursor handle. We cannot
857 	 * close it before now because it's potentially used when discarding
858 	 * other open data handles. Close it before discarding the underlying
859 	 * metadata handle.
860 	 */
861 	if (session->meta_cursor != NULL)
862 		WT_TRET(session->meta_cursor->close(session->meta_cursor));
863 
864 	/* Close the remaining handles. */
865 	WT_TAILQ_SAFE_REMOVE_BEGIN(dhandle, &conn->dhqh, q, dhandle_tmp) {
866 		WT_WITH_DHANDLE(session, dhandle,
867 		    WT_TRET(__wt_conn_dhandle_discard_single(
868 		    session, true, F_ISSET(conn, WT_CONN_PANIC))));
869 	} WT_TAILQ_SAFE_REMOVE_END
870 
871 	return (ret);
872 }
873 
874 /*
875  * __wt_verbose_dump_handles --
876  *	Dump information about all data handles.
877  */
878 int
__wt_verbose_dump_handles(WT_SESSION_IMPL * session)879 __wt_verbose_dump_handles(WT_SESSION_IMPL *session)
880 {
881 	WT_CONNECTION_IMPL *conn;
882 	WT_DATA_HANDLE *dhandle;
883 
884 	conn = S2C(session);
885 
886 	WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
887 	WT_RET(__wt_msg(session, "Data handle dump:"));
888 	for (dhandle = NULL;;) {
889 		WT_WITH_HANDLE_LIST_READ_LOCK(session,
890 		    WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
891 		if (dhandle == NULL)
892 			break;
893 		WT_RET(__wt_msg(session, "Name: %s", dhandle->name));
894 		if (dhandle->checkpoint != NULL)
895 			WT_RET(__wt_msg(session,
896 			    "Checkpoint: %s", dhandle->checkpoint));
897 		WT_RET(__wt_msg(session, "  Sessions referencing handle: %"
898 		    PRIu32, dhandle->session_ref));
899 		WT_RET(__wt_msg(session, "  Sessions using handle: %"
900 		    PRId32, dhandle->session_inuse));
901 		WT_RET(__wt_msg(session, "  Exclusive references to handle: %"
902 		    PRIu32, dhandle->excl_ref));
903 		if (dhandle->excl_ref != 0)
904 			WT_RET(__wt_msg(session,
905 			    "  Session with exclusive use: %p",
906 			    (void *)dhandle->excl_session));
907 		WT_RET(__wt_msg(session,
908 		    "  Flags: 0x%08" PRIx32, dhandle->flags));
909 	}
910 	return (0);
911 }
912