1 /*-
2 * Copyright (c) 2014-2018 MongoDB, Inc.
3 * Copyright (c) 2008-2014 WiredTiger, Inc.
4 * All rights reserved.
5 *
6 * See the file LICENSE for redistribution information.
7 */
8
9 #include "wt_internal.h"
10
11 /*
12 * __conn_dhandle_config_clear --
13 * Clear the underlying object's configuration information.
14 */
15 static void
__conn_dhandle_config_clear(WT_SESSION_IMPL * session)16 __conn_dhandle_config_clear(WT_SESSION_IMPL *session)
17 {
18 WT_DATA_HANDLE *dhandle;
19 const char **a;
20
21 dhandle = session->dhandle;
22
23 if (dhandle->cfg == NULL)
24 return;
25 for (a = dhandle->cfg; *a != NULL; ++a)
26 __wt_free(session, *a);
27 __wt_free(session, dhandle->cfg);
28 __wt_free(session, dhandle->meta_base);
29 }
30
31 /*
32 * __conn_dhandle_config_set --
33 * Set up a btree handle's configuration information.
34 */
35 static int
__conn_dhandle_config_set(WT_SESSION_IMPL * session)36 __conn_dhandle_config_set(WT_SESSION_IMPL *session)
37 {
38 WT_DATA_HANDLE *dhandle;
39 WT_DECL_RET;
40 const char *base, *cfg[3];
41 char *metaconf, *tmp;
42
43 dhandle = session->dhandle;
44 base = NULL;
45 tmp = NULL;
46
47 /*
48 * Read the object's entry from the metadata file, we're done if we
49 * don't find one.
50 */
51 if ((ret =
52 __wt_metadata_search(session, dhandle->name, &metaconf)) != 0) {
53 if (ret == WT_NOTFOUND)
54 ret = __wt_set_return(session, ENOENT);
55 WT_RET(ret);
56 }
57
58 /*
59 * The defaults are included because persistent configuration
60 * information is stored in the metadata file and it may be from an
61 * earlier version of WiredTiger. If defaults are included in the
62 * configuration, we can add new configuration strings without
63 * upgrading the metadata file or writing special code in case a
64 * configuration string isn't initialized, as long as the new
65 * configuration string has an appropriate default value.
66 *
67 * The error handling is a little odd, but be careful: we're holding a
68 * chunk of allocated memory in metaconf. If we fail before we copy a
69 * reference to it into the object's configuration array, we must free
70 * it, after the copy, we don't want to free it.
71 */
72 WT_ERR(__wt_calloc_def(session, 3, &dhandle->cfg));
73 switch (dhandle->type) {
74 case WT_DHANDLE_TYPE_BTREE:
75 /*
76 * We are stripping out the checkpoint and checkpoint_lsn
77 * information from the config string. We save the rest of
78 * the metadata string, that is essentially static and
79 * unchanging and then concatenate the new checkpoint and
80 * LSN information on each checkpoint. The reason is
81 * performance and avoiding a lot of calls to the config
82 * parsing functions during a checkpoint for information
83 * that changes in a very well known way.
84 */
85 cfg[0] = metaconf;
86 cfg[1] = "checkpoint=()";
87 cfg[2] = NULL;
88 WT_ERR(__wt_strdup(session,
89 WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0]));
90 WT_ASSERT(session, dhandle->meta_base == NULL);
91 /*
92 * First collapse and overwrite any checkpoint information
93 * because we do not know the name or how many checkpoints
94 * may be in this metadata. So first we have to set the string
95 * to the empty checkpoint string and call collapse to
96 * overwrite anything existing.
97 */
98 WT_ERR(__wt_config_collapse(session, cfg, &tmp));
99 /*
100 * Now strip out the checkpoint and checkpoint LSN items
101 * from the configuration string and that is now our
102 * base metadata string.
103 */
104 cfg[0] = tmp;
105 cfg[1] = NULL;
106 WT_ERR(__wt_config_merge(session,
107 cfg, "checkpoint=,checkpoint_lsn=", &base));
108 __wt_free(session, tmp);
109 break;
110 case WT_DHANDLE_TYPE_TABLE:
111 WT_ERR(__wt_strdup(session,
112 WT_CONFIG_BASE(session, table_meta), &dhandle->cfg[0]));
113 break;
114 }
115 dhandle->cfg[1] = metaconf;
116 dhandle->meta_base = base;
117 return (0);
118
119 err: __wt_free(session, base);
120 __wt_free(session, metaconf);
121 __wt_free(session, tmp);
122 return (ret);
123 }
124
125 /*
126 * __conn_dhandle_destroy --
127 * Destroy a data handle.
128 */
129 static int
__conn_dhandle_destroy(WT_SESSION_IMPL * session,WT_DATA_HANDLE * dhandle)130 __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle)
131 {
132 WT_DECL_RET;
133
134 switch (dhandle->type) {
135 case WT_DHANDLE_TYPE_BTREE:
136 WT_WITH_DHANDLE(session, dhandle,
137 ret = __wt_btree_discard(session));
138 break;
139 case WT_DHANDLE_TYPE_TABLE:
140 ret = __wt_schema_close_table(session, (WT_TABLE *)dhandle);
141 break;
142 }
143
144 __wt_rwlock_destroy(session, &dhandle->rwlock);
145 __wt_free(session, dhandle->name);
146 __wt_free(session, dhandle->checkpoint);
147 __conn_dhandle_config_clear(session);
148 __wt_spin_destroy(session, &dhandle->close_lock);
149 __wt_stat_dsrc_discard(session, dhandle);
150 __wt_overwrite_and_free(session, dhandle);
151 return (ret);
152 }
153
154 /*
155 * __wt_conn_dhandle_alloc --
156 * Allocate a new data handle and return it linked into the connection's
157 * list.
158 */
159 int
__wt_conn_dhandle_alloc(WT_SESSION_IMPL * session,const char * uri,const char * checkpoint)160 __wt_conn_dhandle_alloc(
161 WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
162 {
163 WT_BTREE *btree;
164 WT_DATA_HANDLE *dhandle;
165 WT_DECL_RET;
166 WT_TABLE *table;
167 uint64_t bucket;
168
169 /*
170 * Ensure no one beat us to creating the handle now that we hold the
171 * write lock.
172 */
173 if ((ret =
174 __wt_conn_dhandle_find(session, uri, checkpoint)) != WT_NOTFOUND)
175 return (ret);
176
177 if (WT_PREFIX_MATCH(uri, "file:")) {
178 WT_RET(__wt_calloc_one(session, &dhandle));
179 dhandle->type = WT_DHANDLE_TYPE_BTREE;
180 } else if (WT_PREFIX_MATCH(uri, "table:")) {
181 WT_RET(__wt_calloc_one(session, &table));
182 dhandle = (WT_DATA_HANDLE *)table;
183 dhandle->type = WT_DHANDLE_TYPE_TABLE;
184 } else
185 WT_PANIC_RET(session, EINVAL,
186 "illegal handle allocation URI %s", uri);
187
188 /* Btree handles keep their data separate from the interface. */
189 if (dhandle->type == WT_DHANDLE_TYPE_BTREE) {
190 WT_ERR(__wt_calloc_one(session, &btree));
191 dhandle->handle = btree;
192 btree->dhandle = dhandle;
193 }
194
195 if (strcmp(uri, WT_METAFILE_URI) == 0)
196 F_SET(dhandle, WT_DHANDLE_IS_METADATA);
197
198 WT_ERR(__wt_rwlock_init(session, &dhandle->rwlock));
199 dhandle->name_hash = __wt_hash_city64(uri, strlen(uri));
200 WT_ERR(__wt_strdup(session, uri, &dhandle->name));
201 WT_ERR(__wt_strdup(session, checkpoint, &dhandle->checkpoint));
202
203 WT_ERR(__wt_spin_init(
204 session, &dhandle->close_lock, "data handle close"));
205
206 /*
207 * We are holding the data handle list lock, which protects most
208 * threads from seeing the new handle until that lock is released.
209 *
210 * However, the sweep server scans the list of handles without holding
211 * that lock, so we need a write barrier here to ensure the sweep
212 * server doesn't see a partially filled in structure.
213 */
214 WT_WRITE_BARRIER();
215
216 /*
217 * Prepend the handle to the connection list, assuming we're likely to
218 * need new files again soon, until they are cached by all sessions.
219 */
220 bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
221 WT_CONN_DHANDLE_INSERT(S2C(session), dhandle, bucket);
222
223 session->dhandle = dhandle;
224 return (0);
225
226 err: WT_TRET(__conn_dhandle_destroy(session, dhandle));
227 return (ret);
228 }
229
230 /*
231 * __wt_conn_dhandle_find --
232 * Find a previously opened data handle.
233 */
234 int
__wt_conn_dhandle_find(WT_SESSION_IMPL * session,const char * uri,const char * checkpoint)235 __wt_conn_dhandle_find(
236 WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
237 {
238 WT_CONNECTION_IMPL *conn;
239 WT_DATA_HANDLE *dhandle;
240 uint64_t bucket;
241
242 conn = S2C(session);
243
244 /* We must be holding the handle list lock at a higher level. */
245 WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
246
247 bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
248 if (checkpoint == NULL) {
249 TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) {
250 if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
251 continue;
252 if (dhandle->checkpoint == NULL &&
253 strcmp(uri, dhandle->name) == 0) {
254 session->dhandle = dhandle;
255 return (0);
256 }
257 }
258 } else
259 TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) {
260 if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
261 continue;
262 if (dhandle->checkpoint != NULL &&
263 strcmp(uri, dhandle->name) == 0 &&
264 strcmp(checkpoint, dhandle->checkpoint) == 0) {
265 session->dhandle = dhandle;
266 return (0);
267 }
268 }
269
270 return (WT_NOTFOUND);
271 }
272
273 /*
274 * __wt_conn_dhandle_close --
275 * Sync and close the underlying btree handle.
276 */
277 int
__wt_conn_dhandle_close(WT_SESSION_IMPL * session,bool final,bool mark_dead)278 __wt_conn_dhandle_close(
279 WT_SESSION_IMPL *session, bool final, bool mark_dead)
280 {
281 WT_BM *bm;
282 WT_BTREE *btree;
283 WT_CONNECTION_IMPL *conn;
284 WT_DATA_HANDLE *dhandle;
285 WT_DECL_RET;
286 bool discard, is_btree, marked_dead, no_schema_lock;
287
288 conn = S2C(session);
289 dhandle = session->dhandle;
290
291 if (!F_ISSET(dhandle, WT_DHANDLE_OPEN))
292 return (0);
293
294 /*
295 * The only data handle type that uses the "handle" field is btree.
296 * For other data handle types, it should be NULL.
297 */
298 is_btree = dhandle->type == WT_DHANDLE_TYPE_BTREE;
299 btree = is_btree ? dhandle->handle : NULL;
300
301 if (is_btree) {
302 /* Turn off eviction. */
303 WT_RET(__wt_evict_file_exclusive_on(session));
304
305 /* Reset the tree's eviction priority (if any). */
306 __wt_evict_priority_clear(session);
307 }
308
309 /*
310 * If we don't already have the schema lock, make it an error to try to
311 * acquire it. The problem is that we are holding an exclusive lock on
312 * the handle, and if we attempt to acquire the schema lock we might
313 * deadlock with a thread that has the schema lock and wants a handle
314 * lock.
315 */
316 no_schema_lock = false;
317 if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
318 no_schema_lock = true;
319 F_SET(session, WT_SESSION_NO_SCHEMA_LOCK);
320 }
321
322 /*
323 * We may not be holding the schema lock, and threads may be walking
324 * the list of open handles (for example, checkpoint). Acquire the
325 * handle's close lock. We don't have the sweep server acquire the
326 * handle's rwlock so we have to prevent races through the close code.
327 */
328 __wt_spin_lock(session, &dhandle->close_lock);
329
330 discard = marked_dead = false;
331 if (is_btree && !F_ISSET(btree,
332 WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) {
333 /*
334 * If the handle is already marked dead, we're just here to
335 * discard it.
336 */
337 if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
338 discard = true;
339
340 /*
341 * Mark the handle dead (letting the tree be discarded later) if
342 * it's not already marked dead, and it's not a memory-mapped
343 * tree. (We can't mark memory-mapped tree handles dead because
344 * we close the underlying file handle to allow the file to be
345 * removed and memory-mapped trees contain pointers into memory
346 * that become invalid if the mapping is closed.)
347 */
348 bm = btree->bm;
349 if (!discard && mark_dead &&
350 (bm == NULL || !bm->is_mapped(bm, session)))
351 marked_dead = true;
352
353 /*
354 * Flush dirty data from any durable trees we couldn't mark
355 * dead. That involves writing a checkpoint, which can fail if
356 * an update cannot be written, causing the close to fail: if
357 * not the final close, return the EBUSY error to our caller
358 * for eventual retry.
359 *
360 * We can't discard non-durable trees yet: first we have to
361 * close the underlying btree handle, then we can mark the
362 * data handle dead.
363 *
364 * If we are closing with timestamps enforced, then we have
365 * already checkpointed as of the timestamp as needed and any
366 * remaining dirty data should be discarded.
367 */
368 if (!discard && !marked_dead) {
369 if (F_ISSET(conn, WT_CONN_CLOSING_TIMESTAMP) ||
370 F_ISSET(conn, WT_CONN_IN_MEMORY) ||
371 F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
372 discard = true;
373 else {
374 WT_TRET(__wt_checkpoint_close(session, final));
375 if (!final && ret == EBUSY)
376 WT_ERR(ret);
377 }
378 }
379 }
380
381 /* Close the underlying handle. */
382 switch (dhandle->type) {
383 case WT_DHANDLE_TYPE_BTREE:
384 WT_TRET(__wt_btree_close(session));
385 F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
386 break;
387 case WT_DHANDLE_TYPE_TABLE:
388 WT_TRET(__wt_schema_close_table(session, (WT_TABLE *)dhandle));
389 break;
390 }
391
392 /*
393 * If marking the handle dead, do so after closing the underlying btree.
394 * (Don't do it before that, the block manager asserts there are never
395 * two references to a block manager object, and re-opening the handle
396 * can succeed once we mark this handle dead.)
397 *
398 * Check discard too, code we call to clear the cache expects the data
399 * handle dead flag to be set when discarding modified pages.
400 */
401 if (marked_dead || discard)
402 F_SET(dhandle, WT_DHANDLE_DEAD);
403
404 /*
405 * Discard from cache any trees not marked dead in this call (that is,
406 * including trees previously marked dead). Done after marking the data
407 * handle dead for a couple reasons: first, we don't need to hold an
408 * exclusive handle to do it, second, code we call to clear the cache
409 * expects the data handle dead flag to be set when discarding modified
410 * pages.
411 */
412 if (discard)
413 WT_TRET(__wt_cache_op(session, WT_SYNC_DISCARD));
414
415 /*
416 * If we marked a handle dead it will be closed by sweep, via another
417 * call to this function. Otherwise, we're done with this handle.
418 */
419 if (!marked_dead) {
420 F_CLR(dhandle, WT_DHANDLE_OPEN);
421 if (dhandle->checkpoint == NULL)
422 --conn->open_btree_count;
423 }
424 WT_ASSERT(session,
425 F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
426 !F_ISSET(dhandle, WT_DHANDLE_OPEN));
427
428 err: __wt_spin_unlock(session, &dhandle->close_lock);
429
430 if (no_schema_lock)
431 F_CLR(session, WT_SESSION_NO_SCHEMA_LOCK);
432
433 if (is_btree)
434 __wt_evict_file_exclusive_off(session);
435
436 return (ret);
437 }
438
439 /*
440 * __wt_conn_dhandle_open --
441 * Open the current data handle.
442 */
443 int
__wt_conn_dhandle_open(WT_SESSION_IMPL * session,const char * cfg[],uint32_t flags)444 __wt_conn_dhandle_open(
445 WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags)
446 {
447 WT_BTREE *btree;
448 WT_DATA_HANDLE *dhandle;
449 WT_DECL_RET;
450
451 dhandle = session->dhandle;
452 btree = dhandle->handle;
453
454 WT_ASSERT(session,
455 F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) &&
456 !LF_ISSET(WT_DHANDLE_LOCK_ONLY));
457
458 WT_ASSERT(session,
459 !F_ISSET(S2C(session), WT_CONN_CLOSING_NO_MORE_OPENS));
460
461 /* Turn off eviction. */
462 if (dhandle->type == WT_DHANDLE_TYPE_BTREE)
463 WT_RET(__wt_evict_file_exclusive_on(session));
464
465 /*
466 * If the handle is already open, it has to be closed so it can be
467 * reopened with a new configuration.
468 *
469 * This call can return EBUSY if there's an update in the tree that's
470 * not yet globally visible. That's not a problem because it can only
471 * happen when we're switching from a normal handle to a "special" one,
472 * so we're returning EBUSY to an attempt to verify or do other special
473 * operations. The reverse won't happen because when the handle from a
474 * verify or other special operation is closed, there won't be updates
475 * in the tree that can block the close.
476 */
477 if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
478 WT_ERR(__wt_conn_dhandle_close(session, false, false));
479
480 /* Discard any previous configuration, set up the new configuration. */
481 __conn_dhandle_config_clear(session);
482 WT_ERR(__conn_dhandle_config_set(session));
483
484 switch (dhandle->type) {
485 case WT_DHANDLE_TYPE_BTREE:
486 /* Set any special flags on the btree handle. */
487 F_SET(btree, LF_MASK(WT_BTREE_SPECIAL_FLAGS));
488
489 /*
490 * Allocate data-source statistics memory. We don't allocate
491 * that memory when allocating the data handle because not all
492 * data handles need statistics (for example, handles used for
493 * checkpoint locking). If we are reopening the handle, then
494 * it may already have statistics memory, check to avoid the
495 * leak.
496 */
497 if (dhandle->stat_array == NULL)
498 WT_ERR(__wt_stat_dsrc_init(session, dhandle));
499
500 WT_ERR(__wt_btree_open(session, cfg));
501 break;
502 case WT_DHANDLE_TYPE_TABLE:
503 WT_ERR(__wt_schema_open_table(session, cfg));
504 break;
505 }
506
507 /*
508 * Bulk handles require true exclusive access, otherwise, handles
509 * marked as exclusive are allowed to be relocked by the same
510 * session.
511 */
512 if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) &&
513 !LF_ISSET(WT_BTREE_BULK)) {
514 dhandle->excl_session = session;
515 dhandle->excl_ref = 1;
516 }
517 F_SET(dhandle, WT_DHANDLE_OPEN);
518
519 /*
520 * Checkpoint handles are read-only, so eviction calculations based on
521 * the number of btrees are better to ignore them.
522 */
523 if (dhandle->checkpoint == NULL)
524 ++S2C(session)->open_btree_count;
525
526 if (0) {
527 err: if (btree != NULL)
528 F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
529 }
530
531 if (dhandle->type == WT_DHANDLE_TYPE_BTREE)
532 __wt_evict_file_exclusive_off(session);
533
534 return (ret);
535 }
536
537 /*
538 * __conn_btree_apply_internal --
539 * Apply a function to an open data handle.
540 */
541 static int
__conn_btree_apply_internal(WT_SESSION_IMPL * session,WT_DATA_HANDLE * dhandle,int (* file_func)(WT_SESSION_IMPL *,const char * []),int (* name_func)(WT_SESSION_IMPL *,const char *,bool *),const char * cfg[])542 __conn_btree_apply_internal(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle,
543 int (*file_func)(WT_SESSION_IMPL *, const char *[]),
544 int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
545 const char *cfg[])
546 {
547 WT_DECL_RET;
548 bool skip;
549
550 /* Always apply the name function, if supplied. */
551 skip = false;
552 if (name_func != NULL)
553 WT_RET(name_func(session, dhandle->name, &skip));
554
555 /* If there is no file function, don't bother locking the handle */
556 if (file_func == NULL || skip)
557 return (0);
558
559 /*
560 * We need to pull the handle into the session handle cache and make
561 * sure it's referenced to stop other internal code dropping the handle
562 * (e.g in LSM when cleaning up obsolete chunks).
563 */
564 if ((ret = __wt_session_get_dhandle(session,
565 dhandle->name, dhandle->checkpoint, NULL, 0)) != 0)
566 return (ret == EBUSY ? 0 : ret);
567
568 WT_SAVE_DHANDLE(session, ret = file_func(session, cfg));
569 WT_TRET(__wt_session_release_dhandle(session));
570 return (ret);
571 }
572
573 /*
574 * __wt_conn_btree_apply --
575 * Apply a function to all open btree handles with the given URI.
576 */
577 int
__wt_conn_btree_apply(WT_SESSION_IMPL * session,const char * uri,int (* file_func)(WT_SESSION_IMPL *,const char * []),int (* name_func)(WT_SESSION_IMPL *,const char *,bool *),const char * cfg[])578 __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri,
579 int (*file_func)(WT_SESSION_IMPL *, const char *[]),
580 int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
581 const char *cfg[])
582 {
583 WT_CONNECTION_IMPL *conn;
584 WT_DATA_HANDLE *dhandle;
585 WT_DECL_RET;
586 uint64_t bucket;
587
588 conn = S2C(session);
589
590 /*
591 * If we're given a URI, then we walk only the hash list for that
592 * name. If we don't have a URI we walk the entire dhandle list.
593 */
594 if (uri != NULL) {
595 bucket =
596 __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
597
598 for (dhandle = NULL;;) {
599 WT_WITH_HANDLE_LIST_READ_LOCK(session,
600 WT_DHANDLE_NEXT(session, dhandle,
601 &conn->dhhash[bucket], hashq));
602 if (dhandle == NULL)
603 return (0);
604
605 if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
606 F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
607 dhandle->checkpoint != NULL ||
608 strcmp(uri, dhandle->name) != 0)
609 continue;
610 WT_ERR(__conn_btree_apply_internal(session,
611 dhandle, file_func, name_func, cfg));
612 }
613 } else {
614 for (dhandle = NULL;;) {
615 WT_WITH_HANDLE_LIST_READ_LOCK(session,
616 WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
617 if (dhandle == NULL)
618 return (0);
619
620 if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
621 F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
622 dhandle->type != WT_DHANDLE_TYPE_BTREE ||
623 dhandle->checkpoint != NULL ||
624 WT_IS_METADATA(dhandle))
625 continue;
626 WT_ERR(__conn_btree_apply_internal(session,
627 dhandle, file_func, name_func, cfg));
628 }
629 }
630
631 err: WT_DHANDLE_RELEASE(dhandle);
632 return (ret);
633 }
634
635 /*
636 * __conn_dhandle_close_one --
637 * Lock and, if necessary, close a data handle.
638 */
639 static int
__conn_dhandle_close_one(WT_SESSION_IMPL * session,const char * uri,const char * checkpoint,bool removed,bool mark_dead)640 __conn_dhandle_close_one(WT_SESSION_IMPL *session,
641 const char *uri, const char *checkpoint, bool removed, bool mark_dead)
642 {
643 WT_DECL_RET;
644
645 /*
646 * Lock the handle exclusively. If this is part of schema-changing
647 * operation (indicated by metadata tracking being enabled), hold the
648 * lock for the duration of the operation.
649 */
650 WT_RET(__wt_session_get_dhandle(session, uri, checkpoint,
651 NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));
652 if (WT_META_TRACKING(session))
653 WT_RET(__wt_meta_track_handle_lock(session, false));
654
655 /*
656 * We have an exclusive lock, which means there are no cursors open at
657 * this point. Close the handle, if necessary.
658 */
659 if (F_ISSET(session->dhandle, WT_DHANDLE_OPEN)) {
660 __wt_meta_track_sub_on(session);
661 ret = __wt_conn_dhandle_close(session, false, mark_dead);
662
663 /*
664 * If the close succeeded, drop any locks it acquired. If
665 * there was a failure, this function will fail and the whole
666 * transaction will be rolled back.
667 */
668 if (ret == 0)
669 ret = __wt_meta_track_sub_off(session);
670 }
671 if (removed)
672 F_SET(session->dhandle, WT_DHANDLE_DROPPED);
673
674 if (!WT_META_TRACKING(session))
675 WT_TRET(__wt_session_release_dhandle(session));
676
677 return (ret);
678 }
679
680 /*
681 * __wt_conn_dhandle_close_all --
682 * Close all data handles handles with matching name (including all
683 * checkpoint handles).
684 */
685 int
__wt_conn_dhandle_close_all(WT_SESSION_IMPL * session,const char * uri,bool removed,bool mark_dead)686 __wt_conn_dhandle_close_all(
687 WT_SESSION_IMPL *session, const char *uri, bool removed, bool mark_dead)
688 {
689 WT_CONNECTION_IMPL *conn;
690 WT_DATA_HANDLE *dhandle;
691 WT_DECL_RET;
692 uint64_t bucket;
693
694 conn = S2C(session);
695
696 WT_ASSERT(session,
697 F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
698 WT_ASSERT(session, session->dhandle == NULL);
699
700 /*
701 * Lock the live handle first. This ordering is important: we rely on
702 * locking the live handle to fail fast if the tree is busy (e.g., with
703 * cursors open or in a checkpoint).
704 */
705 WT_ERR(__conn_dhandle_close_one(
706 session, uri, NULL, removed, mark_dead));
707
708 bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
709 TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) {
710 if (strcmp(dhandle->name, uri) != 0 ||
711 dhandle->checkpoint == NULL ||
712 F_ISSET(dhandle, WT_DHANDLE_DEAD))
713 continue;
714
715 WT_ERR(__conn_dhandle_close_one(
716 session, dhandle->name, dhandle->checkpoint, removed,
717 mark_dead));
718 }
719
720 err: session->dhandle = NULL;
721 return (ret);
722 }
723
724 /*
725 * __conn_dhandle_remove --
726 * Remove a handle from the shared list.
727 */
728 static int
__conn_dhandle_remove(WT_SESSION_IMPL * session,bool final)729 __conn_dhandle_remove(WT_SESSION_IMPL *session, bool final)
730 {
731 WT_CONNECTION_IMPL *conn;
732 WT_DATA_HANDLE *dhandle;
733 uint64_t bucket;
734
735 conn = S2C(session);
736 dhandle = session->dhandle;
737 bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
738
739 WT_ASSERT(session,
740 F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
741 WT_ASSERT(session, dhandle != conn->cache->walk_tree);
742
743 /* Check if the handle was reacquired by a session while we waited. */
744 if (!final &&
745 (dhandle->session_inuse != 0 || dhandle->session_ref != 0))
746 return (__wt_set_return(session, EBUSY));
747
748 WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket);
749 return (0);
750
751 }
752
753 /*
754 * __wt_conn_dhandle_discard_single --
755 * Close/discard a single data handle.
756 */
757 int
__wt_conn_dhandle_discard_single(WT_SESSION_IMPL * session,bool final,bool mark_dead)758 __wt_conn_dhandle_discard_single(
759 WT_SESSION_IMPL *session, bool final, bool mark_dead)
760 {
761 WT_DATA_HANDLE *dhandle;
762 WT_DECL_RET;
763 int tret;
764 bool set_pass_intr;
765
766 dhandle = session->dhandle;
767
768 if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
769 tret = __wt_conn_dhandle_close(session, final, mark_dead);
770 if (final && tret != 0) {
771 __wt_err(session, tret,
772 "Final close of %s failed", dhandle->name);
773 WT_TRET(tret);
774 } else if (!final)
775 WT_RET(tret);
776 }
777
778 /*
779 * Kludge: interrupt the eviction server in case it is holding the
780 * handle list lock.
781 */
782 set_pass_intr = false;
783 if (!F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) {
784 set_pass_intr = true;
785 (void)__wt_atomic_addv32(&S2C(session)->cache->pass_intr, 1);
786 }
787
788 /* Try to remove the handle, protected by the data handle lock. */
789 WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
790 tret = __conn_dhandle_remove(session, final));
791 if (set_pass_intr)
792 (void)__wt_atomic_subv32(&S2C(session)->cache->pass_intr, 1);
793 WT_TRET(tret);
794
795 /*
796 * After successfully removing the handle, clean it up.
797 */
798 if (ret == 0 || final) {
799 WT_TRET(__conn_dhandle_destroy(session, dhandle));
800 session->dhandle = NULL;
801 }
802
803 return (ret);
804 }
805
806 /*
807 * __wt_conn_dhandle_discard --
808 * Close/discard all data handles.
809 */
810 int
__wt_conn_dhandle_discard(WT_SESSION_IMPL * session)811 __wt_conn_dhandle_discard(WT_SESSION_IMPL *session)
812 {
813 WT_CONNECTION_IMPL *conn;
814 WT_DATA_HANDLE *dhandle, *dhandle_tmp;
815 WT_DECL_RET;
816
817 conn = S2C(session);
818
819 /*
820 * Empty the session cache: any data handles created in a connection
821 * method may be cached here, and we're about to close them.
822 */
823 __wt_session_close_cache(session);
824
825 /*
826 * Close open data handles: first, everything apart from metadata and
827 * lookaside (as closing a normal file may write metadata and read
828 * lookaside entries). Then close whatever is left open.
829 */
830 restart:
831 TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
832 if (WT_IS_METADATA(dhandle) ||
833 strcmp(dhandle->name, WT_LAS_URI) == 0 ||
834 WT_PREFIX_MATCH(dhandle->name, WT_SYSTEM_PREFIX))
835 continue;
836
837 WT_WITH_DHANDLE(session, dhandle,
838 WT_TRET(__wt_conn_dhandle_discard_single(
839 session, true, F_ISSET(conn, WT_CONN_PANIC))));
840 goto restart;
841 }
842
843 /* Shut down the lookaside table after all eviction is complete. */
844 WT_TRET(__wt_las_destroy(session));
845
846 /*
847 * Closing the files may have resulted in entries on our default
848 * session's list of open data handles, specifically, we added the
849 * metadata file if any of the files were dirty. Clean up that list
850 * before we shut down the metadata entry, for good.
851 */
852 __wt_session_close_cache(session);
853 F_SET(session, WT_SESSION_NO_DATA_HANDLES);
854
855 /*
856 * The connection may have an open metadata cursor handle. We cannot
857 * close it before now because it's potentially used when discarding
858 * other open data handles. Close it before discarding the underlying
859 * metadata handle.
860 */
861 if (session->meta_cursor != NULL)
862 WT_TRET(session->meta_cursor->close(session->meta_cursor));
863
864 /* Close the remaining handles. */
865 WT_TAILQ_SAFE_REMOVE_BEGIN(dhandle, &conn->dhqh, q, dhandle_tmp) {
866 WT_WITH_DHANDLE(session, dhandle,
867 WT_TRET(__wt_conn_dhandle_discard_single(
868 session, true, F_ISSET(conn, WT_CONN_PANIC))));
869 } WT_TAILQ_SAFE_REMOVE_END
870
871 return (ret);
872 }
873
874 /*
875 * __wt_verbose_dump_handles --
876 * Dump information about all data handles.
877 */
878 int
__wt_verbose_dump_handles(WT_SESSION_IMPL * session)879 __wt_verbose_dump_handles(WT_SESSION_IMPL *session)
880 {
881 WT_CONNECTION_IMPL *conn;
882 WT_DATA_HANDLE *dhandle;
883
884 conn = S2C(session);
885
886 WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
887 WT_RET(__wt_msg(session, "Data handle dump:"));
888 for (dhandle = NULL;;) {
889 WT_WITH_HANDLE_LIST_READ_LOCK(session,
890 WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
891 if (dhandle == NULL)
892 break;
893 WT_RET(__wt_msg(session, "Name: %s", dhandle->name));
894 if (dhandle->checkpoint != NULL)
895 WT_RET(__wt_msg(session,
896 "Checkpoint: %s", dhandle->checkpoint));
897 WT_RET(__wt_msg(session, " Sessions referencing handle: %"
898 PRIu32, dhandle->session_ref));
899 WT_RET(__wt_msg(session, " Sessions using handle: %"
900 PRId32, dhandle->session_inuse));
901 WT_RET(__wt_msg(session, " Exclusive references to handle: %"
902 PRIu32, dhandle->excl_ref));
903 if (dhandle->excl_ref != 0)
904 WT_RET(__wt_msg(session,
905 " Session with exclusive use: %p",
906 (void *)dhandle->excl_session));
907 WT_RET(__wt_msg(session,
908 " Flags: 0x%08" PRIx32, dhandle->flags));
909 }
910 return (0);
911 }
912