1 /*-
2 * Copyright (c) 2014-2018 MongoDB, Inc.
3 * Copyright (c) 2008-2014 WiredTiger, Inc.
4 * All rights reserved.
5 *
6 * See the file LICENSE for redistribution information.
7 */
8
9 #include "wt_internal.h"
10
/*
 * Session flags applied while an operation is using the lookaside table: the
 * cache size is ignored (the cache is already full), pages read on behalf of
 * the operation should be evicted ahead of application data, and the
 * operation is not allowed to reenter reconciliation.
 */
#define WT_LAS_SESSION_FLAGS                                            \
    (WT_SESSION_NO_RECONCILE | WT_SESSION_READ_WONT_NEED |              \
    WT_SESSION_IGNORE_CACHE_SIZE)
20
21 /*
22 * __las_set_isolation --
23 * Switch to read-uncommitted.
24 */
25 static void
__las_set_isolation(WT_SESSION_IMPL * session,WT_TXN_ISOLATION * saved_isolationp)26 __las_set_isolation(
27 WT_SESSION_IMPL *session, WT_TXN_ISOLATION *saved_isolationp)
28 {
29 *saved_isolationp = session->txn.isolation;
30 session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
31 }
32
33 /*
34 * __las_restore_isolation --
35 * Restore isolation.
36 */
37 static void
__las_restore_isolation(WT_SESSION_IMPL * session,WT_TXN_ISOLATION saved_isolation)38 __las_restore_isolation(
39 WT_SESSION_IMPL *session, WT_TXN_ISOLATION saved_isolation)
40 {
41 session->txn.isolation = saved_isolation;
42 }
43
44 /*
45 * __las_entry_count --
46 * Return when there are entries in the lookaside table.
47 */
48 static uint64_t
__las_entry_count(WT_CACHE * cache)49 __las_entry_count(WT_CACHE *cache)
50 {
51 uint64_t insert_cnt, remove_cnt;
52
53 insert_cnt = cache->las_insert_count;
54 WT_ORDERED_READ(remove_cnt, cache->las_remove_count);
55
56 return (insert_cnt > remove_cnt ? insert_cnt - remove_cnt : 0);
57 }
58
59 /*
60 * __wt_las_config --
61 * Configure the lookaside table.
62 */
63 int
__wt_las_config(WT_SESSION_IMPL * session,const char ** cfg)64 __wt_las_config(WT_SESSION_IMPL *session, const char **cfg)
65 {
66 WT_CONFIG_ITEM cval;
67 WT_CURSOR_BTREE *las_cursor;
68 WT_SESSION_IMPL *las_session;
69
70 WT_RET(__wt_config_gets(
71 session, cfg, "cache_overflow.file_max", &cval));
72
73 if (cval.val != 0 && cval.val < WT_LAS_FILE_MIN)
74 WT_RET_MSG(session, EINVAL,
75 "max cache overflow size %" PRId64 " below minimum %d",
76 cval.val, WT_LAS_FILE_MIN);
77
78 /* This is expected for in-memory configurations. */
79 las_session = S2C(session)->cache->las_session[0];
80 WT_ASSERT(session,
81 las_session != NULL || F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
82
83 if (las_session == NULL)
84 return (0);
85
86 /*
87 * We need to set file_max on the btree associated with one of the
88 * lookaside sessions.
89 */
90 las_cursor = (WT_CURSOR_BTREE *)las_session->las_cursor;
91 las_cursor->btree->file_max = (uint64_t)cval.val;
92
93 WT_STAT_CONN_SET(
94 session, cache_lookaside_ondisk_max, las_cursor->btree->file_max);
95
96 return (0);
97 }
98
99 /*
100 * __wt_las_empty --
101 * Return when there are entries in the lookaside table.
102 */
103 bool
__wt_las_empty(WT_SESSION_IMPL * session)104 __wt_las_empty(WT_SESSION_IMPL *session)
105 {
106 return (__las_entry_count(S2C(session)->cache) == 0);
107 }
108
109 /*
110 * __wt_las_stats_update --
111 * Update the lookaside table statistics for return to the application.
112 */
113 void
__wt_las_stats_update(WT_SESSION_IMPL * session)114 __wt_las_stats_update(WT_SESSION_IMPL *session)
115 {
116 WT_CACHE *cache;
117 WT_CONNECTION_IMPL *conn;
118 WT_CONNECTION_STATS **cstats;
119 WT_DSRC_STATS **dstats;
120 int64_t v;
121
122 conn = S2C(session);
123 cache = conn->cache;
124
125 /*
126 * Lookaside table statistics are copied from the underlying lookaside
127 * table data-source statistics. If there's no lookaside table, values
128 * remain 0.
129 */
130 if (!F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN))
131 return;
132
133 /* Set the connection-wide statistics. */
134 cstats = conn->stats;
135
136 WT_STAT_SET(session, cstats,
137 cache_lookaside_entries, __las_entry_count(cache));
138
139 /*
140 * We have a cursor, and we need the underlying data handle; we can get
141 * to it by way of the underlying btree handle, but it's a little ugly.
142 */
143 dstats = ((WT_CURSOR_BTREE *)
144 cache->las_session[0]->las_cursor)->btree->dhandle->stats;
145
146 v = WT_STAT_READ(dstats, cursor_update);
147 WT_STAT_SET(session, cstats, cache_lookaside_insert, v);
148 v = WT_STAT_READ(dstats, cursor_remove);
149 WT_STAT_SET(session, cstats, cache_lookaside_remove, v);
150
151 /*
152 * If we're clearing stats we need to clear the cursor values we just
153 * read. This does not clear the rest of the statistics in the
154 * lookaside data source stat cursor, but we own that namespace so we
155 * don't have to worry about users seeing inconsistent data source
156 * information.
157 */
158 if (FLD_ISSET(conn->stat_flags, WT_STAT_CLEAR)) {
159 WT_STAT_SET(session, dstats, cursor_insert, 0);
160 WT_STAT_SET(session, dstats, cursor_remove, 0);
161 }
162 }
163
164 /*
165 * __wt_las_create --
166 * Initialize the database's lookaside store.
167 */
168 int
__wt_las_create(WT_SESSION_IMPL * session,const char ** cfg)169 __wt_las_create(WT_SESSION_IMPL *session, const char **cfg)
170 {
171 WT_CACHE *cache;
172 WT_CONNECTION_IMPL *conn;
173 WT_DECL_RET;
174 int i;
175 const char *drop_cfg[] = {
176 WT_CONFIG_BASE(session, WT_SESSION_drop), "force=true", NULL };
177
178 conn = S2C(session);
179 cache = conn->cache;
180
181 /* Read-only and in-memory configurations don't need the LAS table. */
182 if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY))
183 return (0);
184
185 /*
186 * Done at startup: we cannot do it on demand because we require the
187 * schema lock to create and drop the table, and it may not always be
188 * available.
189 *
190 * Discard any previous incarnation of the table.
191 */
192 WT_WITH_SCHEMA_LOCK(session,
193 ret = __wt_schema_drop(session, WT_LAS_URI, drop_cfg));
194 WT_RET(ret);
195
196 /* Re-create the table. */
197 WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_CONFIG));
198
199 /*
200 * Open a shared internal session and cursor used for the lookaside
201 * table. This session should never perform reconciliation.
202 */
203 for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
204 WT_RET(__wt_open_internal_session(conn, "lookaside table",
205 true, WT_LAS_SESSION_FLAGS, &cache->las_session[i]));
206 WT_RET(__wt_las_cursor_open(cache->las_session[i]));
207 }
208
209 WT_RET(__wt_las_config(session, cfg));
210
211 /* The statistics server is already running, make sure we don't race. */
212 WT_WRITE_BARRIER();
213 F_SET(conn, WT_CONN_LOOKASIDE_OPEN);
214
215 return (0);
216 }
217
218 /*
219 * __wt_las_destroy --
220 * Destroy the database's lookaside store.
221 */
222 int
__wt_las_destroy(WT_SESSION_IMPL * session)223 __wt_las_destroy(WT_SESSION_IMPL *session)
224 {
225 WT_CACHE *cache;
226 WT_CONNECTION_IMPL *conn;
227 WT_DECL_RET;
228 WT_SESSION *wt_session;
229 int i;
230
231 conn = S2C(session);
232 cache = conn->cache;
233
234 F_CLR(conn, WT_CONN_LOOKASIDE_OPEN);
235 if (cache == NULL)
236 return (0);
237
238 for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
239 if (cache->las_session[i] == NULL)
240 continue;
241
242 wt_session = &cache->las_session[i]->iface;
243 WT_TRET(wt_session->close(wt_session, NULL));
244 cache->las_session[i] = NULL;
245 }
246
247 __wt_buf_free(session, &cache->las_sweep_key);
248 __wt_free(session, cache->las_dropped);
249 __wt_free(session, cache->las_sweep_dropmap);
250
251 return (ret);
252 }
253
254 /*
255 * __wt_las_cursor_open --
256 * Open a new lookaside table cursor.
257 */
258 int
__wt_las_cursor_open(WT_SESSION_IMPL * session)259 __wt_las_cursor_open(WT_SESSION_IMPL *session)
260 {
261 WT_BTREE *btree;
262 WT_CURSOR *cursor;
263 WT_DECL_RET;
264 const char *open_cursor_cfg[] = {
265 WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL };
266
267 WT_WITHOUT_DHANDLE(session, ret = __wt_open_cursor(
268 session, WT_LAS_URI, NULL, open_cursor_cfg, &cursor));
269 WT_RET(ret);
270
271 /*
272 * Retrieve the btree from the cursor, rather than the session because
273 * we don't always switch the LAS handle in to the session before
274 * entering this function.
275 */
276 btree = ((WT_CURSOR_BTREE *)cursor)->btree;
277
278 /* Track the lookaside file ID. */
279 if (S2C(session)->cache->las_fileid == 0)
280 S2C(session)->cache->las_fileid = btree->id;
281
282 /*
283 * Set special flags for the lookaside table: the lookaside flag (used,
284 * for example, to avoid writing records during reconciliation), also
285 * turn off checkpoints and logging.
286 *
287 * Test flags before setting them so updates can't race in subsequent
288 * opens (the first update is safe because it's single-threaded from
289 * wiredtiger_open).
290 */
291 if (!F_ISSET(btree, WT_BTREE_LOOKASIDE))
292 F_SET(btree, WT_BTREE_LOOKASIDE);
293 if (!F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
294 F_SET(btree, WT_BTREE_NO_CHECKPOINT);
295 if (!F_ISSET(btree, WT_BTREE_NO_LOGGING))
296 F_SET(btree, WT_BTREE_NO_LOGGING);
297
298 session->las_cursor = cursor;
299 F_SET(session, WT_SESSION_LOOKASIDE_CURSOR);
300
301 return (0);
302 }
303
304 /*
305 * __wt_las_cursor --
306 * Return a lookaside cursor.
307 */
308 void
__wt_las_cursor(WT_SESSION_IMPL * session,WT_CURSOR ** cursorp,uint32_t * session_flags)309 __wt_las_cursor(
310 WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags)
311 {
312 WT_CACHE *cache;
313 int i;
314
315 *cursorp = NULL;
316
317 /*
318 * We don't want to get tapped for eviction after we start using the
319 * lookaside cursor; save a copy of the current eviction state, we'll
320 * turn eviction off before we return.
321 *
322 * Don't cache lookaside table pages, we're here because of eviction
323 * problems and there's no reason to believe lookaside pages will be
324 * useful more than once.
325 */
326 *session_flags = F_MASK(session, WT_LAS_SESSION_FLAGS);
327
328 cache = S2C(session)->cache;
329
330 /*
331 * Some threads have their own lookaside table cursors, else lock the
332 * shared lookaside cursor.
333 */
334 if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR))
335 *cursorp = session->las_cursor;
336 else {
337 for (;;) {
338 __wt_spin_lock(session, &cache->las_lock);
339 for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
340 if (!cache->las_session_inuse[i]) {
341 *cursorp =
342 cache->las_session[i]->las_cursor;
343 cache->las_session_inuse[i] = true;
344 break;
345 }
346 }
347 __wt_spin_unlock(session, &cache->las_lock);
348 if (*cursorp != NULL)
349 break;
350 /*
351 * If all the lookaside sessions are busy, stall.
352 *
353 * XXX better as a condition variable.
354 */
355 __wt_sleep(0, WT_THOUSAND);
356 if (F_ISSET(session, WT_SESSION_INTERNAL))
357 WT_STAT_CONN_INCRV(session,
358 cache_lookaside_cursor_wait_internal,
359 WT_THOUSAND);
360 else
361 WT_STAT_CONN_INCRV(session,
362 cache_lookaside_cursor_wait_application,
363 WT_THOUSAND);
364
365 }
366 }
367
368 /* Configure session to access the lookaside table. */
369 F_SET(session, WT_LAS_SESSION_FLAGS);
370 }
371
372 /*
373 * __wt_las_cursor_close --
374 * Discard a lookaside cursor.
375 */
376 int
__wt_las_cursor_close(WT_SESSION_IMPL * session,WT_CURSOR ** cursorp,uint32_t session_flags)377 __wt_las_cursor_close(
378 WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags)
379 {
380 WT_CACHE *cache;
381 WT_CURSOR *cursor;
382 WT_DECL_RET;
383 int i;
384
385 cache = S2C(session)->cache;
386
387 if ((cursor = *cursorp) == NULL)
388 return (0);
389 *cursorp = NULL;
390
391 /* Reset the cursor. */
392 ret = cursor->reset(cursor);
393
394 /*
395 * We turned off caching and eviction while the lookaside cursor was in
396 * use, restore the session's flags.
397 */
398 F_CLR(session, WT_LAS_SESSION_FLAGS);
399 F_SET(session, session_flags);
400
401 /*
402 * Some threads have their own lookaside table cursors, else unlock the
403 * shared lookaside cursor.
404 */
405 if (!F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) {
406 __wt_spin_lock(session, &cache->las_lock);
407 for (i = 0; i < WT_LAS_NUM_SESSIONS; i++)
408 if (cursor->session == &cache->las_session[i]->iface) {
409 cache->las_session_inuse[i] = false;
410 break;
411 }
412 __wt_spin_unlock(session, &cache->las_lock);
413 WT_ASSERT(session, i != WT_LAS_NUM_SESSIONS);
414 }
415
416 return (ret);
417 }
418
419 /*
420 * __wt_las_page_skip_locked --
421 * Check if we can skip reading a page with lookaside entries, where
422 * the page is already locked.
423 */
424 bool
__wt_las_page_skip_locked(WT_SESSION_IMPL * session,WT_REF * ref)425 __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref)
426 {
427 WT_TXN *txn;
428
429 txn = &session->txn;
430
431 /*
432 * Skip lookaside pages if reading without a timestamp and all the
433 * updates in lookaside are in the past.
434 *
435 * Lookaside eviction preferentially chooses the newest updates when
436 * creating page images with no stable timestamp. If a stable timestamp
437 * has been set, we have to visit the page because eviction chooses old
438 * version of records in that case.
439 *
440 * One case where we may need to visit the page is if lookaside eviction
441 * is active in tree 2 when a checkpoint has started and is working its
442 * way through tree 1. In that case, lookaside may have created a page
443 * image with updates in the future of the checkpoint.
444 *
445 * We also need to instantiate a lookaside page if this is an update
446 * operation in progress.
447 */
448 if (F_ISSET(txn, WT_TXN_UPDATE))
449 return (false);
450
451 if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
452 return (false);
453
454 /*
455 * If some of the page's history overlaps with the reader's snapshot
456 * then we have to read it. This is only relevant if we chose versions
457 * that were unstable when the page was written.
458 */
459 if (ref->page_las->skew_newest &&
460 WT_TXNID_LE(txn->snap_min, ref->page_las->unstable_txn))
461 return (false);
462
463 if (!F_ISSET(txn, WT_TXN_HAS_TS_READ))
464 return (ref->page_las->skew_newest);
465
466 #ifdef HAVE_TIMESTAMPS
467 /*
468 * Skip lookaside pages if reading as of a timestamp, we evicted new
469 * versions of data and all the updates are in the past.
470 */
471 if (ref->page_las->skew_newest &&
472 __wt_timestamp_cmp(
473 &txn->read_timestamp, &ref->page_las->unstable_timestamp) > 0)
474 return (true);
475
476 /*
477 * Skip lookaside pages if reading as of a timestamp, we evicted old
478 * versions of data and all the unstable updates are in the future.
479 */
480 if (!ref->page_las->skew_newest &&
481 __wt_timestamp_cmp(
482 &txn->read_timestamp, &ref->page_las->unstable_timestamp) < 0)
483 return (true);
484 #endif
485
486 return (false);
487 }
488
489 /*
490 * __wt_las_page_skip --
491 * Check if we can skip reading a page with lookaside entries, where the
492 * page needs to be locked before checking.
493 */
494 bool
__wt_las_page_skip(WT_SESSION_IMPL * session,WT_REF * ref)495 __wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
496 {
497 uint32_t previous_state;
498 bool skip;
499
500 if ((previous_state = ref->state) != WT_REF_LIMBO &&
501 previous_state != WT_REF_LOOKASIDE)
502 return (false);
503
504 if (!__wt_atomic_casv32(&ref->state, previous_state, WT_REF_LOCKED))
505 return (false);
506
507 skip = __wt_las_page_skip_locked(session, ref);
508
509 /* Restore the state and push the change. */
510 ref->state = previous_state;
511 WT_FULL_BARRIER();
512
513 return (skip);
514 }
515
516 /*
517 * __las_remove_block --
518 * Remove all records for a given page from the lookaside store.
519 */
520 static int
__las_remove_block(WT_CURSOR * cursor,uint64_t pageid,bool lock_wait,uint64_t * remove_cntp)521 __las_remove_block(
522 WT_CURSOR *cursor, uint64_t pageid, bool lock_wait, uint64_t *remove_cntp)
523 {
524 WT_CONNECTION_IMPL *conn;
525 WT_DECL_RET;
526 WT_ITEM las_key;
527 WT_SESSION_IMPL *session;
528 WT_TXN_ISOLATION saved_isolation;
529 uint64_t las_counter, las_pageid;
530 uint32_t las_id;
531 bool local_txn;
532
533 *remove_cntp = 0;
534
535 session = (WT_SESSION_IMPL *)cursor->session;
536 conn = S2C(session);
537 local_txn = false;
538
539 /* Prevent the sweep thread from removing the block. */
540 if (lock_wait)
541 __wt_writelock(session, &conn->cache->las_sweepwalk_lock);
542 else
543 WT_RET(__wt_try_writelock(
544 session, &conn->cache->las_sweepwalk_lock));
545
546 __las_set_isolation(session, &saved_isolation);
547 WT_ERR(__wt_txn_begin(session, NULL));
548 local_txn = true;
549
550 /*
551 * Search for the block's unique btree ID and page ID prefix and step
552 * through all matching records, removing them.
553 */
554 for (ret = __wt_las_cursor_position(cursor, pageid);
555 ret == 0; ret = cursor->next(cursor)) {
556 WT_ERR(cursor->get_key(cursor,
557 &las_pageid, &las_id, &las_counter, &las_key));
558
559 /* Confirm that we have a matching record. */
560 if (las_pageid != pageid)
561 break;
562
563 WT_ERR(cursor->remove(cursor));
564 ++*remove_cntp;
565 }
566 WT_ERR_NOTFOUND_OK(ret);
567
568 err: if (local_txn) {
569 if (ret == 0)
570 ret = __wt_txn_commit(session, NULL);
571 else
572 WT_TRET(__wt_txn_rollback(session, NULL));
573 }
574
575 __las_restore_isolation(session, saved_isolation);
576 __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock);
577 return (ret);
578 }
579
580 /*
581 * __las_insert_block_verbose --
582 * Display a verbose message once per checkpoint with details about the
583 * cache state when performing a lookaside table write.
584 */
585 static int
__las_insert_block_verbose(WT_SESSION_IMPL * session,WT_BTREE * btree,WT_MULTI * multi)586 __las_insert_block_verbose(
587 WT_SESSION_IMPL *session, WT_BTREE *btree, WT_MULTI *multi)
588 {
589 WT_CACHE *cache;
590 WT_CONNECTION_IMPL *conn;
591 double pct_dirty, pct_full;
592 uint64_t ckpt_gen_current, ckpt_gen_last;
593 uint32_t btree_id;
594 #ifdef HAVE_TIMESTAMPS
595 char hex_timestamp[2 * WT_TIMESTAMP_SIZE + 1];
596 #endif
597 const char *ts;
598
599 btree_id = btree->id;
600
601 if (!WT_VERBOSE_ISSET(session,
602 WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY))
603 return (0);
604
605 conn = S2C(session);
606 cache = conn->cache;
607 ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT);
608 ckpt_gen_last = cache->las_verb_gen_write;
609
610 /*
611 * Print a message if verbose lookaside, or once per checkpoint if
612 * only reporting activity. Avoid an expensive atomic operation as
613 * often as possible when the message rate is limited.
614 */
615 if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE) ||
616 (ckpt_gen_current > ckpt_gen_last &&
617 __wt_atomic_casv64(&cache->las_verb_gen_write,
618 ckpt_gen_last, ckpt_gen_current))) {
619 (void)__wt_eviction_clean_needed(session, &pct_full);
620 (void)__wt_eviction_dirty_needed(session, &pct_dirty);
621
622 #ifdef HAVE_TIMESTAMPS
623 WT_RET(__wt_timestamp_to_hex_string(session, hex_timestamp,
624 &multi->page_las.unstable_timestamp));
625 ts = hex_timestamp;
626 #else
627 ts = "disabled";
628 #endif
629 __wt_verbose(session,
630 WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
631 "Page reconciliation triggered lookaside write "
632 "file ID %" PRIu32 ", page ID %" PRIu64 ". "
633 "Max txn ID %" PRIu64 ", unstable timestamp %s, %s. "
634 "Entries now in lookaside file: %" PRId64 ", "
635 "cache dirty: %2.3f%% , "
636 "cache use: %2.3f%%",
637 btree_id, multi->page_las.las_pageid,
638 multi->page_las.max_txn,
639 ts,
640 multi->page_las.skew_newest ? "newest" : "not newest",
641 WT_STAT_READ(conn->stats, cache_lookaside_entries),
642 pct_dirty, pct_full);
643 }
644
645 /* Never skip updating the tracked generation */
646 if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE))
647 cache->las_verb_gen_write = ckpt_gen_current;
648 return (0);
649 }
650
651 /*
652 * __wt_las_insert_block --
653 * Copy one set of saved updates into the database's lookaside table.
654 */
655 int
__wt_las_insert_block(WT_CURSOR * cursor,WT_BTREE * btree,WT_PAGE * page,WT_MULTI * multi,WT_ITEM * key)656 __wt_las_insert_block(WT_CURSOR *cursor,
657 WT_BTREE *btree, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key)
658 {
659 WT_CONNECTION_IMPL *conn;
660 WT_DECL_RET;
661 WT_DECL_TIMESTAMP(prev_timestamp)
662 WT_ITEM las_timestamp, las_value;
663 WT_SAVE_UPD *list;
664 WT_SESSION_IMPL *session;
665 WT_TXN_ISOLATION saved_isolation;
666 WT_UPDATE *upd;
667 wt_off_t las_size;
668 uint64_t insert_cnt;
669 uint64_t las_counter, las_pageid, max_las_size;
670 uint32_t btree_id, i, slot;
671 uint8_t *p;
672 bool local_txn;
673
674 session = (WT_SESSION_IMPL *)cursor->session;
675 conn = S2C(session);
676 WT_CLEAR(las_timestamp);
677 WT_CLEAR(las_value);
678 insert_cnt = 0;
679 btree_id = btree->id;
680 local_txn = false;
681
682 __wt_timestamp_set_zero(&prev_timestamp);
683
684 las_pageid = __wt_atomic_add64(&conn->cache->las_pageid, 1);
685
686 if (!btree->lookaside_entries)
687 btree->lookaside_entries = true;
688
689 #ifdef HAVE_DIAGNOSTIC
690 {
691 uint64_t remove_cnt;
692 /*
693 * There should never be any entries with the page ID we are about to
694 * use.
695 */
696 WT_RET_BUSY_OK(
697 __las_remove_block(cursor, las_pageid, false, &remove_cnt));
698 WT_ASSERT(session, remove_cnt == 0);
699 }
700 #endif
701
702 /* Wrap all the updates in a transaction. */
703 __las_set_isolation(session, &saved_isolation);
704 WT_ERR(__wt_txn_begin(session, NULL));
705 local_txn = true;
706
707 /*
708 * Inserts should be on the same page absent a split, search any pinned
709 * leaf page.
710 */
711 F_SET(cursor, WT_CURSTD_UPDATE_LOCAL);
712
713 /* Enter each update in the boundary's list into the lookaside store. */
714 for (las_counter = 0, i = 0,
715 list = multi->supd; i < multi->supd_entries; ++i, ++list) {
716 /* Lookaside table key component: source key. */
717 switch (page->type) {
718 case WT_PAGE_COL_FIX:
719 case WT_PAGE_COL_VAR:
720 p = key->mem;
721 WT_ERR(
722 __wt_vpack_uint(&p, 0, WT_INSERT_RECNO(list->ins)));
723 key->size = WT_PTRDIFF(p, key->data);
724 break;
725 case WT_PAGE_ROW_LEAF:
726 if (list->ins == NULL) {
727 WT_WITH_BTREE(session, btree,
728 ret = __wt_row_leaf_key(
729 session, page, list->ripcip, key, false));
730 WT_ERR(ret);
731 } else {
732 key->data = WT_INSERT_KEY(list->ins);
733 key->size = WT_INSERT_KEY_SIZE(list->ins);
734 }
735 break;
736 WT_ILLEGAL_VALUE_ERR(session, page->type);
737 }
738
739 /*
740 * Lookaside table value component: update reference. Updates
741 * come from the row-store insert list (an inserted item), or
742 * update array (an update to an original on-page item), or from
743 * a column-store insert list (column-store format has no update
744 * array, the insert list contains both inserted items and
745 * updates to original on-page items). When rolling forward a
746 * modify update from an original on-page item, we need an
747 * on-page slot so we can find the original on-page item. When
748 * rolling forward from an inserted item, no on-page slot is
749 * possible.
750 */
751 slot = UINT32_MAX; /* Impossible slot */
752 if (list->ripcip != NULL)
753 slot = page->type == WT_PAGE_ROW_LEAF ?
754 WT_ROW_SLOT(page, list->ripcip) :
755 WT_COL_SLOT(page, list->ripcip);
756 upd = list->ins == NULL ?
757 page->modify->mod_row_update[slot] : list->ins->upd;
758
759 /*
760 * Walk the list of updates, storing each key/value pair into
761 * the lookaside table. Skip aborted items (there's no point
762 * to restoring them), and assert we never see a reserved item.
763 */
764 do {
765 if (upd->txnid == WT_TXN_ABORTED)
766 continue;
767
768 switch (upd->type) {
769 case WT_UPDATE_MODIFY:
770 case WT_UPDATE_STANDARD:
771 las_value.data = upd->data;
772 las_value.size = upd->size;
773 break;
774 case WT_UPDATE_BIRTHMARK:
775 case WT_UPDATE_TOMBSTONE:
776 las_value.size = 0;
777 break;
778 WT_ILLEGAL_VALUE_ERR(session, upd->type);
779 }
780
781 cursor->set_key(cursor,
782 las_pageid, btree_id, ++las_counter, key);
783
784 #ifdef HAVE_TIMESTAMPS
785 las_timestamp.data = &upd->timestamp;
786 las_timestamp.size = WT_TIMESTAMP_SIZE;
787 #endif
788 /*
789 * If saving a non-zero length value on the page, save a
790 * birthmark instead of duplicating it in the lookaside
791 * table. (We check the length because row-store doesn't
792 * write zero-length data items.)
793 */
794 if (upd == list->onpage_upd &&
795 upd->size > 0 &&
796 (upd->type == WT_UPDATE_STANDARD ||
797 upd->type == WT_UPDATE_MODIFY)) {
798 las_value.size = 0;
799 cursor->set_value(cursor, upd->txnid,
800 &las_timestamp, upd->prepare_state,
801 WT_UPDATE_BIRTHMARK, &las_value);
802 } else
803 cursor->set_value(cursor, upd->txnid,
804 &las_timestamp, upd->prepare_state,
805 upd->type, &las_value);
806
807 /*
808 * Using update instead of insert so the page stays
809 * pinned and can be searched before the tree.
810 */
811 WT_ERR(cursor->update(cursor));
812 ++insert_cnt;
813 } while ((upd = upd->next) != NULL);
814 }
815
816 WT_ERR(__wt_block_manager_named_size(session, WT_LAS_FILE, &las_size));
817 WT_STAT_CONN_SET(session, cache_lookaside_ondisk, las_size);
818 max_las_size = ((WT_CURSOR_BTREE *)cursor)->btree->file_max;
819 if (max_las_size != 0 && (uint64_t)las_size > max_las_size)
820 WT_PANIC_MSG(session, WT_PANIC,
821 "WiredTigerLAS: file size of %" PRIu64 " exceeds maximum "
822 "size %" PRIu64, (uint64_t)las_size, max_las_size);
823
824 err: /* Resolve the transaction. */
825 if (local_txn) {
826 if (ret == 0)
827 ret = __wt_txn_commit(session, NULL);
828 else
829 WT_TRET(__wt_txn_rollback(session, NULL));
830
831 /* Adjust the entry count. */
832 if (ret == 0)
833 (void)__wt_atomic_add64(
834 &conn->cache->las_insert_count, insert_cnt);
835 }
836
837 __las_restore_isolation(session, saved_isolation);
838 F_CLR(cursor, WT_CURSTD_UPDATE_LOCAL);
839
840 if (ret == 0 && insert_cnt > 0) {
841 multi->page_las.las_pageid = las_pageid;
842 ret = __las_insert_block_verbose(session, btree, multi);
843 }
844
845 return (ret);
846 }
847
848 /*
849 * __wt_las_cursor_position --
850 * Position a lookaside cursor at the beginning of a block.
851 *
852 * There may be no block of lookaside entries if they have been removed by
853 * WT_CONNECTION::rollback_to_stable.
854 */
855 int
__wt_las_cursor_position(WT_CURSOR * cursor,uint64_t pageid)856 __wt_las_cursor_position(WT_CURSOR *cursor, uint64_t pageid)
857 {
858 WT_ITEM las_key;
859 uint64_t las_counter, las_pageid;
860 uint32_t las_id;
861 int exact;
862
863 /*
864 * When scanning for all pages, start at the beginning of the lookaside
865 * table.
866 */
867 if (pageid == 0) {
868 WT_RET(cursor->reset(cursor));
869 return (cursor->next(cursor));
870 }
871
872 /*
873 * Because of the special visibility rules for lookaside, a new block
874 * can appear in between our search and the block of interest. Keep
875 * trying until we find it.
876 */
877 for (;;) {
878 WT_CLEAR(las_key);
879 cursor->set_key(cursor,
880 pageid, (uint32_t)0, (uint64_t)0, &las_key);
881 WT_RET(cursor->search_near(cursor, &exact));
882 if (exact < 0) {
883 WT_RET(cursor->next(cursor));
884
885 /*
886 * Because of the special visibility rules for
887 * lookaside, a new block can appear in between our
888 * search and the block of interest. Keep trying while
889 * we have a key lower than we expect.
890 *
891 * There may be no block of lookaside entries if they
892 * have been removed by
893 * WT_CONNECTION::rollback_to_stable.
894 */
895 WT_RET(cursor->get_key(cursor,
896 &las_pageid, &las_id, &las_counter, &las_key));
897 if (las_pageid < pageid)
898 continue;
899 }
900
901 return (0);
902 }
903
904 /* NOTREACHED */
905 }
906
907 /*
908 * __wt_las_remove_block --
909 * Remove all records for a given page from the lookaside table.
910 */
911 int
__wt_las_remove_block(WT_SESSION_IMPL * session,uint64_t pageid,bool lock_wait)912 __wt_las_remove_block(
913 WT_SESSION_IMPL *session, uint64_t pageid, bool lock_wait)
914 {
915 WT_CONNECTION_IMPL *conn;
916 WT_CURSOR *cursor;
917 WT_DECL_RET;
918 uint64_t remove_cnt;
919 uint32_t session_flags;
920
921 conn = S2C(session);
922 session_flags = 0; /* [-Wconditional-uninitialized] */
923
924 /*
925 * This is an external API for removing records from the lookaside
926 * table, first acquiring a lookaside table cursor and enclosing
927 * transaction, then calling an underlying function to do the work.
928 */
929 __wt_las_cursor(session, &cursor, &session_flags);
930
931 if ((ret = __las_remove_block(
932 cursor, pageid, lock_wait, &remove_cnt)) == 0)
933 (void)__wt_atomic_add64(
934 &conn->cache->las_remove_count, remove_cnt);
935
936 WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
937 return (ret);
938 }
939
940 /*
941 * __wt_las_remove_dropped --
942 * Remove an opened btree ID if it is in the dropped table.
943 */
944 void
__wt_las_remove_dropped(WT_SESSION_IMPL * session)945 __wt_las_remove_dropped(WT_SESSION_IMPL *session)
946 {
947 WT_BTREE *btree;
948 WT_CACHE *cache;
949 u_int i, j;
950
951 btree = S2BT(session);
952 cache = S2C(session)->cache;
953
954 __wt_spin_lock(session, &cache->las_sweep_lock);
955 for (i = 0; i < cache->las_dropped_next &&
956 cache->las_dropped[i] != btree->id; i++)
957 ;
958
959 if (i < cache->las_dropped_next) {
960 cache->las_dropped_next--;
961 for (j = i; j < cache->las_dropped_next; j++)
962 cache->las_dropped[j] = cache->las_dropped[j + 1];
963 }
964 __wt_spin_unlock(session, &cache->las_sweep_lock);
965 }
966
967 /*
968 * __wt_las_save_dropped --
969 * Save a dropped btree ID to be swept from the lookaside table.
970 */
971 int
__wt_las_save_dropped(WT_SESSION_IMPL * session)972 __wt_las_save_dropped(WT_SESSION_IMPL *session)
973 {
974 WT_BTREE *btree;
975 WT_CACHE *cache;
976 WT_DECL_RET;
977
978 btree = S2BT(session);
979 cache = S2C(session)->cache;
980
981 __wt_spin_lock(session, &cache->las_sweep_lock);
982 WT_ERR(__wt_realloc_def(session, &cache->las_dropped_alloc,
983 cache->las_dropped_next + 1, &cache->las_dropped));
984 cache->las_dropped[cache->las_dropped_next++] = btree->id;
985 err: __wt_spin_unlock(session, &cache->las_sweep_lock);
986 return (ret);
987 }
988
989 /*
990 * __las_sweep_count --
991 * Calculate how many records to examine per sweep step.
992 */
993 static inline uint64_t
__las_sweep_count(WT_CACHE * cache)994 __las_sweep_count(WT_CACHE *cache)
995 {
996 uint64_t las_entry_count;
997
998 /*
999 * The sweep server is a slow moving thread. Try to review the entire
1000 * lookaside table once every 5 minutes.
1001 *
1002 * The reason is because the lookaside table exists because we're seeing
1003 * cache/eviction pressure (it allows us to trade performance and disk
1004 * space for cache space), and it's likely lookaside blocks are being
1005 * evicted, and reading them back in doesn't help things. A trickier,
1006 * but possibly better, alternative might be to review all lookaside
1007 * blocks in the cache in order to get rid of them, and slowly review
1008 * lookaside blocks that have already been evicted.
1009 *
1010 * Put upper and lower bounds on the calculation: since reads of pages
1011 * with lookaside entries are blocked during sweep, make sure we do
1012 * some work but don't block reads for too long.
1013 */
1014 las_entry_count = __las_entry_count(cache);
1015 return ((uint64_t)WT_MAX(WT_LAS_SWEEP_ENTRIES,
1016 las_entry_count / (5 * WT_MINUTE / WT_LAS_SWEEP_SEC)));
1017 }
1018
1019 /*
1020 * __las_sweep_init --
1021 * Prepare to start a lookaside sweep.
1022 */
1023 static int
__las_sweep_init(WT_SESSION_IMPL * session)1024 __las_sweep_init(WT_SESSION_IMPL *session)
1025 {
1026 WT_CACHE *cache;
1027 WT_DECL_RET;
1028 u_int i;
1029
1030 cache = S2C(session)->cache;
1031
1032 __wt_spin_lock(session, &cache->las_sweep_lock);
1033
1034 /*
1035 * If no files have been dropped and the lookaside file is empty,
1036 * there's nothing to do.
1037 */
1038 if (cache->las_dropped_next == 0 && __wt_las_empty(session))
1039 WT_ERR(WT_NOTFOUND);
1040
1041 /*
1042 * Record the current page ID: sweep will stop after this point.
1043 *
1044 * Since the btree IDs we're scanning are closed, any eviction must
1045 * have already completed, so we won't miss anything with this
1046 * approach.
1047 *
1048 * Also, if a tree is reopened and there is lookaside activity before
1049 * this sweep completes, it will have a higher page ID and should not
1050 * be removed.
1051 */
1052 cache->las_sweep_max_pageid = cache->las_pageid;
1053
1054 /* Scan the btree IDs to find min/max. */
1055 cache->las_sweep_dropmin = UINT32_MAX;
1056 cache->las_sweep_dropmax = 0;
1057 for (i = 0; i < cache->las_dropped_next; i++) {
1058 cache->las_sweep_dropmin =
1059 WT_MIN(cache->las_sweep_dropmin, cache->las_dropped[i]);
1060 cache->las_sweep_dropmax =
1061 WT_MAX(cache->las_sweep_dropmax, cache->las_dropped[i]);
1062 }
1063
1064 /* Initialize the bitmap. */
1065 __wt_free(session, cache->las_sweep_dropmap);
1066 WT_ERR(__bit_alloc(session,
1067 1 + cache->las_sweep_dropmax - cache->las_sweep_dropmin,
1068 &cache->las_sweep_dropmap));
1069 for (i = 0; i < cache->las_dropped_next; i++)
1070 __bit_set(cache->las_sweep_dropmap,
1071 cache->las_dropped[i] - cache->las_sweep_dropmin);
1072
1073 /* Clear the list of btree IDs. */
1074 cache->las_dropped_next = 0;
1075
1076 err: __wt_spin_unlock(session, &cache->las_sweep_lock);
1077 return (ret);
1078 }
1079
1080 /*
1081 * __wt_las_sweep --
1082 * Sweep the lookaside table.
1083 */
1084 int
__wt_las_sweep(WT_SESSION_IMPL * session)1085 __wt_las_sweep(WT_SESSION_IMPL *session)
1086 {
1087 WT_CACHE *cache;
1088 WT_CURSOR *cursor;
1089 WT_DECL_ITEM(saved_key);
1090 WT_DECL_RET;
1091 WT_ITEM las_key, las_timestamp, las_value;
1092 WT_ITEM *sweep_key;
1093 #ifdef HAVE_TIMESTAMPS
1094 wt_timestamp_t timestamp, *val_ts;
1095 #else
1096 wt_timestamp_t *val_ts;
1097 #endif
1098 uint64_t cnt, remove_cnt, las_pageid, saved_pageid, visit_cnt;
1099 uint64_t las_counter, las_txnid;
1100 uint32_t las_id, session_flags;
1101 uint8_t prepare_state, upd_type;
1102 int notused;
1103 bool local_txn, locked, removing_key_block;
1104
1105 cache = S2C(session)->cache;
1106 cursor = NULL;
1107 sweep_key = &cache->las_sweep_key;
1108 remove_cnt = 0;
1109 session_flags = 0; /* [-Werror=maybe-uninitialized] */
1110 local_txn = locked = removing_key_block = false;
1111
1112 WT_RET(__wt_scr_alloc(session, 0, &saved_key));
1113 saved_pageid = 0;
1114
1115 /*
1116 * Prevent other threads removing entries from underneath the sweep.
1117 */
1118 __wt_writelock(session, &cache->las_sweepwalk_lock);
1119 locked = true;
1120
1121 /*
1122 * Allocate a cursor and wrap all the updates in a transaction.
1123 * We should have our own lookaside cursor.
1124 */
1125 __wt_las_cursor(session, &cursor, &session_flags);
1126 WT_ASSERT(session, cursor->session == &session->iface);
1127 WT_ERR(__wt_txn_begin(session, NULL));
1128 local_txn = true;
1129
1130 /* Encourage a race */
1131 __wt_timing_stress(session, WT_TIMING_STRESS_LOOKASIDE_SWEEP);
1132
1133 /*
1134 * When continuing a sweep, position the cursor using the key from the
1135 * last call (we don't care if we're before or after the key, either
1136 * side is fine).
1137 *
1138 * Otherwise, we're starting a new sweep, gather the list of trees to
1139 * sweep.
1140 */
1141 if (sweep_key->size != 0) {
1142 __wt_cursor_set_raw_key(cursor, sweep_key);
1143 ret = cursor->search_near(cursor, ¬used);
1144
1145 /*
1146 * Don't search for the same key twice; if we don't set a new
1147 * key below, it's because we've reached the end of the table
1148 * and we want the next pass to start at the beginning of the
1149 * table. Searching for the same key could leave us stuck at
1150 * the end of the table, repeatedly checking the same rows.
1151 */
1152 __wt_buf_free(session, sweep_key);
1153 } else
1154 ret = __las_sweep_init(session);
1155 if (ret != 0)
1156 goto srch_notfound;
1157
1158 cnt = __las_sweep_count(cache);
1159 visit_cnt = 0;
1160
1161 /* Walk the file. */
1162 while ((ret = cursor->next(cursor)) == 0) {
1163 WT_ERR(cursor->get_key(cursor,
1164 &las_pageid, &las_id, &las_counter, &las_key));
1165
1166 __wt_verbose(session,
1167 WT_VERB_LOOKASIDE_ACTIVITY,
1168 "Sweep reviewing lookaside entry with lookaside "
1169 "page ID %" PRIu64 " btree ID %" PRIu32
1170 " saved key size: %" WT_SIZET_FMT,
1171 las_pageid, las_id, saved_key->size);
1172
1173 /*
1174 * Signal to stop if the cache is stuck: we are ignoring the
1175 * cache size while scanning the lookaside table, so we're
1176 * making things worse.
1177 */
1178 if (__wt_cache_stuck(session))
1179 cnt = 0;
1180
1181 /*
1182 * Don't go past the end of lookaside from when sweep started.
1183 * If a file is reopened, its ID may be reused past this point
1184 * so the bitmap we're using is not valid.
1185 */
1186 if (las_pageid > cache->las_sweep_max_pageid) {
1187 __wt_buf_free(session, sweep_key);
1188 ret = WT_NOTFOUND;
1189 break;
1190 }
1191
1192 /*
1193 * We only want to break between key blocks. Stop if we've
1194 * processed enough entries either all we wanted or enough
1195 * and there is a reader waiting and we're on a key boundary.
1196 */
1197 ++visit_cnt;
1198 if (!removing_key_block && (cnt == 0 ||
1199 (visit_cnt > WT_LAS_SWEEP_ENTRIES && cache->las_reader)))
1200 break;
1201 if (cnt > 0)
1202 --cnt;
1203
1204 /*
1205 * If the entry belongs to a dropped tree, discard it.
1206 *
1207 * Cursor opened overwrite=true: won't return WT_NOTFOUND
1208 * should another thread remove the record before we do (not
1209 * expected for dropped trees), and the cursor remains
1210 * positioned in that case.
1211 */
1212 if (las_id >= cache->las_sweep_dropmin &&
1213 las_id <= cache->las_sweep_dropmax &&
1214 __bit_test(cache->las_sweep_dropmap,
1215 las_id - cache->las_sweep_dropmin)) {
1216 WT_ERR(cursor->remove(cursor));
1217 ++remove_cnt;
1218 saved_key->size = 0;
1219 /*
1220 * Allow sweep to break while removing entries from a
1221 * dead file.
1222 */
1223 removing_key_block = false;
1224 continue;
1225 }
1226
1227 /*
1228 * Remove all entries for a key once they have aged out and are
1229 * no longer needed.
1230 */
1231 WT_ERR(cursor->get_value(cursor, &las_txnid,
1232 &las_timestamp, &prepare_state, &upd_type, &las_value));
1233 #ifdef HAVE_TIMESTAMPS
1234 WT_ASSERT(session, las_timestamp.size == WT_TIMESTAMP_SIZE);
1235 memcpy(×tamp, las_timestamp.data, las_timestamp.size);
1236 val_ts = ×tamp;
1237 #else
1238 val_ts = NULL;
1239 #endif
1240
1241 /*
1242 * Check to see if the page or key has changed this iteration,
1243 * and if they have, setup context for safely removing obsolete
1244 * updates.
1245 *
1246 * It's important to check for page boundaries explicitly
1247 * because it is possible for the same key to be at the start
1248 * of the next block. See WT-3982 for details.
1249 */
1250 if (las_pageid != saved_pageid ||
1251 saved_key->size != las_key.size ||
1252 memcmp(saved_key->data, las_key.data, las_key.size) != 0) {
1253 /* If we've examined enough entries, give up. */
1254 if (cnt == 0)
1255 break;
1256
1257 saved_pageid = las_pageid;
1258 WT_ERR(__wt_buf_set(
1259 session, saved_key, las_key.data, las_key.size));
1260
1261 /*
1262 * There are several conditions that need to be met
1263 * before we choose to remove a key block:
1264 * * The entries were written with skew newest.
1265 * Indicated by the first entry being a birthmark.
1266 * * The first entry is globally visible.
1267 * * The entry wasn't from a prepared transaction.
1268 */
1269 if (upd_type == WT_UPDATE_BIRTHMARK &&
1270 __wt_txn_visible_all(session, las_txnid, val_ts) &&
1271 prepare_state != WT_PREPARE_INPROGRESS)
1272 removing_key_block = true;
1273 else
1274 removing_key_block = false;
1275 }
1276
1277 if (!removing_key_block)
1278 continue;
1279
1280 __wt_verbose(session,
1281 WT_VERB_LOOKASIDE_ACTIVITY,
1282 "Sweep removing lookaside entry with "
1283 "page ID: %" PRIu64 " btree ID: %" PRIu32
1284 " saved key size: %" WT_SIZET_FMT ", record type: %" PRIu8
1285 " transaction ID: %" PRIu64,
1286 las_pageid, las_id, saved_key->size, upd_type, las_txnid);
1287 WT_ERR(cursor->remove(cursor));
1288 ++remove_cnt;
1289 }
1290
1291 /*
1292 * If the loop terminates after completing a work unit, we will
1293 * continue the table sweep next time. Get a local copy of the
1294 * sweep key, we're going to reset the cursor; do so before
1295 * calling cursor.remove, cursor.remove can discard our hazard
1296 * pointer and the page could be evicted from underneath us.
1297 */
1298 if (ret == 0) {
1299 WT_ERR(__wt_cursor_get_raw_key(cursor, sweep_key));
1300 if (!WT_DATA_IN_ITEM(sweep_key))
1301 WT_ERR(__wt_buf_set(session, sweep_key,
1302 sweep_key->data, sweep_key->size));
1303 }
1304
1305 srch_notfound:
1306 WT_ERR_NOTFOUND_OK(ret);
1307
1308 if (0) {
1309 err: __wt_buf_free(session, sweep_key);
1310 }
1311 if (local_txn) {
1312 if (ret == 0)
1313 ret = __wt_txn_commit(session, NULL);
1314 else
1315 WT_TRET(__wt_txn_rollback(session, NULL));
1316 if (ret == 0)
1317 (void)__wt_atomic_add64(
1318 &cache->las_remove_count, remove_cnt);
1319 }
1320
1321 WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
1322
1323 if (locked)
1324 __wt_writeunlock(session, &cache->las_sweepwalk_lock);
1325
1326 __wt_scr_free(session, &saved_key);
1327
1328 return (ret);
1329 }
1330