1 /*-
2 * Copyright (c) 2014-2018 MongoDB, Inc.
3 * Copyright (c) 2008-2014 WiredTiger, Inc.
4 * All rights reserved.
5 *
6 * See the file LICENSE for redistribution information.
7 */
8
9 #include "wt_internal.h"
10
11 /*
12 * Walking backwards through skip lists.
13 *
14 * The skip list stack is an array of pointers set up by a search. It points
15 * to the position a node should go in the skip list. In other words, the skip
16 * list search stack always points *after* the search item (that is, into the
17 * search item's next array).
18 *
19 * Helper macros to go from a stack pointer at level i, pointing into a next
20 * array, back to the insert node containing that next array.
21 */
/*
 * PREV_ITEM --
 *	Map a search-stack pointer at level i (which points into some node's
 * next array) back to the WT_INSERT node containing that next array.
 * Returns NULL if the pointer is NULL or references the list head itself
 * (in which case there is no previous item).
 */
#undef	PREV_ITEM
#define	PREV_ITEM(ins_head, insp, i)					\
	(((insp) == &(ins_head)->head[i] || (insp) == NULL) ? NULL :	\
	    (WT_INSERT *)((char *)((insp) - (i)) - offsetof(WT_INSERT, next)))

/*
 * PREV_INS --
 *	PREV_ITEM applied to the cursor's own insert list and search stack
 * at level i.
 */
#undef	PREV_INS
#define	PREV_INS(cbt, i)						\
	PREV_ITEM((cbt)->ins_head, (cbt)->ins_stack[(i)], (i))
30
31 /*
32 * __cursor_skip_prev --
33 * Move back one position in a skip list stack (aka "finger").
34 */
static inline int
__cursor_skip_prev(WT_CURSOR_BTREE *cbt)
{
	WT_INSERT *current, *ins;
	WT_ITEM key;
	WT_SESSION_IMPL *session;
	int i;

	session = (WT_SESSION_IMPL *)cbt->iface.session;

restart:
	/*
	 * If the search stack does not point at the current item, fill it in
	 * with a search.
	 */
	while ((current = cbt->ins) != PREV_INS(cbt, 0)) {
		if (cbt->btree->type == BTREE_ROW) {
			key.data = WT_INSERT_KEY(current);
			key.size = WT_INSERT_KEY_SIZE(current);
			WT_RET(__wt_search_insert(
			    session, cbt, cbt->ins_head, &key));
		} else
			cbt->ins = __col_insert_search(cbt->ins_head,
			    cbt->ins_stack, cbt->next_stack,
			    WT_INSERT_RECNO(current));
	}

	/*
	 * Find the first node up the search stack that does not move.
	 *
	 * The depth of the current item must be at least this level, since we
	 * see it in that many levels of the stack.
	 *
	 * !!! Watch these loops carefully: they all rely on the value of i,
	 * and the exit conditions to end up with the right values are
	 * non-trivial.
	 */
	ins = NULL;			/* -Wconditional-uninitialized */
	for (i = 0; i < WT_SKIP_MAXDEPTH - 1; i++)
		if ((ins = PREV_INS(cbt, i + 1)) != current)
			break;

	/*
	 * Find a starting point for the new search. That is either at the
	 * non-moving node if we found a valid node, or the beginning of the
	 * next list down that is not the current node.
	 *
	 * Since it is the beginning of a list, and we know the current node
	 * has a skip depth at least this high, any node we find must sort
	 * before the current node.
	 */
	if (ins == NULL || ins == current)
		for (; i >= 0; i--) {
			cbt->ins_stack[i] = NULL;
			cbt->next_stack[i] = NULL;
			ins = cbt->ins_head->head[i];
			if (ins != NULL && ins != current)
				break;
		}

	/* Walk any remaining levels until just before the current node. */
	while (i >= 0) {
		/*
		 * If we get to the end of a list without finding the current
		 * item, we must have raced with an insert. Restart the search.
		 */
		if (ins == NULL) {
			cbt->ins_stack[0] = NULL;
			cbt->next_stack[0] = NULL;
			goto restart;
		}
		if (ins->next[i] != current)	/* Stay at this level */
			ins = ins->next[i];
		else {				/* Drop down a level */
			cbt->ins_stack[i] = &ins->next[i];
			cbt->next_stack[i] = ins->next[i];
			--i;
		}
	}

	/*
	 * If we found a previous node, the next one must be current; if it
	 * isn't, a concurrent insert slipped in between, so restart.
	 */
	if (cbt->ins_stack[0] != NULL && *cbt->ins_stack[0] != current)
		goto restart;

	/* Point the cursor at the item now immediately before "current". */
	cbt->ins = PREV_INS(cbt, 0);
	return (0);
}
122
123 /*
124 * __cursor_fix_append_prev --
125 * Return the previous fixed-length entry on the append list.
126 */
static inline int
__cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage)
{
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;

	session = (WT_SESSION_IMPL *)cbt->iface.session;

	if (newpage) {
		/* Starting a new page: position on the last append entry. */
		if ((cbt->ins = WT_SKIP_LAST(cbt->ins_head)) == NULL)
			return (WT_NOTFOUND);
	} else {
		/* Move to the previous record in the append list, if any. */
		if (cbt->ins != NULL &&
		    cbt->recno <= WT_INSERT_RECNO(cbt->ins))
			WT_RET(__cursor_skip_prev(cbt));

		/*
		 * Handle the special case of leading implicit records, that is,
		 * there aren't any records in the page not on the append list,
		 * and the append list's first record isn't the first record on
		 * the page. (Although implemented as a test of the page values,
		 * this is really a test for a tree where the first inserted
		 * record wasn't record 1, any other page with only an append
		 * list will have a first page record number matching the first
		 * record in the append list.)
		 *
		 * The "right" place to handle this is probably in our caller.
		 * The high-level cursor-previous routine would:
		 *	-- call this routine to walk the append list
		 *	-- call the routine to walk the standard page items
		 *	-- call the tree walk routine looking for a previous page
		 * Each of them returns WT_NOTFOUND, at which point our caller
		 * checks the cursor record number, and if it's larger than 1,
		 * returns the implicit records. Instead, I'm trying to detect
		 * the case here, mostly because I don't want to put that code
		 * into our caller. Anyway, if this code breaks for any reason,
		 * that's the way I'd go.
		 *
		 * If we're not pointing to a WT_INSERT entry (we didn't find a
		 * WT_INSERT record preceding our record name-space), check if
		 * we've reached the beginning of this page, a possibility if a
		 * page had a large number of items appended, and then split.
		 * If not, check if there are any records on the page. If there
		 * aren't, then we're in the magic zone, keep going until we get
		 * to a record number matching the first record on the page.
		 */
		if (cbt->ins == NULL &&
		    (cbt->recno == cbt->ref->ref_recno ||
		    __col_fix_last_recno(cbt->ref) != 0))
			return (WT_NOTFOUND);
	}

	/*
	 * This code looks different from the cursor-next code. The append list
	 * may be preceded by other rows. If we're iterating through the tree,
	 * starting at the last record in the tree, by definition we're starting
	 * a new iteration and we set the record number to the last record found
	 * on the page. Otherwise, decrement the record.
	 */
	if (newpage)
		__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
	else
		__cursor_set_recno(cbt, cbt->recno - 1);

	/*
	 * Fixed-width column store appends are inherently non-transactional.
	 * Even a non-visible update by a concurrent or aborted transaction
	 * changes the effective end of the data. The effect is subtle because
	 * of the blurring between deleted and empty values, but ideally we
	 * would skip all uncommitted changes at the end of the data. This
	 * doesn't apply to variable-width column stores because the implicitly
	 * created records written by reconciliation are deleted and so can be
	 * never seen by a read.
	 */
	if (cbt->ins == NULL || cbt->recno > WT_INSERT_RECNO(cbt->ins)) {
		/* Implicit record with no update: value is 0. */
		cbt->v = 0;
		cbt->iface.value.data = &cbt->v;
	} else {
		upd = NULL;
		WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
		if (upd == NULL) {
			/* No visible update: behave as an implicit record. */
			cbt->v = 0;
			cbt->iface.value.data = &cbt->v;
		} else
			cbt->iface.value.data = upd->data;
	}
	/* Fixed-length column-store values are always a single byte. */
	cbt->iface.value.size = 1;
	return (0);
}
217
218 /*
219 * __cursor_fix_prev --
220 * Move to the previous, fixed-length column-store item.
221 */
222 static inline int
__cursor_fix_prev(WT_CURSOR_BTREE * cbt,bool newpage)223 __cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage)
224 {
225 WT_BTREE *btree;
226 WT_PAGE *page;
227 WT_SESSION_IMPL *session;
228 WT_UPDATE *upd;
229
230 session = (WT_SESSION_IMPL *)cbt->iface.session;
231 page = cbt->ref->page;
232 btree = S2BT(session);
233
234 /* Initialize for each new page. */
235 if (newpage) {
236 cbt->last_standard_recno = __col_fix_last_recno(cbt->ref);
237 if (cbt->last_standard_recno == 0)
238 return (WT_NOTFOUND);
239 __cursor_set_recno(cbt, cbt->last_standard_recno);
240 goto new_page;
241 }
242
243 /* Move to the previous entry and return the item. */
244 if (cbt->recno == cbt->ref->ref_recno)
245 return (WT_NOTFOUND);
246 __cursor_set_recno(cbt, cbt->recno - 1);
247
248 new_page:
249 /* Check any insert list for a matching record. */
250 cbt->ins_head = WT_COL_UPDATE_SINGLE(page);
251 cbt->ins = __col_insert_search(
252 cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno);
253 if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
254 cbt->ins = NULL;
255 upd = NULL;
256 if (cbt->ins != NULL)
257 WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
258 if (upd == NULL) {
259 cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
260 cbt->iface.value.data = &cbt->v;
261 } else
262 cbt->iface.value.data = upd->data;
263 cbt->iface.value.size = 1;
264 return (0);
265 }
266
267 /*
268 * __cursor_var_append_prev --
269 * Return the previous variable-length entry on the append list.
270 */
static inline int
__cursor_var_append_prev(WT_CURSOR_BTREE *cbt, bool newpage)
{
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;

	session = (WT_SESSION_IMPL *)cbt->iface.session;

	if (newpage) {
		/* Starting a new page: position on the last append entry. */
		cbt->ins = WT_SKIP_LAST(cbt->ins_head);
		goto new_page;
	}

	/* Walk backward until we find a visible, non-deleted entry. */
	for (;;) {
		WT_RET(__cursor_skip_prev(cbt));
new_page:	if (cbt->ins == NULL)
			return (WT_NOTFOUND);

		__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
		WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
		/* No visible update: keep walking backward. */
		if (upd == NULL)
			continue;
		if (upd->type == WT_UPDATE_TOMBSTONE) {
			/*
			 * Count globally-visible deletions; the caller uses
			 * the count to decide whether to urge page eviction.
			 */
			if (upd->txnid != WT_TXN_NONE &&
			    __wt_txn_upd_visible_all(session, upd))
				++cbt->page_deleted_count;
			continue;
		}
		return (__wt_value_return(cbt, upd));
	}
	/* NOTREACHED */
}
303
304 /*
305 * __cursor_var_prev --
306 * Move to the previous, variable-length column-store item.
307 */
static inline int
__cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage)
{
	WT_CELL *cell;
	WT_CELL_UNPACK unpack;
	WT_COL *cip;
	WT_INSERT *ins;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;
	uint64_t rle_start;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	page = cbt->ref->page;

	rle_start = 0;			/* -Werror=maybe-uninitialized */

	/* Initialize for each new page. */
	if (newpage) {
		cbt->last_standard_recno = __col_var_last_recno(cbt->ref);
		if (cbt->last_standard_recno == 0)
			return (WT_NOTFOUND);
		__cursor_set_recno(cbt, cbt->last_standard_recno);
		/* Invalidate the cached-slot optimization for the new page. */
		cbt->cip_saved = NULL;
		goto new_page;
	}

	/* Move to the previous entry and return the item. */
	for (;;) {
		__cursor_set_recno(cbt, cbt->recno - 1);

new_page:	if (cbt->recno < cbt->ref->ref_recno)
			return (WT_NOTFOUND);

		/* Find the matching WT_COL slot. */
		if ((cip =
		    __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL)
			return (WT_NOTFOUND);
		cbt->slot = WT_COL_SLOT(page, cip);

		/* Check any insert list for a matching record. */
		cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
		cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
		upd = NULL;
		if (cbt->ins != NULL)
			WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
		if (upd != NULL) {
			if (upd->type == WT_UPDATE_TOMBSTONE) {
				/*
				 * Count globally-visible deletions; the caller
				 * uses the count to decide on page eviction.
				 */
				if (upd->txnid != WT_TXN_NONE &&
				    __wt_txn_upd_visible_all(session, upd))
					++cbt->page_deleted_count;
				continue;
			}
			return (__wt_value_return(cbt, upd));
		}

		/*
		 * If we're at the same slot as the last reference and there's
		 * no matching insert list item, re-use the return information
		 * (so encoded items with large repeat counts aren't repeatedly
		 * decoded). Otherwise, unpack the cell and build the return
		 * information.
		 */
		if (cbt->cip_saved != cip) {
			if ((cell = WT_COL_PTR(page, cip)) == NULL)
				continue;
			__wt_cell_unpack(cell, &unpack);
			if (unpack.type == WT_CELL_DEL) {
				if (__wt_cell_rle(&unpack) == 1)
					continue;
				/*
				 * There can be huge gaps in the variable-length
				 * column-store name space appearing as deleted
				 * records. If more than one deleted record, do
				 * the work of finding the next record to return
				 * instead of looping through the records.
				 *
				 * First, find the largest record in the update
				 * list that's smaller than the current record.
				 */
				ins = __col_insert_search_lt(
				    cbt->ins_head, cbt->recno);

				/*
				 * Second, for records with RLEs greater than 1,
				 * the above call to __col_var_search located
				 * this record in the page's list of repeating
				 * records, and returned the starting record.
				 * The starting record - 1 is the record to
				 * which we could skip, if there was no larger
				 * record in the update list.
				 */
				cbt->recno = rle_start - 1;
				if (ins != NULL &&
				    WT_INSERT_RECNO(ins) > cbt->recno)
					cbt->recno = WT_INSERT_RECNO(ins);

				/* Adjust for the outer loop decrement. */
				++cbt->recno;
				continue;
			}
			WT_RET(__wt_page_cell_data_ref(
			    session, page, &unpack, cbt->tmp));

			/* Remember the slot so repeats can re-use cbt->tmp. */
			cbt->cip_saved = cip;
		}
		cbt->iface.value.data = cbt->tmp->data;
		cbt->iface.value.size = cbt->tmp->size;
		return (0);
	}
	/* NOTREACHED */
}
420
421 /*
422 * __cursor_row_prev --
423 * Move to the previous row-store item.
424 */
static inline int
__cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage)
{
	WT_INSERT *ins;
	WT_ITEM *key;
	WT_PAGE *page;
	WT_ROW *rip;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	page = cbt->ref->page;
	key = &cbt->iface.key;

	/*
	 * For row-store pages, we need a single item that tells us the part
	 * of the page we're walking (otherwise switching from next to prev
	 * and vice-versa is just too complicated), so we map the WT_ROW and
	 * WT_INSERT_HEAD insert array slots into a single name space: slot 1
	 * is the "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is
	 * WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are
	 * odd-numbered slots, and WT_ROW array slots are even-numbered slots.
	 *
	 * Initialize for each new page.
	 */
	if (newpage) {
		/*
		 * If we haven't instantiated keys on this page, do so, else it
		 * is a very, very slow traversal.
		 */
		if (!F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS))
			WT_RET(__wt_row_leaf_keys(session, page));

		/* Start from the last insert list (or the only one). */
		if (page->entries == 0)
			cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
		else
			cbt->ins_head =
			    WT_ROW_INSERT_SLOT(page, page->entries - 1);
		cbt->ins = WT_SKIP_LAST(cbt->ins_head);
		/* One past the last slot in the combined name space. */
		cbt->row_iteration_slot = page->entries * 2 + 1;
		cbt->rip_saved = NULL;
		goto new_insert;
	}

	/* Move to the previous entry and return the item. */
	for (;;) {
		/*
		 * Continue traversing any insert list. Maintain the reference
		 * to the current insert element in case we switch to a cursor
		 * next movement.
		 */
		if (cbt->ins != NULL)
			WT_RET(__cursor_skip_prev(cbt));

new_insert:	if ((ins = cbt->ins) != NULL) {
			WT_RET(__wt_txn_read(session, ins->upd, &upd));
			/* No visible update: keep walking backward. */
			if (upd == NULL)
				continue;
			if (upd->type == WT_UPDATE_TOMBSTONE) {
				/*
				 * Count globally-visible deletions; the
				 * caller may urge eviction based on them.
				 */
				if (upd->txnid != WT_TXN_NONE &&
				    __wt_txn_upd_visible_all(session, upd))
					++cbt->page_deleted_count;
				continue;
			}
			key->data = WT_INSERT_KEY(ins);
			key->size = WT_INSERT_KEY_SIZE(ins);
			return (__wt_value_return(cbt, upd));
		}

		/* Check for the beginning of the page. */
		if (cbt->row_iteration_slot == 1)
			return (WT_NOTFOUND);
		--cbt->row_iteration_slot;

		/*
		 * Odd-numbered slots configure as WT_INSERT_HEAD entries,
		 * even-numbered slots configure as WT_ROW entries.
		 */
		if (cbt->row_iteration_slot & 0x01) {
			cbt->ins_head = cbt->row_iteration_slot == 1 ?
			    WT_ROW_INSERT_SMALLEST(page) :
			    WT_ROW_INSERT_SLOT(
			    page, cbt->row_iteration_slot / 2 - 1);
			cbt->ins = WT_SKIP_LAST(cbt->ins_head);
			goto new_insert;
		}
		cbt->ins_head = NULL;
		cbt->ins = NULL;

		/* Even slot: return the on-page WT_ROW entry. */
		cbt->slot = cbt->row_iteration_slot / 2 - 1;
		rip = &page->pg_row[cbt->slot];
		WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd));
		if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) {
			if (upd->txnid != WT_TXN_NONE &&
			    __wt_txn_upd_visible_all(session, upd))
				++cbt->page_deleted_count;
			continue;
		}
		return (__cursor_row_slot_return(cbt, rip, upd));
	}
	/* NOTREACHED */
}
527
528 /*
529 * __wt_btcur_prev --
530 * Move to the previous record in the tree.
531 */
int
__wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
{
	WT_CURSOR *cursor;
	WT_DECL_RET;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;
	uint32_t flags;
	bool newpage, valid;

	cursor = &cbt->iface;
	session = (WT_SESSION_IMPL *)cbt->iface.session;

	WT_STAT_CONN_INCR(session, cursor_prev);
	WT_STAT_DATA_INCR(session, cursor_prev);

	/* The cursor's key/value are invalid until we succeed. */
	F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);

	/*
	 * In case of retrying a prev operation due to a prepare conflict,
	 * cursor would have been already positioned at an update structure
	 * which resulted in conflict. So, now when retrying we should examine
	 * the same update again instead of starting from the next one in the
	 * update chain.
	 */
	F_CLR(cbt, WT_CBT_RETRY_NEXT);
	if (F_ISSET(cbt, WT_CBT_RETRY_PREV)) {
		WT_RET(__wt_cursor_valid(cbt, &upd, &valid));
		F_CLR(cbt, WT_CBT_RETRY_PREV);
		if (valid) {
			/*
			 * If the update, which returned prepared conflict is
			 * visible, return the value.
			 */
			return (__cursor_kv_return(cbt, upd));
		}
	}

	WT_RET(__cursor_func_init(cbt, false));

	/*
	 * If we aren't already iterating in the right direction, there's
	 * some setup to do.
	 */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
		__wt_btcur_iterate_setup(cbt);

	/*
	 * Walk any page we're holding until the underlying call returns not-
	 * found. Then, move to the previous page, until we reach the start
	 * of the file.
	 */
	flags =				/* tree walk flags */
	    WT_READ_NO_SPLIT | WT_READ_PREV | WT_READ_SKIP_INTL;
	if (truncating)
		LF_SET(WT_READ_TRUNCATE);
	for (newpage = false;; newpage = true) {
		page = cbt->ref == NULL ? NULL : cbt->ref->page;

		/*
		 * Column-store pages may have appended entries. Handle it
		 * separately from the usual cursor code, it's in a simple
		 * format.
		 */
		if (newpage && page != NULL && page->type != WT_PAGE_ROW_LEAF &&
		    (cbt->ins_head = WT_COL_APPEND(page)) != NULL)
			F_SET(cbt, WT_CBT_ITERATE_APPEND);

		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session, page->type);
			}
			if (ret == 0)
				break;
			/* Append list exhausted: fall back to page items. */
			F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;
			/* Treat the page body as freshly entered. */
			newpage = true;
		}
		if (page != NULL) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_prev(cbt, newpage);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session, page->type);
			}
			if (ret != WT_NOTFOUND)
				break;
		}

		/*
		 * If we saw a lot of deleted records on this page, or we went
		 * all the way through a page and only saw deleted records, try
		 * to evict the page when we release it. Otherwise repeatedly
		 * deleting from the beginning of a tree can have quadratic
		 * performance. Take care not to force eviction of pages that
		 * are genuinely empty, in new trees.
		 */
		if (page != NULL &&
		    (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD ||
		    (newpage && cbt->page_deleted_count > 0)))
			__wt_page_evict_soon(session, cbt->ref);
		cbt->page_deleted_count = 0;

		if (F_ISSET(cbt, WT_CBT_READ_ONCE))
			LF_SET(WT_READ_WONT_NEED);
		/* Move to the previous page; NULL ref means start of file. */
		WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
		WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
	}
#ifdef HAVE_DIAGNOSTIC
	if (ret == 0)
		WT_ERR(__wt_cursor_key_order_check(session, cbt, false));
#endif
err:	switch (ret) {
	case 0:
		F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
		break;
	case WT_PREPARE_CONFLICT:
		/*
		 * If prepare conflict occurs, cursor should not be reset,
		 * as current cursor position will be reused in case of a
		 * retry from user.
		 */
		F_SET(cbt, WT_CBT_RETRY_PREV);
		break;
	default:
		WT_TRET(__cursor_reset(cbt));
	}
	return (ret);
}
675