1 /*-
2 * Copyright (c) 2014-2018 MongoDB, Inc.
3 * Copyright (c) 2008-2014 WiredTiger, Inc.
4 * All rights reserved.
5 *
6 * See the file LICENSE for redistribution information.
7 */
8
9 /*
10 * __cursor_set_recno --
11 * The cursor value in the interface has to track the value in the
12 * underlying cursor, update them in parallel.
13 */
14 static inline void
__cursor_set_recno(WT_CURSOR_BTREE * cbt,uint64_t v)15 __cursor_set_recno(WT_CURSOR_BTREE *cbt, uint64_t v)
16 {
17 cbt->iface.recno = cbt->recno = v;
18 }
19
20 /*
21 * __cursor_novalue --
22 * Release any cached value before an operation that could update the
23 * transaction context and free data a value is pointing to.
24 */
25 static inline void
__cursor_novalue(WT_CURSOR * cursor)26 __cursor_novalue(WT_CURSOR *cursor)
27 {
28 F_CLR(cursor, WT_CURSTD_VALUE_INT);
29 }
30
31 /*
32 * __cursor_checkkey --
33 * Check if a key is set without making a copy.
34 */
35 static inline int
__cursor_checkkey(WT_CURSOR * cursor)36 __cursor_checkkey(WT_CURSOR *cursor)
37 {
38 return (F_ISSET(cursor, WT_CURSTD_KEY_SET) ?
39 0 : __wt_cursor_kv_not_set(cursor, true));
40 }
41
42 /*
43 * __cursor_checkvalue --
44 * Check if a value is set without making a copy.
45 */
46 static inline int
__cursor_checkvalue(WT_CURSOR * cursor)47 __cursor_checkvalue(WT_CURSOR *cursor)
48 {
49 return (F_ISSET(cursor, WT_CURSTD_VALUE_SET) ?
50 0 : __wt_cursor_kv_not_set(cursor, false));
51 }
52
53 /*
54 * __cursor_localkey --
55 * If the key points into the tree, get a local copy.
56 */
57 static inline int
__cursor_localkey(WT_CURSOR * cursor)58 __cursor_localkey(WT_CURSOR *cursor)
59 {
60 if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) {
61 if (!WT_DATA_IN_ITEM(&cursor->key))
62 WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session,
63 &cursor->key, cursor->key.data, cursor->key.size));
64 F_CLR(cursor, WT_CURSTD_KEY_INT);
65 F_SET(cursor, WT_CURSTD_KEY_EXT);
66 }
67 return (0);
68 }
69
70 /*
71 * __cursor_localvalue --
72 * If the value points into the tree, get a local copy.
73 */
74 static inline int
__cursor_localvalue(WT_CURSOR * cursor)75 __cursor_localvalue(WT_CURSOR *cursor)
76 {
77 if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) {
78 if (!WT_DATA_IN_ITEM(&cursor->value))
79 WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session,
80 &cursor->value,
81 cursor->value.data, cursor->value.size));
82 F_CLR(cursor, WT_CURSTD_VALUE_INT);
83 F_SET(cursor, WT_CURSTD_VALUE_EXT);
84 }
85 return (0);
86 }
87
88 /*
89 * __cursor_needkey --
90 *
91 * Check if we have a key set. There's an additional semantic here: if we're
92 * pointing into the tree, get a local copy of whatever we're referencing in
93 * the tree, there's an obvious race with the cursor moving and the reference.
94 */
95 static inline int
__cursor_needkey(WT_CURSOR * cursor)96 __cursor_needkey(WT_CURSOR *cursor)
97 {
98 WT_RET(__cursor_localkey(cursor));
99 return (__cursor_checkkey(cursor));
100 }
101
102 /*
103 * __cursor_needvalue --
104 *
105 * Check if we have a value set. There's an additional semantic here: if we're
106 * pointing into the tree, get a local copy of whatever we're referencing in
107 * the tree, there's an obvious race with the cursor moving and the reference.
108 */
109 static inline int
__cursor_needvalue(WT_CURSOR * cursor)110 __cursor_needvalue(WT_CURSOR *cursor)
111 {
112 WT_RET(__cursor_localvalue(cursor));
113 return (__cursor_checkvalue(cursor));
114 }
115
116 /*
117 * __cursor_pos_clear --
118 * Reset the cursor's location.
119 */
120 static inline void
__cursor_pos_clear(WT_CURSOR_BTREE * cbt)121 __cursor_pos_clear(WT_CURSOR_BTREE *cbt)
122 {
123 /*
124 * Most of the cursor's location information that needs to be set on
125 * successful return is always set by a successful return, for example,
126 * we don't initialize the compare return value because it's always
127 * set by the row-store search. The other stuff gets cleared here,
128 * and it's a minimal set of things we need to clear. It would be a
129 * lot simpler to clear everything, but we call this function a lot.
130 */
131 cbt->recno = WT_RECNO_OOB;
132
133 cbt->ins = NULL;
134 cbt->ins_head = NULL;
135 cbt->ins_stack[0] = NULL;
136
137 F_CLR(cbt, WT_CBT_POSITION_MASK);
138 }
139
140 /*
141 * __cursor_enter --
142 * Activate a cursor.
143 */
144 static inline int
__cursor_enter(WT_SESSION_IMPL * session)145 __cursor_enter(WT_SESSION_IMPL *session)
146 {
147 /*
148 * If there are no other cursors positioned in the session, check
149 * whether the cache is full.
150 */
151 if (session->ncursors == 0)
152 WT_RET(__wt_cache_eviction_check(session, false, false, NULL));
153 ++session->ncursors;
154 return (0);
155 }
156
157 /*
158 * __cursor_leave --
159 * Deactivate a cursor.
160 */
161 static inline void
__cursor_leave(WT_SESSION_IMPL * session)162 __cursor_leave(WT_SESSION_IMPL *session)
163 {
164 /*
165 * Decrement the count of active cursors in the session. When that
166 * goes to zero, there are no active cursors, and we can release any
167 * snapshot we're holding for read committed isolation.
168 */
169 WT_ASSERT(session, session->ncursors > 0);
170 if (--session->ncursors == 0)
171 __wt_txn_read_last(session);
172 }
173
174 /*
175 * __cursor_reset --
176 * Reset the cursor, it no longer holds any position.
177 */
178 static inline int
__cursor_reset(WT_CURSOR_BTREE * cbt)179 __cursor_reset(WT_CURSOR_BTREE *cbt)
180 {
181 WT_DECL_RET;
182 WT_SESSION_IMPL *session;
183
184 session = (WT_SESSION_IMPL *)cbt->iface.session;
185
186 __cursor_pos_clear(cbt);
187
188 /* If the cursor was active, deactivate it. */
189 if (F_ISSET(cbt, WT_CBT_ACTIVE)) {
190 if (!F_ISSET(cbt, WT_CBT_NO_TXN))
191 __cursor_leave(session);
192 F_CLR(cbt, WT_CBT_ACTIVE);
193 }
194
195 /* If we're not holding a cursor reference, we're done. */
196 if (cbt->ref == NULL)
197 return (0);
198
199 /*
200 * If we were scanning and saw a lot of deleted records on this page,
201 * try to evict the page when we release it.
202 */
203 if (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD)
204 __wt_page_evict_soon(session, cbt->ref);
205 cbt->page_deleted_count = 0;
206
207 /*
208 * Release any page references we're holding. This can trigger eviction
209 * (e.g., forced eviction of big pages), so it's important to do after
210 * releasing our snapshot above.
211 *
212 * Clear the reference regardless, so we don't try the release twice.
213 */
214 ret = __wt_page_release(session, cbt->ref, 0);
215 cbt->ref = NULL;
216
217 return (ret);
218 }
219
220 /*
221 * __wt_curindex_get_valuev --
222 * Internal implementation of WT_CURSOR->get_value for index cursors
223 */
224 static inline int
__wt_curindex_get_valuev(WT_CURSOR * cursor,va_list ap)225 __wt_curindex_get_valuev(WT_CURSOR *cursor, va_list ap)
226 {
227 WT_CURSOR_INDEX *cindex;
228 WT_ITEM *item;
229 WT_SESSION_IMPL *session;
230
231 cindex = (WT_CURSOR_INDEX *)cursor;
232 session = (WT_SESSION_IMPL *)cursor->session;
233 WT_RET(__cursor_checkvalue(cursor));
234
235 if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) {
236 WT_RET(__wt_schema_project_merge(session,
237 cindex->cg_cursors, cindex->value_plan,
238 cursor->value_format, &cursor->value));
239 item = va_arg(ap, WT_ITEM *);
240 item->data = cursor->value.data;
241 item->size = cursor->value.size;
242 } else
243 WT_RET(__wt_schema_project_out(session,
244 cindex->cg_cursors, cindex->value_plan, ap));
245 return (0);
246 }
247
248 /*
249 * __wt_curtable_get_valuev --
250 * Internal implementation of WT_CURSOR->get_value for table cursors.
251 */
252 static inline int
__wt_curtable_get_valuev(WT_CURSOR * cursor,va_list ap)253 __wt_curtable_get_valuev(WT_CURSOR *cursor, va_list ap)
254 {
255 WT_CURSOR *primary;
256 WT_CURSOR_TABLE *ctable;
257 WT_ITEM *item;
258 WT_SESSION_IMPL *session;
259
260 ctable = (WT_CURSOR_TABLE *)cursor;
261 session = (WT_SESSION_IMPL *)cursor->session;
262 primary = *ctable->cg_cursors;
263 WT_RET(__cursor_checkvalue(primary));
264
265 if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) {
266 WT_RET(__wt_schema_project_merge(session,
267 ctable->cg_cursors, ctable->plan,
268 cursor->value_format, &cursor->value));
269 item = va_arg(ap, WT_ITEM *);
270 item->data = cursor->value.data;
271 item->size = cursor->value.size;
272 } else
273 WT_RET(__wt_schema_project_out(session,
274 ctable->cg_cursors, ctable->plan, ap));
275 return (0);
276 }
277
278 /*
279 * __wt_cursor_dhandle_incr_use --
280 * Increment the in-use counter in the cursor's data source.
281 */
282 static inline void
__wt_cursor_dhandle_incr_use(WT_SESSION_IMPL * session)283 __wt_cursor_dhandle_incr_use(WT_SESSION_IMPL *session)
284 {
285 WT_DATA_HANDLE *dhandle;
286
287 dhandle = session->dhandle;
288
289 /* If we open a handle with a time of death set, clear it. */
290 if (__wt_atomic_addi32(&dhandle->session_inuse, 1) == 1 &&
291 dhandle->timeofdeath != 0)
292 dhandle->timeofdeath = 0;
293 }
294
295 /*
296 * __wt_cursor_dhandle_decr_use --
297 * Decrement the in-use counter in the cursor's data source.
298 */
299 static inline void
__wt_cursor_dhandle_decr_use(WT_SESSION_IMPL * session)300 __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session)
301 {
302 WT_DATA_HANDLE *dhandle;
303
304 dhandle = session->dhandle;
305
306 /* If we close a handle with a time of death set, clear it. */
307 WT_ASSERT(session, dhandle->session_inuse > 0);
308 if (__wt_atomic_subi32(&dhandle->session_inuse, 1) == 0 &&
309 dhandle->timeofdeath != 0)
310 dhandle->timeofdeath = 0;
311 }
312
313 /*
314 * __cursor_kv_return --
315 * Return a page referenced key/value pair to the application.
316 */
317 static inline int
__cursor_kv_return(WT_CURSOR_BTREE * cbt,WT_UPDATE * upd)318 __cursor_kv_return(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
319 {
320 WT_RET(__wt_key_return(cbt));
321 WT_RET(__wt_value_return(cbt, upd));
322
323 return (0);
324 }
325
326 /*
327 * __cursor_func_init --
328 * Cursor call setup.
329 */
330 static inline int
__cursor_func_init(WT_CURSOR_BTREE * cbt,bool reenter)331 __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter)
332 {
333 WT_SESSION_IMPL *session;
334
335 session = (WT_SESSION_IMPL *)cbt->iface.session;
336
337 if (reenter) {
338 #ifdef HAVE_DIAGNOSTIC
339 __wt_cursor_key_order_reset(cbt);
340 #endif
341 WT_RET(__cursor_reset(cbt));
342 }
343
344 /*
345 * Any old insert position is now invalid. We rely on this being
346 * cleared to detect if a new skiplist is installed after a search.
347 */
348 cbt->ins_stack[0] = NULL;
349
350 /* If the transaction is idle, check that the cache isn't full. */
351 WT_RET(__wt_txn_idle_cache_check(session));
352
353 /* Activate the file cursor. */
354 if (!F_ISSET(cbt, WT_CBT_ACTIVE)) {
355 if (!F_ISSET(cbt, WT_CBT_NO_TXN))
356 WT_RET(__cursor_enter(session));
357 F_SET(cbt, WT_CBT_ACTIVE);
358 }
359
360 /*
361 * If this is an ordinary transactional cursor, make sure we are set up
362 * to read.
363 */
364 if (!F_ISSET(cbt, WT_CBT_NO_TXN))
365 __wt_txn_cursor_op(session);
366 return (0);
367 }
368
369 /*
370 * __cursor_row_slot_return --
371 * Return a row-store leaf page slot's K/V pair.
372 */
373 static inline int
__cursor_row_slot_return(WT_CURSOR_BTREE * cbt,WT_ROW * rip,WT_UPDATE * upd)374 __cursor_row_slot_return(WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_UPDATE *upd)
375 {
376 WT_BTREE *btree;
377 WT_CELL *cell;
378 WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
379 WT_ITEM *kb, *vb;
380 WT_PAGE *page;
381 WT_SESSION_IMPL *session;
382 void *copy;
383
384 session = (WT_SESSION_IMPL *)cbt->iface.session;
385 btree = S2BT(session);
386 page = cbt->ref->page;
387
388 kpack = NULL;
389 vpack = &_vpack;
390
391 kb = &cbt->iface.key;
392 vb = &cbt->iface.value;
393
394 /*
395 * The row-store key can change underfoot; explicitly take a copy.
396 */
397 copy = WT_ROW_KEY_COPY(rip);
398
399 /*
400 * Get a key: we could just call __wt_row_leaf_key, but as a cursor
401 * is running through the tree, we may have additional information
402 * here (we may have the fully-built key that's immediately before
403 * the prefix-compressed key we want, so it's a faster construction).
404 *
405 * First, check for an immediately available key.
406 */
407 if (__wt_row_leaf_key_info(
408 page, copy, NULL, &cell, &kb->data, &kb->size))
409 goto value;
410
411 /* Huffman encoded keys are a slow path in all cases. */
412 if (btree->huffman_key != NULL)
413 goto slow;
414
415 /*
416 * Unpack the cell and deal with overflow and prefix-compressed keys.
417 * Inline building simple prefix-compressed keys from a previous key,
418 * otherwise build from scratch.
419 */
420 kpack = &_kpack;
421 __wt_cell_unpack(cell, kpack);
422 if (kpack->type == WT_CELL_KEY &&
423 cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) {
424 WT_ASSERT(session, cbt->row_key->size >= kpack->prefix);
425
426 /*
427 * Grow the buffer as necessary as well as ensure data has been
428 * copied into local buffer space, then append the suffix to the
429 * prefix already in the buffer.
430 *
431 * Don't grow the buffer unnecessarily or copy data we don't
432 * need, truncate the item's data length to the prefix bytes.
433 */
434 cbt->row_key->size = kpack->prefix;
435 WT_RET(__wt_buf_grow(
436 session, cbt->row_key, cbt->row_key->size + kpack->size));
437 memcpy((uint8_t *)cbt->row_key->data + cbt->row_key->size,
438 kpack->data, kpack->size);
439 cbt->row_key->size += kpack->size;
440 } else {
441 /*
442 * Call __wt_row_leaf_key_work instead of __wt_row_leaf_key: we
443 * already did __wt_row_leaf_key's fast-path checks inline.
444 */
445 slow: WT_RET(__wt_row_leaf_key_work(
446 session, page, rip, cbt->row_key, false));
447 }
448 kb->data = cbt->row_key->data;
449 kb->size = cbt->row_key->size;
450 cbt->rip_saved = rip;
451
452 value:
453 /*
454 * If the item was ever modified, use the WT_UPDATE data. Note the
455 * caller passes us the update: it has already resolved which one
456 * (if any) is visible.
457 */
458 if (upd != NULL)
459 return (__wt_value_return(cbt, upd));
460
461 /* Else, simple values have their location encoded in the WT_ROW. */
462 if (__wt_row_leaf_value(page, rip, vb))
463 return (0);
464
465 /* Else, take the value from the original page cell. */
466 __wt_row_leaf_value_cell(page, rip, kpack, vpack);
467 return (__wt_page_cell_data_ref(session, cbt->ref->page, vpack, vb));
468 }
469