1 /*-
2 * Copyright (c) 2014-2018 MongoDB, Inc.
3 * Copyright (c) 2008-2014 WiredTiger, Inc.
4 * All rights reserved.
5 *
6 * See the file LICENSE for redistribution information.
7 */
8
9 #include "wt_internal.h"
10
11 static int __session_checkpoint(WT_SESSION *, const char *);
12 static int __session_snapshot(WT_SESSION *, const char *);
13 static int __session_rollback_transaction(WT_SESSION *, const char *);
14
15 /*
16 * __wt_session_notsup --
17 * Unsupported session method.
18 */
19 int
__wt_session_notsup(WT_SESSION_IMPL * session)20 __wt_session_notsup(WT_SESSION_IMPL *session)
21 {
22 WT_RET_MSG(session, ENOTSUP, "Unsupported session method");
23 }
24
25 /*
26 * __wt_session_reset_cursors --
27 * Reset all open cursors.
28 */
29 int
__wt_session_reset_cursors(WT_SESSION_IMPL * session,bool free_buffers)30 __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers)
31 {
32 WT_CURSOR *cursor;
33 WT_DECL_RET;
34
35 TAILQ_FOREACH(cursor, &session->cursors, q) {
36 /* Stop when there are no positioned cursors. */
37 if (session->ncursors == 0)
38 break;
39 if (!F_ISSET(cursor, WT_CURSTD_JOINED))
40 WT_TRET(cursor->reset(cursor));
41 /* Optionally, free the cursor buffers */
42 if (free_buffers) {
43 __wt_buf_free(session, &cursor->key);
44 __wt_buf_free(session, &cursor->value);
45 }
46 }
47
48 WT_ASSERT(session, session->ncursors == 0);
49 return (ret);
50 }
51
52 /*
53 * __wt_session_cursor_cache_sweep --
54 * Sweep the cursor cache.
55 */
56 int
__wt_session_cursor_cache_sweep(WT_SESSION_IMPL * session)57 __wt_session_cursor_cache_sweep(WT_SESSION_IMPL *session)
58 {
59 WT_CURSOR *cursor, *cursor_tmp;
60 WT_CURSOR_LIST *cached_list;
61 WT_DECL_RET;
62 time_t now;
63 uint32_t position;
64 int i, t_ret, nbuckets, nexamined, nclosed;
65 bool productive;
66
67 if (!F_ISSET(session, WT_SESSION_CACHE_CURSORS))
68 return (0);
69
70 /*
71 * Periodically sweep for dead cursors; if we've swept recently, don't
72 * do it again.
73 */
74 __wt_seconds(session, &now);
75 if (difftime(now, session->last_cursor_sweep) < 1)
76 return (0);
77 session->last_cursor_sweep = now;
78
79 position = session->cursor_sweep_position;
80 productive = true;
81 nbuckets = nexamined = nclosed = 0;
82
83 /* Turn off caching so that cursor close doesn't try to cache. */
84 F_CLR(session, WT_SESSION_CACHE_CURSORS);
85 for (i = 0; i < WT_SESSION_CURSOR_SWEEP_MAX && productive; i++) {
86 ++nbuckets;
87 cached_list = &session->cursor_cache[position];
88 position = (position + 1) % WT_HASH_ARRAY_SIZE;
89 TAILQ_FOREACH_SAFE(cursor, cached_list, q, cursor_tmp) {
90 /*
91 * First check to see if the cursor could be reopened.
92 */
93 ++nexamined;
94 t_ret = cursor->reopen(cursor, true);
95 if (t_ret != 0) {
96 WT_TRET_NOTFOUND_OK(t_ret);
97 WT_TRET_NOTFOUND_OK(
98 cursor->reopen(cursor, false));
99 WT_TRET(cursor->close(cursor));
100 ++nclosed;
101 }
102 }
103
104 /*
105 * We continue sweeping as long as we have some good average
106 * productivity, or we are under the minimum.
107 */
108 productive = (nclosed + WT_SESSION_CURSOR_SWEEP_MIN > i);
109 }
110
111 session->cursor_sweep_position = position;
112 F_SET(session, WT_SESSION_CACHE_CURSORS);
113
114 WT_STAT_CONN_INCR(session, cursor_sweep);
115 WT_STAT_CONN_INCRV(session, cursor_sweep_buckets, nbuckets);
116 WT_STAT_CONN_INCRV(session, cursor_sweep_examined, nexamined);
117 WT_STAT_CONN_INCRV(session, cursor_sweep_closed, nclosed);
118
119 return (ret);
120 }
121
122 /*
123 * __wt_session_copy_values --
124 * Copy values into all positioned cursors, so that they don't keep
125 * transaction IDs pinned.
126 */
127 int
__wt_session_copy_values(WT_SESSION_IMPL * session)128 __wt_session_copy_values(WT_SESSION_IMPL *session)
129 {
130 WT_CURSOR *cursor;
131
132 TAILQ_FOREACH(cursor, &session->cursors, q)
133 if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) {
134 #ifdef HAVE_DIAGNOSTIC
135 /*
136 * We have to do this with a transaction ID pinned
137 * unless the cursor is reading from a checkpoint.
138 */
139 WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(session);
140 WT_ASSERT(session,
141 txn_state->pinned_id != WT_TXN_NONE ||
142 (WT_PREFIX_MATCH(cursor->uri, "file:") &&
143 F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN)));
144 #endif
145 WT_RET(__cursor_localvalue(cursor));
146 }
147
148 return (0);
149 }
150
151 /*
152 * __wt_session_release_resources --
153 * Release common session resources.
154 */
155 int
__wt_session_release_resources(WT_SESSION_IMPL * session)156 __wt_session_release_resources(WT_SESSION_IMPL *session)
157 {
158 WT_DECL_RET;
159
160 /* Transaction cleanup */
161 __wt_txn_release_resources(session);
162
163 /* Block manager cleanup */
164 if (session->block_manager_cleanup != NULL)
165 WT_TRET(session->block_manager_cleanup(session));
166
167 /* Reconciliation cleanup */
168 if (session->reconcile_cleanup != NULL)
169 WT_TRET(session->reconcile_cleanup(session));
170
171 /* Stashed memory. */
172 __wt_stash_discard(session);
173
174 /*
175 * Discard scratch buffers, error memory; last, just in case a cleanup
176 * routine uses scratch buffers.
177 */
178 __wt_scr_discard(session);
179 __wt_buf_free(session, &session->err);
180
181 return (ret);
182 }
183
184 /*
185 * __session_clear --
186 * Clear a session structure.
187 */
188 static void
__session_clear(WT_SESSION_IMPL * session)189 __session_clear(WT_SESSION_IMPL *session)
190 {
191 /*
192 * There's no serialization support around the review of the hazard
193 * array, which means threads checking for hazard pointers first check
194 * the active field (which may be 0) and then use the hazard pointer
195 * (which cannot be NULL).
196 *
197 * Additionally, the session structure can include information that
198 * persists past the session's end-of-life, stored as part of page
199 * splits.
200 *
201 * For these reasons, be careful when clearing the session structure.
202 */
203 __wt_txn_clear_timestamp_queues(session);
204 memset(session, 0, WT_SESSION_CLEAR_SIZE);
205
206 WT_INIT_LSN(&session->bg_sync_lsn);
207
208 session->hazard_inuse = 0;
209 session->nhazard = 0;
210 }
211
212 /*
213 * __session_close_cursors --
214 * Close all cursors in a list.
215 */
216 static int
__session_close_cursors(WT_SESSION_IMPL * session,WT_CURSOR_LIST * cursors)217 __session_close_cursors(WT_SESSION_IMPL *session, WT_CURSOR_LIST *cursors)
218 {
219 WT_CURSOR *cursor, *cursor_tmp;
220 WT_DECL_RET;
221
222 /* Close all open cursors. */
223 WT_TAILQ_SAFE_REMOVE_BEGIN(cursor, cursors, q, cursor_tmp) {
224 if (F_ISSET(cursor, WT_CURSTD_CACHED))
225 /*
226 * Put the cached cursor in an open state
227 * that allows it to be closed.
228 */
229 WT_TRET_NOTFOUND_OK(cursor->reopen(cursor, false));
230 else if (session->event_handler->handle_close != NULL &&
231 strcmp(cursor->internal_uri, WT_LAS_URI) != 0)
232 /*
233 * Notify the user that we are closing the cursor
234 * handle via the registered close callback.
235 */
236 WT_TRET(session->event_handler->handle_close(
237 session->event_handler, &session->iface, cursor));
238
239 WT_TRET(cursor->close(cursor));
240 } WT_TAILQ_SAFE_REMOVE_END
241
242 return (ret);
243 }
244
245 /*
246 * __session_close_cached_cursors --
247 * Fully close all cached cursors.
248 */
249 static int
__session_close_cached_cursors(WT_SESSION_IMPL * session)250 __session_close_cached_cursors(WT_SESSION_IMPL *session)
251 {
252 WT_DECL_RET;
253 int i;
254
255 for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
256 WT_TRET(__session_close_cursors(session,
257 &session->cursor_cache[i]));
258 return (ret);
259 }
260
261 /*
262 * __session_close --
263 * WT_SESSION->close method.
264 */
265 static int
__session_close(WT_SESSION * wt_session,const char * config)266 __session_close(WT_SESSION *wt_session, const char *config)
267 {
268 WT_CONNECTION_IMPL *conn;
269 WT_DECL_RET;
270 WT_SESSION_IMPL *session;
271
272 conn = (WT_CONNECTION_IMPL *)wt_session->connection;
273 session = (WT_SESSION_IMPL *)wt_session;
274
275 SESSION_API_CALL_PREPARE_ALLOWED(session, close, config, cfg);
276 WT_UNUSED(cfg);
277
278 /* Close all open cursors while the cursor cache is disabled. */
279 F_CLR(session, WT_SESSION_CACHE_CURSORS);
280
281 /* Rollback any active transaction. */
282 if (F_ISSET(&session->txn, WT_TXN_RUNNING))
283 WT_TRET(__session_rollback_transaction(wt_session, NULL));
284
285 /*
286 * Also release any pinned transaction ID from a non-transactional
287 * operation.
288 */
289 if (conn->txn_global.states != NULL)
290 __wt_txn_release_snapshot(session);
291
292 /* Close all open cursors. */
293 WT_TRET(__session_close_cursors(session, &session->cursors));
294 WT_TRET(__session_close_cached_cursors(session));
295
296 WT_ASSERT(session, session->ncursors == 0);
297
298 /* Discard cached handles. */
299 __wt_session_close_cache(session);
300
301 /* Confirm we're not holding any hazard pointers. */
302 __wt_hazard_close(session);
303
304 /* Discard metadata tracking. */
305 __wt_meta_track_discard(session);
306
307 /* Free transaction information. */
308 __wt_txn_destroy(session);
309
310 /*
311 * Close the file where we tracked long operations. Do this before
312 * releasing resources, as we do scratch buffer management when we
313 * flush optrack buffers to disk.
314 */
315 if (F_ISSET(conn, WT_CONN_OPTRACK)) {
316 if (session->optrackbuf_ptr > 0) {
317 __wt_optrack_flush_buffer(session);
318 WT_TRET(__wt_close(session, &session->optrack_fh));
319 }
320
321 /* Free the operation tracking buffer */
322 __wt_free(session, session->optrack_buf);
323 }
324
325 /* Release common session resources. */
326 WT_TRET(__wt_session_release_resources(session));
327
328 /* The API lock protects opening and closing of sessions. */
329 __wt_spin_lock(session, &conn->api_lock);
330
331 /* Decrement the count of open sessions. */
332 WT_STAT_CONN_DECR(session, session_open);
333
334 /*
335 * Sessions are re-used, clear the structure: the clear sets the active
336 * field to 0, which will exclude the hazard array from review by the
337 * eviction thread. Because some session fields are accessed by other
338 * threads, the structure must be cleared carefully.
339 *
340 * We don't need to publish here, because regardless of the active field
341 * being non-zero, the hazard pointer is always valid.
342 */
343 __session_clear(session);
344 session = conn->default_session;
345
346 /*
347 * Decrement the count of active sessions if that's possible: a session
348 * being closed may or may not be at the end of the array, step toward
349 * the beginning of the array until we reach an active session.
350 */
351 while (conn->sessions[conn->session_cnt - 1].active == 0)
352 if (--conn->session_cnt == 0)
353 break;
354
355 __wt_spin_unlock(session, &conn->api_lock);
356
357 /* We no longer have a session, don't try to update it. */
358 session = NULL;
359
360 err: API_END_RET_NOTFOUND_MAP(session, ret);
361 }
362
363 /*
364 * __session_reconfigure --
365 * WT_SESSION->reconfigure method.
366 */
367 static int
__session_reconfigure(WT_SESSION * wt_session,const char * config)368 __session_reconfigure(WT_SESSION *wt_session, const char *config)
369 {
370 WT_CONFIG_ITEM cval;
371 WT_DECL_RET;
372 WT_SESSION_IMPL *session;
373
374 session = (WT_SESSION_IMPL *)wt_session;
375 /*
376 * Indicated as allowed in prepared state, even though not allowed,
377 * so that running transaction check below take precedence.
378 */
379 SESSION_API_CALL_PREPARE_ALLOWED(session, reconfigure, config, cfg);
380
381 /*
382 * Note that this method only checks keys that are passed in by the
383 * application: we don't want to reset other session settings to their
384 * default values.
385 */
386 WT_UNUSED(cfg);
387
388 WT_ERR(__wt_txn_context_check(session, false));
389
390 WT_ERR(__wt_session_reset_cursors(session, false));
391
392 WT_ERR(__wt_txn_reconfigure(session, config));
393
394 ret = __wt_config_getones(session, config, "ignore_cache_size", &cval);
395 if (ret == 0) {
396 if (cval.val)
397 F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
398 else
399 F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
400 }
401 WT_ERR_NOTFOUND_OK(ret);
402
403 ret = __wt_config_getones(session, config, "cache_cursors", &cval);
404 if (ret == 0) {
405 if (cval.val)
406 F_SET(session, WT_SESSION_CACHE_CURSORS);
407 else {
408 F_CLR(session, WT_SESSION_CACHE_CURSORS);
409 WT_ERR(__session_close_cached_cursors(session));
410 }
411 }
412 WT_ERR_NOTFOUND_OK(ret);
413
414 err: API_END_RET_NOTFOUND_MAP(session, ret);
415 }
416
417 /*
418 * __session_open_cursor_int --
419 * Internal version of WT_SESSION::open_cursor, with second cursor arg.
420 */
421 static int
__session_open_cursor_int(WT_SESSION_IMPL * session,const char * uri,WT_CURSOR * owner,WT_CURSOR * other,const char * cfg[],WT_CURSOR ** cursorp)422 __session_open_cursor_int(WT_SESSION_IMPL *session, const char *uri,
423 WT_CURSOR *owner, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp)
424 {
425 WT_COLGROUP *colgroup;
426 WT_DATA_SOURCE *dsrc;
427 WT_DECL_RET;
428
429 *cursorp = NULL;
430
431 /*
432 * Open specific cursor types we know about, or call the generic data
433 * source open function.
434 *
435 * Unwind a set of string comparisons into a switch statement hoping
436 * the compiler can make it fast, but list the common choices first
437 * instead of sorting so if/else patterns are still fast.
438 */
439 switch (uri[0]) {
440 /*
441 * Common cursor types.
442 */
443 case 't':
444 if (WT_PREFIX_MATCH(uri, "table:"))
445 WT_RET(__wt_curtable_open(
446 session, uri, owner, cfg, cursorp));
447 break;
448 case 'c':
449 if (WT_PREFIX_MATCH(uri, "colgroup:")) {
450 /*
451 * Column groups are a special case: open a cursor on
452 * the underlying data source.
453 */
454 WT_RET(__wt_schema_get_colgroup(
455 session, uri, false, NULL, &colgroup));
456 WT_RET(__wt_open_cursor(
457 session, colgroup->source, owner, cfg, cursorp));
458 } else if (WT_PREFIX_MATCH(uri, "config:"))
459 WT_RET(__wt_curconfig_open(
460 session, uri, cfg, cursorp));
461 break;
462 case 'i':
463 if (WT_PREFIX_MATCH(uri, "index:"))
464 WT_RET(__wt_curindex_open(
465 session, uri, owner, cfg, cursorp));
466 break;
467 case 'j':
468 if (WT_PREFIX_MATCH(uri, "join:"))
469 WT_RET(__wt_curjoin_open(
470 session, uri, owner, cfg, cursorp));
471 break;
472 case 'l':
473 if (WT_PREFIX_MATCH(uri, "lsm:"))
474 WT_RET(__wt_clsm_open(
475 session, uri, owner, cfg, cursorp));
476 else if (WT_PREFIX_MATCH(uri, "log:"))
477 WT_RET(__wt_curlog_open(session, uri, cfg, cursorp));
478 break;
479
480 /*
481 * Less common cursor types.
482 */
483 case 'f':
484 if (WT_PREFIX_MATCH(uri, "file:"))
485 WT_RET(__wt_curfile_open(
486 session, uri, owner, cfg, cursorp));
487 break;
488 case 'm':
489 if (WT_PREFIX_MATCH(uri, WT_METADATA_URI))
490 WT_RET(__wt_curmetadata_open(
491 session, uri, owner, cfg, cursorp));
492 break;
493 case 'b':
494 if (WT_PREFIX_MATCH(uri, "backup:"))
495 WT_RET(__wt_curbackup_open(
496 session, uri, cfg, cursorp));
497 break;
498 case 's':
499 if (WT_PREFIX_MATCH(uri, "statistics:"))
500 WT_RET(__wt_curstat_open(session, uri, other, cfg,
501 cursorp));
502 break;
503 default:
504 break;
505 }
506
507 if (*cursorp == NULL &&
508 (dsrc = __wt_schema_get_source(session, uri)) != NULL)
509 WT_RET(dsrc->open_cursor == NULL ?
510 __wt_object_unsupported(session, uri) :
511 __wt_curds_open(session, uri, owner, cfg, dsrc, cursorp));
512
513 if (*cursorp == NULL)
514 return (__wt_bad_object_type(session, uri));
515
516 if (owner != NULL) {
517 /*
518 * We support caching simple cursors that have no
519 * children. If this cursor is a child, we're not going
520 * to cache this child or its parent.
521 */
522 F_CLR(owner, WT_CURSTD_CACHEABLE);
523 F_CLR(*cursorp, WT_CURSTD_CACHEABLE);
524 }
525
526 /*
527 * When opening simple tables, the table code calls this function on the
528 * underlying data source, in which case the application's URI has been
529 * copied.
530 */
531 if ((*cursorp)->uri == NULL &&
532 (ret = __wt_strdup(session, uri, &(*cursorp)->uri)) != 0) {
533 WT_TRET((*cursorp)->close(*cursorp));
534 *cursorp = NULL;
535 }
536
537 return (ret);
538 }
539
540 /*
541 * __wt_open_cursor --
542 * Internal version of WT_SESSION::open_cursor.
543 */
544 int
__wt_open_cursor(WT_SESSION_IMPL * session,const char * uri,WT_CURSOR * owner,const char * cfg[],WT_CURSOR ** cursorp)545 __wt_open_cursor(WT_SESSION_IMPL *session,
546 const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
547 {
548 WT_DECL_RET;
549
550 /* We do not cache any subordinate tables/files cursors. */
551 if (owner == NULL) {
552 if ((ret = __wt_cursor_cache_get(
553 session, uri, NULL, cfg, cursorp)) == 0)
554 return (0);
555 WT_RET_NOTFOUND_OK(ret);
556 }
557
558 return (__session_open_cursor_int(session, uri, owner, NULL, cfg,
559 cursorp));
560 }
561
562 /*
563 * __session_open_cursor --
564 * WT_SESSION->open_cursor method.
565 */
566 static int
__session_open_cursor(WT_SESSION * wt_session,const char * uri,WT_CURSOR * to_dup,const char * config,WT_CURSOR ** cursorp)567 __session_open_cursor(WT_SESSION *wt_session,
568 const char *uri, WT_CURSOR *to_dup, const char *config, WT_CURSOR **cursorp)
569 {
570 WT_CURSOR *cursor;
571 WT_DECL_RET;
572 WT_SESSION_IMPL *session;
573 bool statjoin;
574
575 cursor = *cursorp = NULL;
576
577 session = (WT_SESSION_IMPL *)wt_session;
578 SESSION_API_CALL(session, open_cursor, config, cfg);
579
580 statjoin = (to_dup != NULL && uri != NULL &&
581 strcmp(uri, "statistics:join") == 0);
582 if (!statjoin) {
583 if ((to_dup == NULL && uri == NULL) ||
584 (to_dup != NULL && uri != NULL))
585 WT_ERR_MSG(session, EINVAL,
586 "should be passed either a URI or a cursor to "
587 "duplicate, but not both");
588
589 if ((ret = __wt_cursor_cache_get(
590 session, uri, to_dup, cfg, &cursor)) == 0)
591 goto done;
592 WT_ERR_NOTFOUND_OK(ret);
593
594 if (to_dup != NULL) {
595 uri = to_dup->uri;
596 if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
597 !WT_PREFIX_MATCH(uri, "index:") &&
598 !WT_PREFIX_MATCH(uri, "file:") &&
599 !WT_PREFIX_MATCH(uri, "lsm:") &&
600 !WT_PREFIX_MATCH(uri, WT_METADATA_URI) &&
601 !WT_PREFIX_MATCH(uri, "table:") &&
602 __wt_schema_get_source(session, uri) == NULL)
603 WT_ERR(__wt_bad_object_type(session, uri));
604 }
605 }
606
607 WT_ERR(__session_open_cursor_int(session, uri, NULL,
608 statjoin ? to_dup : NULL, cfg, &cursor));
609
610 done:
611 if (to_dup != NULL && !statjoin)
612 WT_ERR(__wt_cursor_dup_position(to_dup, cursor));
613
614 *cursorp = cursor;
615
616 if (0) {
617 err: if (cursor != NULL)
618 WT_TRET(cursor->close(cursor));
619 }
620 /*
621 * Opening a cursor on a non-existent data source will set ret to
622 * either of ENOENT or WT_NOTFOUND at this point. However,
623 * applications may reasonably do this inside a transaction to check
624 * for the existence of a table or index.
625 *
626 * Failure in opening a cursor should not set an error on the
627 * transaction and WT_NOTFOUND will be mapped to ENOENT.
628 */
629
630 API_END_RET_NO_TXN_ERROR(session, ret);
631 }
632
633 /*
634 * __session_alter --
635 * Alter a table setting.
636 */
637 static int
__session_alter(WT_SESSION * wt_session,const char * uri,const char * config)638 __session_alter(WT_SESSION *wt_session, const char *uri, const char *config)
639 {
640 WT_DECL_RET;
641 WT_SESSION_IMPL *session;
642
643 session = (WT_SESSION_IMPL *)wt_session;
644
645 SESSION_API_CALL(session, alter, config, cfg);
646
647 /* In-memory ignores alter operations. */
648 if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
649 goto err;
650
651 /* Disallow objects in the WiredTiger name space. */
652 WT_ERR(__wt_str_name_check(session, uri));
653
654 /*
655 * We replace the default configuration listing with the current
656 * configuration. Otherwise the defaults for values that can be
657 * altered would override settings used by the user in create.
658 */
659 cfg[0] = cfg[1];
660 cfg[1] = NULL;
661 WT_WITH_CHECKPOINT_LOCK(session,
662 WT_WITH_SCHEMA_LOCK(session,
663 ret = __wt_schema_alter(session, uri, cfg)));
664
665 err:
666 if (ret != 0)
667 WT_STAT_CONN_INCR(session, session_table_alter_fail);
668 else
669 WT_STAT_CONN_INCR(session, session_table_alter_success);
670 API_END_RET_NOTFOUND_MAP(session, ret);
671 }
672
673 /*
674 * __session_alter_readonly --
675 * WT_SESSION->alter method; readonly version.
676 */
677 static int
__session_alter_readonly(WT_SESSION * wt_session,const char * uri,const char * config)678 __session_alter_readonly(
679 WT_SESSION *wt_session, const char *uri, const char *config)
680 {
681 WT_DECL_RET;
682 WT_SESSION_IMPL *session;
683
684 WT_UNUSED(uri);
685 WT_UNUSED(config);
686
687 session = (WT_SESSION_IMPL *)wt_session;
688 SESSION_API_CALL_NOCONF(session, alter);
689
690 WT_STAT_CONN_INCR(session, session_table_alter_fail);
691 ret = __wt_session_notsup(session);
692 err: API_END_RET(session, ret);
693 }
694
695 /*
696 * __wt_session_create --
697 * Internal version of WT_SESSION::create.
698 */
699 int
__wt_session_create(WT_SESSION_IMPL * session,const char * uri,const char * config)700 __wt_session_create(
701 WT_SESSION_IMPL *session, const char *uri, const char *config)
702 {
703 WT_DECL_RET;
704
705 WT_WITH_SCHEMA_LOCK(session,
706 WT_WITH_TABLE_WRITE_LOCK(session,
707 ret = __wt_schema_create(session, uri, config)));
708 return (ret);
709 }
710
711 /*
712 * __session_create --
713 * WT_SESSION->create method.
714 */
715 static int
__session_create(WT_SESSION * wt_session,const char * uri,const char * config)716 __session_create(WT_SESSION *wt_session, const char *uri, const char *config)
717 {
718 WT_CONFIG_ITEM cval;
719 WT_DECL_RET;
720 WT_SESSION_IMPL *session;
721
722 session = (WT_SESSION_IMPL *)wt_session;
723 SESSION_API_CALL(session, create, config, cfg);
724 WT_UNUSED(cfg);
725
726 /* Disallow objects in the WiredTiger name space. */
727 WT_ERR(__wt_str_name_check(session, uri));
728
729 /*
730 * Type configuration only applies to tables, column groups and indexes.
731 * We don't want applications to attempt to layer LSM on top of their
732 * extended data-sources, and the fact we allow LSM as a valid URI is an
733 * invitation to that mistake: nip it in the bud.
734 */
735 if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
736 !WT_PREFIX_MATCH(uri, "index:") &&
737 !WT_PREFIX_MATCH(uri, "table:")) {
738 /*
739 * We can't disallow type entirely, a configuration string might
740 * innocently include it, for example, a dump/load pair. If the
741 * underlying type is "file", it's OK ("file" is the underlying
742 * type for every type); if the URI type prefix and the type are
743 * the same, let it go.
744 */
745 if ((ret =
746 __wt_config_getones(session, config, "type", &cval)) == 0 &&
747 !WT_STRING_MATCH("file", cval.str, cval.len) &&
748 (strncmp(uri, cval.str, cval.len) != 0 ||
749 uri[cval.len] != ':'))
750 WT_ERR_MSG(session, EINVAL,
751 "%s: unsupported type configuration", uri);
752 WT_ERR_NOTFOUND_OK(ret);
753 }
754
755 ret = __wt_session_create(session, uri, config);
756
757 err:
758 if (ret != 0)
759 WT_STAT_CONN_INCR(session, session_table_create_fail);
760 else
761 WT_STAT_CONN_INCR(session, session_table_create_success);
762 API_END_RET_NOTFOUND_MAP(session, ret);
763 }
764
765 /*
766 * __session_create_readonly --
767 * WT_SESSION->create method; readonly version.
768 */
769 static int
__session_create_readonly(WT_SESSION * wt_session,const char * uri,const char * config)770 __session_create_readonly(
771 WT_SESSION *wt_session, const char *uri, const char *config)
772 {
773 WT_DECL_RET;
774 WT_SESSION_IMPL *session;
775
776 WT_UNUSED(uri);
777 WT_UNUSED(config);
778
779 session = (WT_SESSION_IMPL *)wt_session;
780 SESSION_API_CALL_NOCONF(session, create);
781
782 WT_STAT_CONN_INCR(session, session_table_create_fail);
783 ret = __wt_session_notsup(session);
784 err: API_END_RET(session, ret);
785 }
786
787 /*
788 * __session_log_flush --
789 * WT_SESSION->log_flush method.
790 */
791 static int
__session_log_flush(WT_SESSION * wt_session,const char * config)792 __session_log_flush(WT_SESSION *wt_session, const char *config)
793 {
794 WT_CONFIG_ITEM cval;
795 WT_CONNECTION_IMPL *conn;
796 WT_DECL_RET;
797 WT_SESSION_IMPL *session;
798 uint32_t flags;
799
800 session = (WT_SESSION_IMPL *)wt_session;
801 SESSION_API_CALL(session, log_flush, config, cfg);
802 WT_STAT_CONN_INCR(session, log_flush);
803
804 conn = S2C(session);
805 flags = 0;
806 /*
807 * If logging is not enabled there is nothing to do.
808 */
809 if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
810 WT_ERR_MSG(session, EINVAL, "logging not enabled");
811
812 WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval));
813 if (WT_STRING_MATCH("background", cval.str, cval.len))
814 flags = WT_LOG_BACKGROUND;
815 else if (WT_STRING_MATCH("off", cval.str, cval.len))
816 flags = WT_LOG_FLUSH;
817 else if (WT_STRING_MATCH("on", cval.str, cval.len))
818 flags = WT_LOG_FSYNC;
819 ret = __wt_log_flush(session, flags);
820
821 err: API_END_RET(session, ret);
822 }
823
824 /*
825 * __session_log_flush_readonly --
826 * WT_SESSION->log_flush method; readonly version.
827 */
828 static int
__session_log_flush_readonly(WT_SESSION * wt_session,const char * config)829 __session_log_flush_readonly(WT_SESSION *wt_session, const char *config)
830 {
831 WT_DECL_RET;
832 WT_SESSION_IMPL *session;
833
834 WT_UNUSED(config);
835
836 session = (WT_SESSION_IMPL *)wt_session;
837 SESSION_API_CALL_NOCONF(session, log_flush);
838
839 ret = __wt_session_notsup(session);
840 err: API_END_RET(session, ret);
841 }
842
843 /*
844 * __session_log_printf --
845 * WT_SESSION->log_printf method.
846 */
847 static int
__session_log_printf(WT_SESSION * wt_session,const char * fmt,...)848 __session_log_printf(WT_SESSION *wt_session, const char *fmt, ...)
849 WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
850 {
851 WT_DECL_RET;
852 WT_SESSION_IMPL *session;
853 va_list ap;
854
855 session = (WT_SESSION_IMPL *)wt_session;
856 SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, log_printf);
857
858 va_start(ap, fmt);
859 ret = __wt_log_vprintf(session, fmt, ap);
860 va_end(ap);
861
862 err: API_END_RET(session, ret);
863 }
864
865 /*
866 * __session_log_printf_readonly --
867 * WT_SESSION->log_printf method; readonly version.
868 */
869 static int
__session_log_printf_readonly(WT_SESSION * wt_session,const char * fmt,...)870 __session_log_printf_readonly(WT_SESSION *wt_session, const char *fmt, ...)
871 WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
872 {
873 WT_DECL_RET;
874 WT_SESSION_IMPL *session;
875
876 WT_UNUSED(fmt);
877
878 session = (WT_SESSION_IMPL *)wt_session;
879 SESSION_API_CALL_NOCONF(session, log_printf);
880
881 ret = __wt_session_notsup(session);
882 err: API_END_RET(session, ret);
883 }
884
885 /*
886 * __session_rebalance --
887 * WT_SESSION->rebalance method.
888 */
889 static int
__session_rebalance(WT_SESSION * wt_session,const char * uri,const char * config)890 __session_rebalance(WT_SESSION *wt_session, const char *uri, const char *config)
891 {
892 WT_DECL_RET;
893 WT_SESSION_IMPL *session;
894
895 session = (WT_SESSION_IMPL *)wt_session;
896
897 SESSION_API_CALL(session, rebalance, config, cfg);
898
899 /* In-memory ignores rebalance operations. */
900 if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
901 goto err;
902
903 /* Block out checkpoints to avoid spurious EBUSY errors. */
904 WT_WITH_CHECKPOINT_LOCK(session,
905 WT_WITH_SCHEMA_LOCK(session,
906 ret = __wt_schema_worker(session, uri, __wt_bt_rebalance,
907 NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_REBALANCE)));
908
909 err:
910 if (ret != 0)
911 WT_STAT_CONN_INCR(session, session_table_rebalance_fail);
912 else
913 WT_STAT_CONN_INCR(session, session_table_rebalance_success);
914 API_END_RET_NOTFOUND_MAP(session, ret);
915 }
916
917 /*
918 * __session_rebalance_readonly --
919 * WT_SESSION->rebalance method; readonly version.
920 */
921 static int
__session_rebalance_readonly(WT_SESSION * wt_session,const char * uri,const char * config)922 __session_rebalance_readonly(
923 WT_SESSION *wt_session, const char *uri, const char *config)
924 {
925 WT_DECL_RET;
926 WT_SESSION_IMPL *session;
927
928 WT_UNUSED(uri);
929 WT_UNUSED(config);
930
931 session = (WT_SESSION_IMPL *)wt_session;
932 SESSION_API_CALL_NOCONF(session, rebalance);
933
934 WT_STAT_CONN_INCR(session, session_table_rebalance_fail);
935 ret = __wt_session_notsup(session);
936 err: API_END_RET(session, ret);
937 }
938
939 /*
940 * __session_rename --
941 * WT_SESSION->rename method.
942 */
943 static int
__session_rename(WT_SESSION * wt_session,const char * uri,const char * newuri,const char * config)944 __session_rename(WT_SESSION *wt_session,
945 const char *uri, const char *newuri, const char *config)
946 {
947 WT_DECL_RET;
948 WT_SESSION_IMPL *session;
949
950 session = (WT_SESSION_IMPL *)wt_session;
951 SESSION_API_CALL(session, rename, config, cfg);
952
953 /* Disallow objects in the WiredTiger name space. */
954 WT_ERR(__wt_str_name_check(session, uri));
955 WT_ERR(__wt_str_name_check(session, newuri));
956
957 WT_WITH_CHECKPOINT_LOCK(session,
958 WT_WITH_SCHEMA_LOCK(session,
959 WT_WITH_TABLE_WRITE_LOCK(session,
960 ret = __wt_schema_rename(session, uri, newuri, cfg))));
961 err:
962 if (ret != 0)
963 WT_STAT_CONN_INCR(session, session_table_rename_fail);
964 else
965 WT_STAT_CONN_INCR(session, session_table_rename_success);
966 API_END_RET_NOTFOUND_MAP(session, ret);
967 }
968
969 /*
970 * __session_rename_readonly --
971 * WT_SESSION->rename method; readonly version.
972 */
973 static int
__session_rename_readonly(WT_SESSION * wt_session,const char * uri,const char * newuri,const char * config)974 __session_rename_readonly(WT_SESSION *wt_session,
975 const char *uri, const char *newuri, const char *config)
976 {
977 WT_DECL_RET;
978 WT_SESSION_IMPL *session;
979
980 WT_UNUSED(uri);
981 WT_UNUSED(newuri);
982 WT_UNUSED(config);
983
984 session = (WT_SESSION_IMPL *)wt_session;
985 SESSION_API_CALL_NOCONF(session, rename);
986
987 WT_STAT_CONN_INCR(session, session_table_rename_fail);
988 ret = __wt_session_notsup(session);
989 err: API_END_RET(session, ret);
990 }
991
992 /*
993 * __session_reset --
994 * WT_SESSION->reset method.
995 */
996 static int
__session_reset(WT_SESSION * wt_session)997 __session_reset(WT_SESSION *wt_session)
998 {
999 WT_DECL_RET;
1000 WT_SESSION_IMPL *session;
1001
1002 session = (WT_SESSION_IMPL *)wt_session;
1003
1004 SESSION_API_CALL_NOCONF(session, reset);
1005
1006 WT_ERR(__wt_txn_context_check(session, false));
1007
1008 WT_TRET(__wt_session_reset_cursors(session, true));
1009
1010 WT_TRET(__wt_session_cursor_cache_sweep(session));
1011
1012 /* Release common session resources. */
1013 WT_TRET(__wt_session_release_resources(session));
1014
1015 err: API_END_RET_NOTFOUND_MAP(session, ret);
1016 }
1017
1018 /*
1019 * __session_drop --
1020 * WT_SESSION->drop method.
1021 */
1022 static int
__session_drop(WT_SESSION * wt_session,const char * uri,const char * config)1023 __session_drop(WT_SESSION *wt_session, const char *uri, const char *config)
1024 {
1025 WT_CONFIG_ITEM cval;
1026 WT_DECL_RET;
1027 WT_SESSION_IMPL *session;
1028 bool checkpoint_wait, lock_wait;
1029
1030 session = (WT_SESSION_IMPL *)wt_session;
1031 SESSION_API_CALL(session, drop, config, cfg);
1032
1033 /* Disallow objects in the WiredTiger name space. */
1034 WT_ERR(__wt_str_name_check(session, uri));
1035
1036 WT_ERR(__wt_config_gets_def(session, cfg, "checkpoint_wait", 1, &cval));
1037 checkpoint_wait = cval.val != 0;
1038 WT_ERR(__wt_config_gets_def(session, cfg, "lock_wait", 1, &cval));
1039 lock_wait = cval.val != 0;
1040
1041 /*
1042 * Take the checkpoint lock if there is a need to prevent the drop
1043 * operation from failing with EBUSY due to an ongoing checkpoint.
1044 */
1045 if (checkpoint_wait) {
1046 if (lock_wait)
1047 WT_WITH_CHECKPOINT_LOCK(session,
1048 WT_WITH_SCHEMA_LOCK(session,
1049 WT_WITH_TABLE_WRITE_LOCK(session, ret =
1050 __wt_schema_drop(session, uri, cfg))));
1051 else
1052 WT_WITH_CHECKPOINT_LOCK_NOWAIT(session, ret,
1053 WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret,
1054 WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret,
1055 ret =
1056 __wt_schema_drop(session, uri, cfg))));
1057 } else {
1058 if (lock_wait)
1059 WT_WITH_SCHEMA_LOCK(session,
1060 WT_WITH_TABLE_WRITE_LOCK(session,
1061 ret = __wt_schema_drop(session, uri, cfg)));
1062 else
1063 WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret,
1064 WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret,
1065 ret = __wt_schema_drop(session, uri, cfg)));
1066 }
1067
1068 err:
1069 if (ret != 0)
1070 WT_STAT_CONN_INCR(session, session_table_drop_fail);
1071 else
1072 WT_STAT_CONN_INCR(session, session_table_drop_success);
1073
1074 /* Note: drop operations cannot be unrolled (yet?). */
1075 API_END_RET_NOTFOUND_MAP(session, ret);
1076 }
1077
1078 /*
1079 * __session_drop_readonly --
1080 * WT_SESSION->drop method; readonly version.
1081 */
1082 static int
__session_drop_readonly(WT_SESSION * wt_session,const char * uri,const char * config)1083 __session_drop_readonly(
1084 WT_SESSION *wt_session, const char *uri, const char *config)
1085 {
1086 WT_DECL_RET;
1087 WT_SESSION_IMPL *session;
1088
1089 WT_UNUSED(uri);
1090 WT_UNUSED(config);
1091
1092 session = (WT_SESSION_IMPL *)wt_session;
1093 SESSION_API_CALL_NOCONF(session, drop);
1094
1095 WT_STAT_CONN_INCR(session, session_table_drop_fail);
1096 ret = __wt_session_notsup(session);
1097 err: API_END_RET(session, ret);
1098 }
1099
1100 /*
1101 * __session_join --
1102 * WT_SESSION->join method.
1103 */
1104 static int
__session_join(WT_SESSION * wt_session,WT_CURSOR * join_cursor,WT_CURSOR * ref_cursor,const char * config)1105 __session_join(WT_SESSION *wt_session, WT_CURSOR *join_cursor,
1106 WT_CURSOR *ref_cursor, const char *config)
1107 {
1108 WT_CONFIG_ITEM cval;
1109 WT_CURSOR *firstcg;
1110 WT_CURSOR_INDEX *cindex;
1111 WT_CURSOR_JOIN *cjoin;
1112 WT_CURSOR_TABLE *ctable;
1113 WT_DECL_RET;
1114 WT_INDEX *idx;
1115 WT_SESSION_IMPL *session;
1116 WT_TABLE *table;
1117 uint64_t count;
1118 uint32_t bloom_bit_count, bloom_hash_count;
1119 uint8_t flags, range;
1120 bool nested;
1121
1122 session = (WT_SESSION_IMPL *)wt_session;
1123 SESSION_API_CALL(session, join, config, cfg);
1124
1125 firstcg = NULL;
1126 table = NULL;
1127 nested = false;
1128 count = 0;
1129
1130 if (!WT_PREFIX_MATCH(join_cursor->uri, "join:"))
1131 WT_ERR_MSG(session, EINVAL, "not a join cursor");
1132
1133 if (WT_PREFIX_MATCH(ref_cursor->uri, "index:")) {
1134 cindex = (WT_CURSOR_INDEX *)ref_cursor;
1135 idx = cindex->index;
1136 table = cindex->table;
1137 firstcg = cindex->cg_cursors[0];
1138 } else if (WT_PREFIX_MATCH(ref_cursor->uri, "table:")) {
1139 idx = NULL;
1140 ctable = (WT_CURSOR_TABLE *)ref_cursor;
1141 table = ctable->table;
1142 firstcg = ctable->cg_cursors[0];
1143 } else if (WT_PREFIX_MATCH(ref_cursor->uri, "join:")) {
1144 idx = NULL;
1145 table = ((WT_CURSOR_JOIN *)ref_cursor)->table;
1146 nested = true;
1147 } else
1148 WT_ERR_MSG(session, EINVAL,
1149 "ref_cursor must be an index, table or join cursor");
1150
1151 if (firstcg != NULL && !F_ISSET(firstcg, WT_CURSTD_KEY_SET))
1152 WT_ERR_MSG(session, EINVAL,
1153 "requires reference cursor be positioned");
1154 cjoin = (WT_CURSOR_JOIN *)join_cursor;
1155 if (cjoin->table != table)
1156 WT_ERR_MSG(session, EINVAL,
1157 "table for join cursor does not match table for "
1158 "ref_cursor");
1159 if (F_ISSET(ref_cursor, WT_CURSTD_JOINED))
1160 WT_ERR_MSG(session, EINVAL, "cursor already used in a join");
1161
1162 /* "ge" is the default */
1163 range = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ;
1164 flags = 0;
1165 WT_ERR(__wt_config_gets(session, cfg, "compare", &cval));
1166 if (cval.len != 0) {
1167 if (WT_STRING_MATCH("gt", cval.str, cval.len))
1168 range = WT_CURJOIN_END_GT;
1169 else if (WT_STRING_MATCH("lt", cval.str, cval.len))
1170 range = WT_CURJOIN_END_LT;
1171 else if (WT_STRING_MATCH("le", cval.str, cval.len))
1172 range = WT_CURJOIN_END_LE;
1173 else if (WT_STRING_MATCH("eq", cval.str, cval.len))
1174 range = WT_CURJOIN_END_EQ;
1175 else if (!WT_STRING_MATCH("ge", cval.str, cval.len))
1176 WT_ERR_MSG(session, EINVAL,
1177 "compare=%.*s not supported",
1178 (int)cval.len, cval.str);
1179 }
1180 WT_ERR(__wt_config_gets(session, cfg, "count", &cval));
1181 if (cval.len != 0)
1182 count = (uint64_t)cval.val;
1183
1184 WT_ERR(__wt_config_gets(session, cfg, "strategy", &cval));
1185 if (cval.len != 0) {
1186 if (WT_STRING_MATCH("bloom", cval.str, cval.len))
1187 LF_SET(WT_CURJOIN_ENTRY_BLOOM);
1188 else if (!WT_STRING_MATCH("default", cval.str, cval.len))
1189 WT_ERR_MSG(session, EINVAL,
1190 "strategy=%.*s not supported",
1191 (int)cval.len, cval.str);
1192 }
1193 WT_ERR(__wt_config_gets(session, cfg, "bloom_bit_count", &cval));
1194 if ((uint64_t)cval.val > UINT32_MAX)
1195 WT_ERR_MSG(session, EINVAL, "bloom_bit_count: value too large");
1196 bloom_bit_count = (uint32_t)cval.val;
1197 WT_ERR(__wt_config_gets(session, cfg, "bloom_hash_count", &cval));
1198 if ((uint64_t)cval.val > UINT32_MAX)
1199 WT_ERR_MSG(session, EINVAL,
1200 "bloom_hash_count: value too large");
1201 bloom_hash_count = (uint32_t)cval.val;
1202 if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM) && count == 0)
1203 WT_ERR_MSG(session, EINVAL,
1204 "count must be nonzero when strategy=bloom");
1205 WT_ERR(__wt_config_gets_def(
1206 session, cfg, "bloom_false_positives", 0, &cval));
1207 if (cval.val != 0)
1208 LF_SET(WT_CURJOIN_ENTRY_FALSE_POSITIVES);
1209
1210 WT_ERR(__wt_config_gets(session, cfg, "operation", &cval));
1211 if (cval.len != 0 && WT_STRING_MATCH("or", cval.str, cval.len))
1212 LF_SET(WT_CURJOIN_ENTRY_DISJUNCTION);
1213
1214 if (nested && (count != 0 || range != WT_CURJOIN_END_EQ ||
1215 LF_ISSET(WT_CURJOIN_ENTRY_BLOOM)))
1216 WT_ERR_MSG(session, EINVAL,
1217 "joining a nested join cursor is incompatible with "
1218 "setting \"strategy\", \"compare\" or \"count\"");
1219
1220 WT_ERR(__wt_curjoin_join(session, cjoin, idx, ref_cursor, flags,
1221 range, count, bloom_bit_count, bloom_hash_count));
1222 /*
1223 * There's an implied ownership ordering that isn't
1224 * known when the cursors are created: the join cursor
1225 * must be closed before any of the indices. Enforce
1226 * that here by reordering.
1227 */
1228 if (TAILQ_FIRST(&session->cursors) != join_cursor) {
1229 TAILQ_REMOVE(&session->cursors, join_cursor, q);
1230 TAILQ_INSERT_HEAD(&session->cursors, join_cursor, q);
1231 }
1232 /* Disable the reference cursor for regular operations */
1233 F_SET(ref_cursor, WT_CURSTD_JOINED);
1234
1235 err: API_END_RET_NOTFOUND_MAP(session, ret);
1236 }
1237
1238 /*
1239 * __session_salvage --
1240 * WT_SESSION->salvage method.
1241 */
1242 static int
__session_salvage(WT_SESSION * wt_session,const char * uri,const char * config)1243 __session_salvage(WT_SESSION *wt_session, const char *uri, const char *config)
1244 {
1245 WT_DECL_RET;
1246 WT_SESSION_IMPL *session;
1247
1248 session = (WT_SESSION_IMPL *)wt_session;
1249
1250 SESSION_API_CALL(session, salvage, config, cfg);
1251
1252 WT_ERR(__wt_inmem_unsupported_op(session, NULL));
1253
1254 /* Block out checkpoints to avoid spurious EBUSY errors. */
1255 WT_WITH_CHECKPOINT_LOCK(session,
1256 WT_WITH_SCHEMA_LOCK(session,
1257 ret = __wt_schema_worker(session, uri, __wt_salvage,
1258 NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_SALVAGE)));
1259
1260 err:
1261 if (ret != 0)
1262 WT_STAT_CONN_INCR(session, session_table_salvage_fail);
1263 else
1264 WT_STAT_CONN_INCR(session, session_table_salvage_success);
1265 API_END_RET_NOTFOUND_MAP(session, ret);
1266 }
1267
1268 /*
1269 * __session_salvage_readonly --
1270 * WT_SESSION->salvage method; readonly version.
1271 */
1272 static int
__session_salvage_readonly(WT_SESSION * wt_session,const char * uri,const char * config)1273 __session_salvage_readonly(
1274 WT_SESSION *wt_session, const char *uri, const char *config)
1275 {
1276 WT_DECL_RET;
1277 WT_SESSION_IMPL *session;
1278
1279 WT_UNUSED(uri);
1280 WT_UNUSED(config);
1281
1282 session = (WT_SESSION_IMPL *)wt_session;
1283 SESSION_API_CALL_NOCONF(session, salvage);
1284
1285 WT_STAT_CONN_INCR(session, session_table_salvage_fail);
1286 ret = __wt_session_notsup(session);
1287 err: API_END_RET(session, ret);
1288 }
1289
1290 /*
1291 * __wt_session_range_truncate --
1292 * Session handling of a range truncate.
1293 */
1294 int
__wt_session_range_truncate(WT_SESSION_IMPL * session,const char * uri,WT_CURSOR * start,WT_CURSOR * stop)1295 __wt_session_range_truncate(WT_SESSION_IMPL *session,
1296 const char *uri, WT_CURSOR *start, WT_CURSOR *stop)
1297 {
1298 WT_DECL_RET;
1299 int cmp;
1300 bool local_start;
1301
1302 local_start = false;
1303 if (uri != NULL) {
1304 WT_ASSERT(session, WT_PREFIX_MATCH(uri, "file:"));
1305 /*
1306 * A URI file truncate becomes a range truncate where we
1307 * set a start cursor at the beginning. We already
1308 * know the NULL stop goes to the end of the range.
1309 */
1310 WT_ERR(__session_open_cursor(
1311 (WT_SESSION *)session, uri, NULL, NULL, &start));
1312 local_start = true;
1313 ret = start->next(start);
1314 if (ret == WT_NOTFOUND) {
1315 /*
1316 * If there are no elements, there is nothing
1317 * to do.
1318 */
1319 ret = 0;
1320 goto done;
1321 }
1322 WT_ERR(ret);
1323 }
1324
1325 /*
1326 * Cursor truncate is only supported for some objects, check for a
1327 * supporting compare method.
1328 */
1329 if (start != NULL && start->compare == NULL)
1330 WT_ERR(__wt_bad_object_type(session, start->uri));
1331 if (stop != NULL && stop->compare == NULL)
1332 WT_ERR(__wt_bad_object_type(session, stop->uri));
1333
1334 /*
1335 * If both cursors set, check they're correctly ordered with respect to
1336 * each other. We have to test this before any search, the search can
1337 * change the initial cursor position.
1338 *
1339 * Rather happily, the compare routine will also confirm the cursors
1340 * reference the same object and the keys are set.
1341 *
1342 * The test for a NULL start comparison function isn't necessary (we
1343 * checked it above), but it quiets clang static analysis complaints.
1344 */
1345 if (start != NULL && stop != NULL && start->compare != NULL) {
1346 WT_ERR(start->compare(start, stop, &cmp));
1347 if (cmp > 0)
1348 WT_ERR_MSG(session, EINVAL,
1349 "the start cursor position is after the stop "
1350 "cursor position");
1351 }
1352
1353 /*
1354 * Truncate does not require keys actually exist so that applications
1355 * can discard parts of the object's name space without knowing exactly
1356 * what records currently appear in the object. For this reason, do a
1357 * search-near, rather than a search. Additionally, we have to correct
1358 * after calling search-near, to position the start/stop cursors on the
1359 * next record greater than/less than the original key. If we fail to
1360 * find a key in a search-near, there are no keys in the table. If we
1361 * fail to move forward or backward in a range, there are no keys in
1362 * the range. In either of those cases, we're done.
1363 */
1364 if (start != NULL)
1365 if ((ret = start->search_near(start, &cmp)) != 0 ||
1366 (cmp < 0 && (ret = start->next(start)) != 0)) {
1367 WT_ERR_NOTFOUND_OK(ret);
1368 goto done;
1369 }
1370 if (stop != NULL)
1371 if ((ret = stop->search_near(stop, &cmp)) != 0 ||
1372 (cmp > 0 && (ret = stop->prev(stop)) != 0)) {
1373 WT_ERR_NOTFOUND_OK(ret);
1374 goto done;
1375 }
1376
1377 /*
1378 * We always truncate in the forward direction because the underlying
1379 * data structures can move through pages faster forward than backward.
1380 * If we don't have a start cursor, create one and position it at the
1381 * first record.
1382 *
1383 * If start is NULL, stop must not be NULL, but static analyzers have
1384 * a hard time with that, test explicitly.
1385 */
1386 if (start == NULL && stop != NULL) {
1387 WT_ERR(__session_open_cursor(
1388 (WT_SESSION *)session, stop->uri, NULL, NULL, &start));
1389 local_start = true;
1390 WT_ERR(start->next(start));
1391 }
1392
1393 /*
1394 * If the start/stop keys cross, we're done, the range must be empty.
1395 */
1396 if (stop != NULL) {
1397 WT_ERR(start->compare(start, stop, &cmp));
1398 if (cmp > 0)
1399 goto done;
1400 }
1401
1402 WT_ERR(__wt_schema_range_truncate(session, start, stop));
1403
1404 done:
1405 err: /*
1406 * Close any locally-opened start cursor.
1407 *
1408 * Reset application cursors, they've possibly moved and the
1409 * application cannot use them. Note that we can make it here with a
1410 * NULL start cursor (e.g., if the truncate range is empty).
1411 */
1412 if (local_start)
1413 WT_TRET(start->close(start));
1414 else if (start != NULL)
1415 WT_TRET(start->reset(start));
1416 if (stop != NULL)
1417 WT_TRET(stop->reset(stop));
1418 return (ret);
1419 }
1420
1421 /*
1422 * __session_truncate --
1423 * WT_SESSION->truncate method.
1424 */
1425 static int
__session_truncate(WT_SESSION * wt_session,const char * uri,WT_CURSOR * start,WT_CURSOR * stop,const char * config)1426 __session_truncate(WT_SESSION *wt_session,
1427 const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
1428 {
1429 WT_DECL_RET;
1430 WT_SESSION_IMPL *session;
1431
1432 session = (WT_SESSION_IMPL *)wt_session;
1433 SESSION_TXN_API_CALL(session, truncate, config, cfg);
1434 WT_STAT_CONN_INCR(session, cursor_truncate);
1435
1436 /*
1437 * If the URI is specified, we don't need a start/stop, if start/stop
1438 * is specified, we don't need a URI. One exception is the log URI
1439 * which may truncate (archive) log files for a backup cursor.
1440 *
1441 * If no URI is specified, and both cursors are specified, start/stop
1442 * must reference the same object.
1443 *
1444 * Any specified cursor must have been initialized.
1445 */
1446 if ((uri == NULL && start == NULL && stop == NULL) ||
1447 (uri != NULL && !WT_PREFIX_MATCH(uri, "log:") &&
1448 (start != NULL || stop != NULL)))
1449 WT_ERR_MSG(session, EINVAL,
1450 "the truncate method should be passed either a URI or "
1451 "start/stop cursors, but not both");
1452
1453 if (uri != NULL) {
1454 /* Disallow objects in the WiredTiger name space. */
1455 WT_ERR(__wt_str_name_check(session, uri));
1456
1457 if (WT_PREFIX_MATCH(uri, "log:")) {
1458 /*
1459 * Verify the user only gave the URI prefix and not
1460 * a specific target name after that.
1461 */
1462 if (strcmp(uri, "log:") != 0)
1463 WT_ERR_MSG(session, EINVAL,
1464 "the truncate method should not specify any"
1465 "target after the log: URI prefix");
1466 WT_ERR(__wt_log_truncate_files(session, start, false));
1467 } else if (WT_PREFIX_MATCH(uri, "file:"))
1468 WT_ERR(__wt_session_range_truncate(
1469 session, uri, start, stop));
1470 else
1471 /* Wait for checkpoints to avoid EBUSY errors. */
1472 WT_WITH_CHECKPOINT_LOCK(session,
1473 WT_WITH_SCHEMA_LOCK(session,
1474 ret = __wt_schema_truncate(session, uri, cfg)));
1475 } else
1476 WT_ERR(__wt_session_range_truncate(session, uri, start, stop));
1477
1478 err: TXN_API_END_RETRY(session, ret, 0);
1479
1480 if (ret != 0)
1481 WT_STAT_CONN_INCR(session, session_table_truncate_fail);
1482 else
1483 WT_STAT_CONN_INCR(session, session_table_truncate_success);
1484 /*
1485 * Only map WT_NOTFOUND to ENOENT if a URI was specified.
1486 */
1487 return (ret == WT_NOTFOUND && uri != NULL ? ENOENT : ret);
1488 }
1489
1490 /*
1491 * __session_truncate_readonly --
1492 * WT_SESSION->truncate method; readonly version.
1493 */
1494 static int
__session_truncate_readonly(WT_SESSION * wt_session,const char * uri,WT_CURSOR * start,WT_CURSOR * stop,const char * config)1495 __session_truncate_readonly(WT_SESSION *wt_session,
1496 const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
1497 {
1498 WT_DECL_RET;
1499 WT_SESSION_IMPL *session;
1500
1501 WT_UNUSED(uri);
1502 WT_UNUSED(start);
1503 WT_UNUSED(stop);
1504 WT_UNUSED(config);
1505
1506 session = (WT_SESSION_IMPL *)wt_session;
1507 SESSION_API_CALL_NOCONF(session, truncate);
1508
1509 WT_STAT_CONN_INCR(session, session_table_truncate_fail);
1510 ret = __wt_session_notsup(session);
1511 err: API_END_RET(session, ret);
1512 }
1513
1514 /*
1515 * __session_upgrade --
1516 * WT_SESSION->upgrade method.
1517 */
1518 static int
__session_upgrade(WT_SESSION * wt_session,const char * uri,const char * config)1519 __session_upgrade(WT_SESSION *wt_session, const char *uri, const char *config)
1520 {
1521 WT_DECL_RET;
1522 WT_SESSION_IMPL *session;
1523
1524 session = (WT_SESSION_IMPL *)wt_session;
1525
1526 SESSION_API_CALL(session, upgrade, config, cfg);
1527
1528 WT_ERR(__wt_inmem_unsupported_op(session, NULL));
1529
1530 /* Block out checkpoints to avoid spurious EBUSY errors. */
1531 WT_WITH_CHECKPOINT_LOCK(session,
1532 WT_WITH_SCHEMA_LOCK(session,
1533 ret = __wt_schema_worker(session, uri, __wt_upgrade,
1534 NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_UPGRADE)));
1535
1536 err: API_END_RET_NOTFOUND_MAP(session, ret);
1537 }
1538
1539 /*
1540 * __session_upgrade_readonly --
1541 * WT_SESSION->upgrade method; readonly version.
1542 */
1543 static int
__session_upgrade_readonly(WT_SESSION * wt_session,const char * uri,const char * config)1544 __session_upgrade_readonly(
1545 WT_SESSION *wt_session, const char *uri, const char *config)
1546 {
1547 WT_DECL_RET;
1548 WT_SESSION_IMPL *session;
1549
1550 WT_UNUSED(uri);
1551 WT_UNUSED(config);
1552
1553 session = (WT_SESSION_IMPL *)wt_session;
1554 SESSION_API_CALL_NOCONF(session, upgrade);
1555
1556 ret = __wt_session_notsup(session);
1557 err: API_END_RET(session, ret);
1558 }
1559
1560 /*
1561 * __session_verify --
1562 * WT_SESSION->verify method.
1563 */
1564 static int
__session_verify(WT_SESSION * wt_session,const char * uri,const char * config)1565 __session_verify(WT_SESSION *wt_session, const char *uri, const char *config)
1566 {
1567 WT_DECL_RET;
1568 WT_SESSION_IMPL *session;
1569
1570 session = (WT_SESSION_IMPL *)wt_session;
1571
1572 SESSION_API_CALL(session, verify, config, cfg);
1573
1574 WT_ERR(__wt_inmem_unsupported_op(session, NULL));
1575
1576 /* Block out checkpoints to avoid spurious EBUSY errors. */
1577 WT_WITH_CHECKPOINT_LOCK(session,
1578 WT_WITH_SCHEMA_LOCK(session,
1579 ret = __wt_schema_worker(session, uri, __wt_verify,
1580 NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY)));
1581
1582 err:
1583 if (ret != 0)
1584 WT_STAT_CONN_INCR(session, session_table_verify_fail);
1585 else
1586 WT_STAT_CONN_INCR(session, session_table_verify_success);
1587 API_END_RET_NOTFOUND_MAP(session, ret);
1588 }
1589
1590 /*
1591 * __session_begin_transaction --
1592 * WT_SESSION->begin_transaction method.
1593 */
1594 static int
__session_begin_transaction(WT_SESSION * wt_session,const char * config)1595 __session_begin_transaction(WT_SESSION *wt_session, const char *config)
1596 {
1597 WT_DECL_RET;
1598 WT_SESSION_IMPL *session;
1599
1600 session = (WT_SESSION_IMPL *)wt_session;
1601 /*
1602 * Indicated as allowed in prepared state, even though not allowed,
1603 * so that running transaction check below take precedence.
1604 */
1605 SESSION_API_CALL_PREPARE_ALLOWED(
1606 session, begin_transaction, config, cfg);
1607 WT_STAT_CONN_INCR(session, txn_begin);
1608
1609 WT_ERR(__wt_txn_context_check(session, false));
1610
1611 ret = __wt_txn_begin(session, cfg);
1612
1613 err: API_END_RET(session, ret);
1614 }
1615
1616 /*
1617 * __session_commit_transaction --
1618 * WT_SESSION->commit_transaction method.
1619 */
1620 static int
__session_commit_transaction(WT_SESSION * wt_session,const char * config)1621 __session_commit_transaction(WT_SESSION *wt_session, const char *config)
1622 {
1623 WT_DECL_RET;
1624 WT_SESSION_IMPL *session;
1625 WT_TXN *txn;
1626
1627 session = (WT_SESSION_IMPL *)wt_session;
1628 SESSION_API_CALL_PREPARE_ALLOWED(
1629 session, commit_transaction, config, cfg);
1630 WT_STAT_CONN_INCR(session, txn_commit);
1631
1632 txn = &session->txn;
1633 if (F_ISSET(txn, WT_TXN_PREPARE)) {
1634 WT_STAT_CONN_INCR(session, txn_prepare_commit);
1635 WT_STAT_CONN_DECR(session, txn_prepare_active);
1636 }
1637
1638 WT_ERR(__wt_txn_context_check(session, true));
1639
1640 if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0)
1641 WT_ERR_MSG(session, EINVAL,
1642 "failed transaction requires rollback%s%s",
1643 txn->rollback_reason == NULL ? "" : ": ",
1644 txn->rollback_reason == NULL ? "" : txn->rollback_reason);
1645
1646 if (ret == 0)
1647 ret = __wt_txn_commit(session, cfg);
1648 else {
1649 WT_TRET(__wt_session_reset_cursors(session, false));
1650 WT_TRET(__wt_txn_rollback(session, cfg));
1651 }
1652
1653 err: API_END_RET(session, ret);
1654 }
1655
1656 /*
1657 * __session_prepare_transaction --
1658 * WT_SESSION->prepare_transaction method.
1659 */
1660 static int
__session_prepare_transaction(WT_SESSION * wt_session,const char * config)1661 __session_prepare_transaction(WT_SESSION *wt_session, const char *config)
1662 {
1663 WT_DECL_RET;
1664 WT_SESSION_IMPL *session;
1665 WT_TXN *txn;
1666
1667 session = (WT_SESSION_IMPL *)wt_session;
1668 SESSION_API_CALL(session, prepare_transaction, config, cfg);
1669 WT_STAT_CONN_INCR(session, txn_prepare);
1670 WT_STAT_CONN_INCR(session, txn_prepare_active);
1671
1672 WT_ERR(__wt_txn_context_check(session, true));
1673
1674 /*
1675 * A failed transaction cannot be prepared, as it cannot guarantee
1676 * a subsequent commit.
1677 */
1678 txn = &session->txn;
1679 if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0)
1680 WT_ERR_MSG(session, EINVAL,
1681 "failed transaction requires rollback%s%s",
1682 txn->rollback_reason == NULL ? "" : ": ",
1683 txn->rollback_reason == NULL ? "" : txn->rollback_reason);
1684
1685 WT_ERR(__wt_txn_prepare(session, cfg));
1686
1687 err: API_END_RET(session, ret);
1688
1689 }
1690
1691 /*
1692 * __session_prepare_transaction_readonly --
1693 * WT_SESSION->prepare_transaction method; readonly version.
1694 */
1695 static int
__session_prepare_transaction_readonly(WT_SESSION * wt_session,const char * config)1696 __session_prepare_transaction_readonly(
1697 WT_SESSION *wt_session, const char *config)
1698 {
1699 WT_DECL_RET;
1700 WT_SESSION_IMPL *session;
1701
1702 WT_UNUSED(config);
1703
1704 session = (WT_SESSION_IMPL *)wt_session;
1705 SESSION_API_CALL_NOCONF(session, prepare_transaction);
1706
1707 ret = __wt_session_notsup(session);
1708 err: API_END_RET(session, ret);
1709 }
1710
1711 /*
1712 * __session_rollback_transaction --
1713 * WT_SESSION->rollback_transaction method.
1714 */
1715 static int
__session_rollback_transaction(WT_SESSION * wt_session,const char * config)1716 __session_rollback_transaction(WT_SESSION *wt_session, const char *config)
1717 {
1718 WT_DECL_RET;
1719 WT_SESSION_IMPL *session;
1720 WT_TXN *txn;
1721
1722 session = (WT_SESSION_IMPL *)wt_session;
1723 SESSION_API_CALL_PREPARE_ALLOWED(
1724 session, rollback_transaction, config, cfg);
1725 WT_STAT_CONN_INCR(session, txn_rollback);
1726
1727 txn = &session->txn;
1728 if (F_ISSET(txn, WT_TXN_PREPARE)) {
1729 WT_STAT_CONN_INCR(session, txn_prepare_rollback);
1730 WT_STAT_CONN_DECR(session, txn_prepare_active);
1731 }
1732
1733 WT_ERR(__wt_txn_context_check(session, true));
1734
1735 WT_TRET(__wt_session_reset_cursors(session, false));
1736
1737 WT_TRET(__wt_txn_rollback(session, cfg));
1738
1739 err: API_END_RET(session, ret);
1740 }
1741
1742 /*
1743 * __session_timestamp_transaction --
1744 * WT_SESSION->timestamp_transaction method.
1745 */
1746 static int
__session_timestamp_transaction(WT_SESSION * wt_session,const char * config)1747 __session_timestamp_transaction(WT_SESSION *wt_session, const char *config)
1748 {
1749 WT_DECL_RET;
1750 WT_SESSION_IMPL *session;
1751
1752 session = (WT_SESSION_IMPL *)wt_session;
1753 #ifdef HAVE_DIAGNOSTIC
1754 SESSION_API_CALL_PREPARE_ALLOWED(session,
1755 timestamp_transaction, config, cfg);
1756 #else
1757 SESSION_API_CALL_PREPARE_ALLOWED(session,
1758 timestamp_transaction, NULL, cfg);
1759 cfg[1] = config;
1760 #endif
1761 WT_TRET(__wt_txn_set_timestamp(session, cfg));
1762 err: API_END_RET(session, ret);
1763 }
1764
1765 /*
1766 * __session_query_timestamp --
1767 * WT_SESSION->query_timestamp method.
1768 */
1769 static int
__session_query_timestamp(WT_SESSION * wt_session,char * hex_timestamp,const char * config)1770 __session_query_timestamp(
1771 WT_SESSION *wt_session, char *hex_timestamp, const char *config)
1772 {
1773 WT_DECL_RET;
1774 WT_SESSION_IMPL *session;
1775
1776 session = (WT_SESSION_IMPL *)wt_session;
1777 SESSION_API_CALL_PREPARE_ALLOWED(session,
1778 query_timestamp, config, cfg);
1779 WT_TRET(__wt_txn_query_timestamp(session, hex_timestamp, cfg, false));
1780 err: API_END_RET(session, ret);
1781 }
1782
1783 /*
1784 * __session_transaction_pinned_range --
1785 * WT_SESSION->transaction_pinned_range method.
1786 */
1787 static int
__session_transaction_pinned_range(WT_SESSION * wt_session,uint64_t * prange)1788 __session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange)
1789 {
1790 WT_DECL_RET;
1791 WT_SESSION_IMPL *session;
1792 WT_TXN_STATE *txn_state;
1793 uint64_t pinned;
1794
1795 session = (WT_SESSION_IMPL *)wt_session;
1796 SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, pinned_range);
1797
1798 txn_state = WT_SESSION_TXN_STATE(session);
1799
1800 /* Assign pinned to the lesser of id or snap_min */
1801 if (txn_state->id != WT_TXN_NONE &&
1802 WT_TXNID_LT(txn_state->id, txn_state->pinned_id))
1803 pinned = txn_state->id;
1804 else
1805 pinned = txn_state->pinned_id;
1806
1807 if (pinned == WT_TXN_NONE)
1808 *prange = 0;
1809 else
1810 *prange = S2C(session)->txn_global.current - pinned;
1811
1812 err: API_END_RET(session, ret);
1813 }
1814
1815 /*
1816 * __transaction_sync_run_chk --
1817 * Check to decide if the transaction sync call should continue running.
1818 */
1819 static bool
__transaction_sync_run_chk(WT_SESSION_IMPL * session)1820 __transaction_sync_run_chk(WT_SESSION_IMPL *session)
1821 {
1822 WT_CONNECTION_IMPL *conn;
1823
1824 conn = S2C(session);
1825
1826 return (FLD_ISSET(conn->flags, WT_CONN_SERVER_LOG));
1827 }
1828
1829 /*
1830 * __session_transaction_sync --
1831 * WT_SESSION->transaction_sync method.
1832 */
1833 static int
__session_transaction_sync(WT_SESSION * wt_session,const char * config)1834 __session_transaction_sync(WT_SESSION *wt_session, const char *config)
1835 {
1836 WT_CONFIG_ITEM cval;
1837 WT_CONNECTION_IMPL *conn;
1838 WT_DECL_RET;
1839 WT_LOG *log;
1840 WT_SESSION_IMPL *session;
1841 uint64_t remaining_usec, timeout_ms, waited_ms;
1842 uint64_t time_start, time_stop;
1843
1844 session = (WT_SESSION_IMPL *)wt_session;
1845 /*
1846 * Indicated as allowed in prepared state, even though not allowed,
1847 * so that running transaction check below take precedence.
1848 */
1849 SESSION_API_CALL_PREPARE_ALLOWED(
1850 session, transaction_sync, config, cfg);
1851 WT_STAT_CONN_INCR(session, txn_sync);
1852
1853 conn = S2C(session);
1854 WT_ERR(__wt_txn_context_check(session, false));
1855
1856 /*
1857 * If logging is not enabled there is nothing to do.
1858 */
1859 if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
1860 WT_ERR_MSG(session, EINVAL, "logging not enabled");
1861
1862 log = conn->log;
1863
1864 /*
1865 * If there is no background sync LSN in this session, there
1866 * is nothing to do.
1867 */
1868 if (WT_IS_INIT_LSN(&session->bg_sync_lsn))
1869 goto err;
1870
1871 /*
1872 * If our LSN is smaller than the current sync LSN then our
1873 * transaction is stable. We're done.
1874 */
1875 if (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) <= 0)
1876 goto err;
1877
1878 /*
1879 * Our LSN is not yet stable. Wait and check again depending on the
1880 * timeout.
1881 */
1882 WT_ERR(__wt_config_gets_def(session,
1883 cfg, "timeout_ms", (int)WT_SESSION_BG_SYNC_MSEC, &cval));
1884 timeout_ms = (uint64_t)cval.val;
1885
1886 if (timeout_ms == 0)
1887 WT_ERR(ETIMEDOUT);
1888
1889 /*
1890 * Keep checking the LSNs until we find it is stable or we reach
1891 * our timeout, or there's some other reason to quit.
1892 */
1893 time_start = __wt_clock(session);
1894 while (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) > 0) {
1895 if (!__transaction_sync_run_chk(session))
1896 WT_ERR(ETIMEDOUT);
1897
1898 __wt_cond_signal(session, conn->log_file_cond);
1899 time_stop = __wt_clock(session);
1900 waited_ms = WT_CLOCKDIFF_MS(time_stop, time_start);
1901 if (waited_ms < timeout_ms) {
1902 remaining_usec = (timeout_ms - waited_ms) * WT_THOUSAND;
1903 __wt_cond_wait(session, log->log_sync_cond,
1904 remaining_usec, __transaction_sync_run_chk);
1905 } else
1906 WT_ERR(ETIMEDOUT);
1907 }
1908
1909 err: API_END_RET(session, ret);
1910 }
1911
1912 /*
1913 * __session_transaction_sync_readonly --
1914 * WT_SESSION->transaction_sync method; readonly version.
1915 */
1916 static int
__session_transaction_sync_readonly(WT_SESSION * wt_session,const char * config)1917 __session_transaction_sync_readonly(WT_SESSION *wt_session, const char *config)
1918 {
1919 WT_DECL_RET;
1920 WT_SESSION_IMPL *session;
1921
1922 WT_UNUSED(config);
1923
1924 session = (WT_SESSION_IMPL *)wt_session;
1925 SESSION_API_CALL_NOCONF(session, transaction_sync);
1926
1927 ret = __wt_session_notsup(session);
1928 err: API_END_RET(session, ret);
1929 }
1930
1931 /*
1932 * __session_checkpoint --
1933 * WT_SESSION->checkpoint method.
1934 */
1935 static int
__session_checkpoint(WT_SESSION * wt_session,const char * config)1936 __session_checkpoint(WT_SESSION *wt_session, const char *config)
1937 {
1938 WT_DECL_RET;
1939 WT_SESSION_IMPL *session;
1940
1941 session = (WT_SESSION_IMPL *)wt_session;
1942
1943 WT_STAT_CONN_INCR(session, txn_checkpoint);
1944 /*
1945 * Indicated as allowed in prepared state, even though not allowed,
1946 * so that running transaction check below take precedence.
1947 */
1948 SESSION_API_CALL_PREPARE_ALLOWED(session, checkpoint, config, cfg);
1949
1950 WT_ERR(__wt_inmem_unsupported_op(session, NULL));
1951
1952 /*
1953 * Checkpoints require a snapshot to write a transactionally consistent
1954 * snapshot of the data.
1955 *
1956 * We can't use an application's transaction: if it has uncommitted
1957 * changes, they will be written in the checkpoint and may appear after
1958 * a crash.
1959 *
1960 * Use a real snapshot transaction: we don't want any chance of the
1961 * snapshot being updated during the checkpoint. Eviction is prevented
1962 * from evicting anything newer than this because we track the oldest
1963 * transaction ID in the system that is not visible to all readers.
1964 */
1965 WT_ERR(__wt_txn_context_check(session, false));
1966
1967 ret = __wt_txn_checkpoint(session, cfg, true);
1968
1969 /*
1970 * Release common session resources (for example, checkpoint may acquire
1971 * significant reconciliation structures/memory).
1972 */
1973 WT_TRET(__wt_session_release_resources(session));
1974
1975 err: API_END_RET_NOTFOUND_MAP(session, ret);
1976 }
1977
1978 /*
1979 * __session_checkpoint_readonly --
1980 * WT_SESSION->checkpoint method; readonly version.
1981 */
1982 static int
__session_checkpoint_readonly(WT_SESSION * wt_session,const char * config)1983 __session_checkpoint_readonly(WT_SESSION *wt_session, const char *config)
1984 {
1985 WT_DECL_RET;
1986 WT_SESSION_IMPL *session;
1987
1988 WT_UNUSED(config);
1989
1990 session = (WT_SESSION_IMPL *)wt_session;
1991 SESSION_API_CALL_NOCONF(session, checkpoint);
1992
1993 ret = __wt_session_notsup(session);
1994 err: API_END_RET(session, ret);
1995 }
1996
1997 /*
1998 * __session_snapshot --
1999 * WT_SESSION->snapshot method.
2000 */
2001 static int
__session_snapshot(WT_SESSION * wt_session,const char * config)2002 __session_snapshot(WT_SESSION *wt_session, const char *config)
2003 {
2004 WT_DECL_RET;
2005 WT_SESSION_IMPL *session;
2006 WT_TXN_GLOBAL *txn_global;
2007 bool has_create, has_drop;
2008
2009 has_create = has_drop = false;
2010 session = (WT_SESSION_IMPL *)wt_session;
2011 txn_global = &S2C(session)->txn_global;
2012
2013 SESSION_API_CALL(session, snapshot, config, cfg);
2014
2015 WT_ERR(__wt_txn_named_snapshot_config(
2016 session, cfg, &has_create, &has_drop));
2017
2018 __wt_writelock(session, &txn_global->nsnap_rwlock);
2019
2020 /* Drop any snapshots to be removed first. */
2021 if (has_drop)
2022 WT_ERR(__wt_txn_named_snapshot_drop(session, cfg));
2023
2024 /* Start the named snapshot if requested. */
2025 if (has_create)
2026 WT_ERR(__wt_txn_named_snapshot_begin(session, cfg));
2027
2028 err: __wt_writeunlock(session, &txn_global->nsnap_rwlock);
2029
2030 API_END_RET_NOTFOUND_MAP(session, ret);
2031 }
2032
2033 /*
2034 * __wt_session_strerror --
2035 * WT_SESSION->strerror method.
2036 */
2037 const char *
__wt_session_strerror(WT_SESSION * wt_session,int error)2038 __wt_session_strerror(WT_SESSION *wt_session, int error)
2039 {
2040 WT_SESSION_IMPL *session;
2041
2042 session = (WT_SESSION_IMPL *)wt_session;
2043
2044 return (__wt_strerror(session, error, NULL, 0));
2045 }
2046
2047 /*
2048 * __wt_session_breakpoint --
2049 * A place to put a breakpoint, if you need one, or call some check
2050 * code.
2051 */
2052 int
__wt_session_breakpoint(WT_SESSION * wt_session)2053 __wt_session_breakpoint(WT_SESSION *wt_session)
2054 {
2055 WT_UNUSED(wt_session);
2056
2057 return (0);
2058 }
2059
2060 /*
2061 * __open_session --
2062 * Allocate a session handle.
2063 */
2064 static int
__open_session(WT_CONNECTION_IMPL * conn,WT_EVENT_HANDLER * event_handler,const char * config,WT_SESSION_IMPL ** sessionp)2065 __open_session(WT_CONNECTION_IMPL *conn,
2066 WT_EVENT_HANDLER *event_handler, const char *config,
2067 WT_SESSION_IMPL **sessionp)
2068 {
2069 static const WT_SESSION stds = {
2070 NULL,
2071 NULL,
2072 __session_close,
2073 __session_reconfigure,
2074 __wt_session_strerror,
2075 __session_open_cursor,
2076 __session_alter,
2077 __session_create,
2078 __wt_session_compact,
2079 __session_drop,
2080 __session_join,
2081 __session_log_flush,
2082 __session_log_printf,
2083 __session_rebalance,
2084 __session_rename,
2085 __session_reset,
2086 __session_salvage,
2087 __session_truncate,
2088 __session_upgrade,
2089 __session_verify,
2090 __session_begin_transaction,
2091 __session_commit_transaction,
2092 __session_prepare_transaction,
2093 __session_rollback_transaction,
2094 __session_timestamp_transaction,
2095 __session_query_timestamp,
2096 __session_checkpoint,
2097 __session_snapshot,
2098 __session_transaction_pinned_range,
2099 __session_transaction_sync,
2100 __wt_session_breakpoint
2101 }, stds_readonly = {
2102 NULL,
2103 NULL,
2104 __session_close,
2105 __session_reconfigure,
2106 __wt_session_strerror,
2107 __session_open_cursor,
2108 __session_alter_readonly,
2109 __session_create_readonly,
2110 __wt_session_compact_readonly,
2111 __session_drop_readonly,
2112 __session_join,
2113 __session_log_flush_readonly,
2114 __session_log_printf_readonly,
2115 __session_rebalance_readonly,
2116 __session_rename_readonly,
2117 __session_reset,
2118 __session_salvage_readonly,
2119 __session_truncate_readonly,
2120 __session_upgrade_readonly,
2121 __session_verify,
2122 __session_begin_transaction,
2123 __session_commit_transaction,
2124 __session_prepare_transaction_readonly,
2125 __session_rollback_transaction,
2126 __session_timestamp_transaction,
2127 __session_query_timestamp,
2128 __session_checkpoint_readonly,
2129 __session_snapshot,
2130 __session_transaction_pinned_range,
2131 __session_transaction_sync_readonly,
2132 __wt_session_breakpoint
2133 };
2134 WT_DECL_RET;
2135 WT_SESSION_IMPL *session, *session_ret;
2136 uint32_t i;
2137
2138 *sessionp = NULL;
2139
2140 session = conn->default_session;
2141 session_ret = NULL;
2142
2143 __wt_spin_lock(session, &conn->api_lock);
2144
2145 /*
2146 * Make sure we don't try to open a new session after the application
2147 * closes the connection. This is particularly intended to catch
2148 * cases where server threads open sessions.
2149 */
2150 WT_ASSERT(session, !F_ISSET(conn, WT_CONN_CLOSING));
2151
2152 /* Find the first inactive session slot. */
2153 for (session_ret = conn->sessions,
2154 i = 0; i < conn->session_size; ++session_ret, ++i)
2155 if (!session_ret->active)
2156 break;
2157 if (i == conn->session_size)
2158 WT_ERR_MSG(session, WT_ERROR,
2159 "out of sessions, configured for %" PRIu32 " (including "
2160 "internal sessions)",
2161 conn->session_size);
2162
2163 /*
2164 * If the active session count is increasing, update it. We don't worry
2165 * about correcting the session count on error, as long as we don't mark
2166 * this session as active, we'll clean it up on close.
2167 */
2168 if (i >= conn->session_cnt) /* Defend against off-by-one errors. */
2169 conn->session_cnt = i + 1;
2170
2171 session_ret->iface =
2172 F_ISSET(conn, WT_CONN_READONLY) ? stds_readonly : stds;
2173 session_ret->iface.connection = &conn->iface;
2174
2175 session_ret->name = NULL;
2176 session_ret->id = i;
2177
2178 if (WT_SESSION_FIRST_USE(session_ret))
2179 __wt_random_init(&session_ret->rnd);
2180
2181 __wt_event_handler_set(session_ret,
2182 event_handler == NULL ? session->event_handler : event_handler);
2183
2184 TAILQ_INIT(&session_ret->cursors);
2185 TAILQ_INIT(&session_ret->dhandles);
2186
2187 /*
2188 * If we don't have them, allocate the cursor and dhandle hash arrays.
2189 * Allocate the table hash array as well.
2190 */
2191 if (session_ret->cursor_cache == NULL)
2192 WT_ERR(__wt_calloc_def(
2193 session, WT_HASH_ARRAY_SIZE, &session_ret->cursor_cache));
2194 if (session_ret->dhhash == NULL)
2195 WT_ERR(__wt_calloc_def(
2196 session, WT_HASH_ARRAY_SIZE, &session_ret->dhhash));
2197
2198 /* Initialize the dhandle hash array. */
2199 for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
2200 TAILQ_INIT(&session_ret->dhhash[i]);
2201
2202 /* Initialize the cursor cache hash buckets and sweep trigger. */
2203 for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
2204 TAILQ_INIT(&session_ret->cursor_cache[i]);
2205 session_ret->cursor_sweep_countdown = WT_SESSION_CURSOR_SWEEP_COUNTDOWN;
2206
2207 /* Initialize transaction support: default to read-committed. */
2208 session_ret->isolation = WT_ISO_READ_COMMITTED;
2209 WT_ERR(__wt_txn_init(session, session_ret));
2210
2211 /*
2212 * The session's hazard pointer memory isn't discarded during normal
2213 * session close because access to it isn't serialized. Allocate the
2214 * first time we open this session.
2215 */
2216 if (WT_SESSION_FIRST_USE(session_ret)) {
2217 WT_ERR(__wt_calloc_def(session,
2218 WT_SESSION_INITIAL_HAZARD_SLOTS, &session_ret->hazard));
2219 session_ret->hazard_size = WT_SESSION_INITIAL_HAZARD_SLOTS;
2220 session_ret->hazard_inuse = 0;
2221 session_ret->nhazard = 0;
2222 }
2223
2224 /* Cache the offset of this session's statistics bucket. */
2225 session_ret->stat_bucket = WT_STATS_SLOT_ID(session);
2226
2227 /* Allocate the buffer for operation tracking */
2228 if (F_ISSET(conn, WT_CONN_OPTRACK)) {
2229 WT_ERR(__wt_malloc(
2230 session, WT_OPTRACK_BUFSIZE, &session_ret->optrack_buf));
2231 session_ret->optrackbuf_ptr = 0;
2232 }
2233
2234 /* Set the default value for session flags. */
2235 if (F_ISSET(conn, WT_CONN_CACHE_CURSORS))
2236 F_SET(session_ret, WT_SESSION_CACHE_CURSORS);
2237
2238 /*
2239 * Configuration: currently, the configuration for open_session is the
2240 * same as session.reconfigure, so use that function.
2241 */
2242 if (config != NULL)
2243 WT_ERR(
2244 __session_reconfigure((WT_SESSION *)session_ret, config));
2245
2246 /*
2247 * Publish: make the entry visible to server threads. There must be a
2248 * barrier for two reasons, to ensure structure fields are set before
2249 * any other thread will consider the session, and to push the session
2250 * count to ensure the eviction thread can't review too few slots.
2251 */
2252 WT_PUBLISH(session_ret->active, 1);
2253
2254 WT_STATIC_ASSERT(offsetof(WT_SESSION_IMPL, iface) == 0);
2255 *sessionp = session_ret;
2256
2257 WT_STAT_CONN_INCR(session, session_open);
2258
2259 err: __wt_spin_unlock(session, &conn->api_lock);
2260 return (ret);
2261 }
2262
2263 /*
2264 * __wt_open_session --
2265 * Allocate a session handle.
2266 */
2267 int
__wt_open_session(WT_CONNECTION_IMPL * conn,WT_EVENT_HANDLER * event_handler,const char * config,bool open_metadata,WT_SESSION_IMPL ** sessionp)2268 __wt_open_session(WT_CONNECTION_IMPL *conn,
2269 WT_EVENT_HANDLER *event_handler, const char *config,
2270 bool open_metadata, WT_SESSION_IMPL **sessionp)
2271 {
2272 WT_DECL_RET;
2273 WT_SESSION *wt_session;
2274 WT_SESSION_IMPL *session;
2275
2276 *sessionp = NULL;
2277
2278 /* Acquire a session. */
2279 WT_RET(__open_session(conn, event_handler, config, &session));
2280
2281 /*
2282 * Acquiring the metadata handle requires the schema lock; we've seen
2283 * problems in the past where a session has acquired the schema lock
2284 * unexpectedly, relatively late in the run, and deadlocked. Be
2285 * defensive, get it now. The metadata file may not exist when the
2286 * connection first creates its default session or the shared cache
2287 * pool creates its sessions, let our caller decline this work.
2288 */
2289 if (open_metadata) {
2290 WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
2291 if ((ret = __wt_metadata_cursor(session, NULL)) != 0) {
2292 wt_session = &session->iface;
2293 WT_TRET(wt_session->close(wt_session, NULL));
2294 return (ret);
2295 }
2296 }
2297
2298 *sessionp = session;
2299 return (0);
2300 }
2301
2302 /*
2303 * __wt_open_internal_session --
2304 * Allocate a session for WiredTiger's use.
2305 */
2306 int
__wt_open_internal_session(WT_CONNECTION_IMPL * conn,const char * name,bool open_metadata,uint32_t session_flags,WT_SESSION_IMPL ** sessionp)2307 __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name,
2308 bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp)
2309 {
2310 WT_SESSION_IMPL *session;
2311
2312 *sessionp = NULL;
2313
2314 /* Acquire a session. */
2315 WT_RET(__wt_open_session(conn, NULL, NULL, open_metadata, &session));
2316 session->name = name;
2317
2318 /*
2319 * Public sessions are automatically closed during WT_CONNECTION->close.
2320 * If the session handles for internal threads were to go on the public
2321 * list, there would be complex ordering issues during close. Set a
2322 * flag to avoid this: internal sessions are not closed automatically.
2323 */
2324 F_SET(session, session_flags | WT_SESSION_INTERNAL);
2325
2326 *sessionp = session;
2327 return (0);
2328 }
2329