1 /*-
2  * Copyright (c) 2014-2018 MongoDB, Inc.
3  * Copyright (c) 2008-2014 WiredTiger, Inc.
4  *	All rights reserved.
5  *
6  * See the file LICENSE for redistribution information.
7  */
8 
9 #include "wt_internal.h"
10 
11 static int __session_checkpoint(WT_SESSION *, const char *);
12 static int __session_snapshot(WT_SESSION *, const char *);
13 static int __session_rollback_transaction(WT_SESSION *, const char *);
14 
15 /*
16  * __wt_session_notsup --
17  *	Unsupported session method.
18  */
19 int
__wt_session_notsup(WT_SESSION_IMPL * session)20 __wt_session_notsup(WT_SESSION_IMPL *session)
21 {
22 	WT_RET_MSG(session, ENOTSUP, "Unsupported session method");
23 }
24 
25 /*
26  * __wt_session_reset_cursors --
27  *	Reset all open cursors.
28  */
29 int
__wt_session_reset_cursors(WT_SESSION_IMPL * session,bool free_buffers)30 __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers)
31 {
32 	WT_CURSOR *cursor;
33 	WT_DECL_RET;
34 
35 	TAILQ_FOREACH(cursor, &session->cursors, q) {
36 		/* Stop when there are no positioned cursors. */
37 		if (session->ncursors == 0)
38 			break;
39 		if (!F_ISSET(cursor, WT_CURSTD_JOINED))
40 			WT_TRET(cursor->reset(cursor));
41 		/* Optionally, free the cursor buffers */
42 		if (free_buffers) {
43 			__wt_buf_free(session, &cursor->key);
44 			__wt_buf_free(session, &cursor->value);
45 		}
46 	}
47 
48 	WT_ASSERT(session, session->ncursors == 0);
49 	return (ret);
50 }
51 
52 /*
53  * __wt_session_cursor_cache_sweep --
54  *	Sweep the cursor cache.
55  */
56 int
__wt_session_cursor_cache_sweep(WT_SESSION_IMPL * session)57 __wt_session_cursor_cache_sweep(WT_SESSION_IMPL *session)
58 {
59 	WT_CURSOR *cursor, *cursor_tmp;
60 	WT_CURSOR_LIST *cached_list;
61 	WT_DECL_RET;
62 	time_t now;
63 	uint32_t position;
64 	int i, t_ret, nbuckets, nexamined, nclosed;
65 	bool productive;
66 
67 	if (!F_ISSET(session, WT_SESSION_CACHE_CURSORS))
68 		return (0);
69 
70 	/*
71 	 * Periodically sweep for dead cursors; if we've swept recently, don't
72 	 * do it again.
73 	 */
74 	__wt_seconds(session, &now);
75 	if (difftime(now, session->last_cursor_sweep) < 1)
76 		return (0);
77 	session->last_cursor_sweep = now;
78 
79 	position = session->cursor_sweep_position;
80 	productive = true;
81 	nbuckets = nexamined = nclosed = 0;
82 
83 	/* Turn off caching so that cursor close doesn't try to cache. */
84 	F_CLR(session, WT_SESSION_CACHE_CURSORS);
85 	for (i = 0; i < WT_SESSION_CURSOR_SWEEP_MAX && productive; i++) {
86 		++nbuckets;
87 		cached_list = &session->cursor_cache[position];
88 		position = (position + 1) % WT_HASH_ARRAY_SIZE;
89 		TAILQ_FOREACH_SAFE(cursor, cached_list, q, cursor_tmp) {
90 			/*
91 			 * First check to see if the cursor could be reopened.
92 			 */
93 			++nexamined;
94 			t_ret = cursor->reopen(cursor, true);
95 			if (t_ret != 0) {
96 				WT_TRET_NOTFOUND_OK(t_ret);
97 				WT_TRET_NOTFOUND_OK(
98 				    cursor->reopen(cursor, false));
99 				WT_TRET(cursor->close(cursor));
100 				++nclosed;
101 			}
102 		}
103 
104 		/*
105 		 * We continue sweeping as long as we have some good average
106 		 * productivity, or we are under the minimum.
107 		 */
108 		productive = (nclosed + WT_SESSION_CURSOR_SWEEP_MIN > i);
109 	}
110 
111 	session->cursor_sweep_position = position;
112 	F_SET(session, WT_SESSION_CACHE_CURSORS);
113 
114 	WT_STAT_CONN_INCR(session, cursor_sweep);
115 	WT_STAT_CONN_INCRV(session, cursor_sweep_buckets, nbuckets);
116 	WT_STAT_CONN_INCRV(session, cursor_sweep_examined, nexamined);
117 	WT_STAT_CONN_INCRV(session, cursor_sweep_closed, nclosed);
118 
119 	return (ret);
120 }
121 
122 /*
123  * __wt_session_copy_values --
124  *	Copy values into all positioned cursors, so that they don't keep
125  *	transaction IDs pinned.
126  */
127 int
__wt_session_copy_values(WT_SESSION_IMPL * session)128 __wt_session_copy_values(WT_SESSION_IMPL *session)
129 {
130 	WT_CURSOR *cursor;
131 
132 	TAILQ_FOREACH(cursor, &session->cursors, q)
133 		if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) {
134 #ifdef HAVE_DIAGNOSTIC
135 			/*
136 			 * We have to do this with a transaction ID pinned
137 			 * unless the cursor is reading from a checkpoint.
138 			 */
139 			WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(session);
140 			WT_ASSERT(session,
141 			    txn_state->pinned_id != WT_TXN_NONE ||
142 			    (WT_PREFIX_MATCH(cursor->uri, "file:") &&
143 			    F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN)));
144 #endif
145 			WT_RET(__cursor_localvalue(cursor));
146 		}
147 
148 	return (0);
149 }
150 
151 /*
152  * __wt_session_release_resources --
153  *	Release common session resources.
154  */
155 int
__wt_session_release_resources(WT_SESSION_IMPL * session)156 __wt_session_release_resources(WT_SESSION_IMPL *session)
157 {
158 	WT_DECL_RET;
159 
160 	/* Transaction cleanup */
161 	__wt_txn_release_resources(session);
162 
163 	/* Block manager cleanup */
164 	if (session->block_manager_cleanup != NULL)
165 		WT_TRET(session->block_manager_cleanup(session));
166 
167 	/* Reconciliation cleanup */
168 	if (session->reconcile_cleanup != NULL)
169 		WT_TRET(session->reconcile_cleanup(session));
170 
171 	/* Stashed memory. */
172 	__wt_stash_discard(session);
173 
174 	/*
175 	 * Discard scratch buffers, error memory; last, just in case a cleanup
176 	 * routine uses scratch buffers.
177 	 */
178 	__wt_scr_discard(session);
179 	__wt_buf_free(session, &session->err);
180 
181 	return (ret);
182 }
183 
184 /*
185  * __session_clear --
186  *	Clear a session structure.
187  */
188 static void
__session_clear(WT_SESSION_IMPL * session)189 __session_clear(WT_SESSION_IMPL *session)
190 {
191 	/*
192 	 * There's no serialization support around the review of the hazard
193 	 * array, which means threads checking for hazard pointers first check
194 	 * the active field (which may be 0) and then use the hazard pointer
195 	 * (which cannot be NULL).
196 	 *
197 	 * Additionally, the session structure can include information that
198 	 * persists past the session's end-of-life, stored as part of page
199 	 * splits.
200 	 *
201 	 * For these reasons, be careful when clearing the session structure.
202 	 */
203 	__wt_txn_clear_timestamp_queues(session);
204 	memset(session, 0, WT_SESSION_CLEAR_SIZE);
205 
206 	WT_INIT_LSN(&session->bg_sync_lsn);
207 
208 	session->hazard_inuse = 0;
209 	session->nhazard = 0;
210 }
211 
212 /*
213  * __session_close_cursors --
214  *	Close all cursors in a list.
215  */
216 static int
__session_close_cursors(WT_SESSION_IMPL * session,WT_CURSOR_LIST * cursors)217 __session_close_cursors(WT_SESSION_IMPL *session, WT_CURSOR_LIST *cursors)
218 {
219 	WT_CURSOR *cursor, *cursor_tmp;
220 	WT_DECL_RET;
221 
222 	/* Close all open cursors. */
223 	WT_TAILQ_SAFE_REMOVE_BEGIN(cursor, cursors, q, cursor_tmp) {
224 		if (F_ISSET(cursor, WT_CURSTD_CACHED))
225 			/*
226 			 * Put the cached cursor in an open state
227 			 * that allows it to be closed.
228 			 */
229 			WT_TRET_NOTFOUND_OK(cursor->reopen(cursor, false));
230 		else if (session->event_handler->handle_close != NULL &&
231 		    strcmp(cursor->internal_uri, WT_LAS_URI) != 0)
232 			/*
233 			 * Notify the user that we are closing the cursor
234 			 * handle via the registered close callback.
235 			 */
236 			WT_TRET(session->event_handler->handle_close(
237 			    session->event_handler, &session->iface, cursor));
238 
239 		WT_TRET(cursor->close(cursor));
240 	} WT_TAILQ_SAFE_REMOVE_END
241 
242 	return (ret);
243 }
244 
245 /*
246  * __session_close_cached_cursors --
247  *	Fully close all cached cursors.
248  */
249 static int
__session_close_cached_cursors(WT_SESSION_IMPL * session)250 __session_close_cached_cursors(WT_SESSION_IMPL *session)
251 {
252 	WT_DECL_RET;
253 	int i;
254 
255 	for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
256 		WT_TRET(__session_close_cursors(session,
257 		    &session->cursor_cache[i]));
258 	return (ret);
259 }
260 
261 /*
262  * __session_close --
263  *	WT_SESSION->close method.
264  */
265 static int
__session_close(WT_SESSION * wt_session,const char * config)266 __session_close(WT_SESSION *wt_session, const char *config)
267 {
268 	WT_CONNECTION_IMPL *conn;
269 	WT_DECL_RET;
270 	WT_SESSION_IMPL *session;
271 
272 	conn = (WT_CONNECTION_IMPL *)wt_session->connection;
273 	session = (WT_SESSION_IMPL *)wt_session;
274 
275 	SESSION_API_CALL_PREPARE_ALLOWED(session, close, config, cfg);
276 	WT_UNUSED(cfg);
277 
278 	/* Close all open cursors while the cursor cache is disabled. */
279 	F_CLR(session, WT_SESSION_CACHE_CURSORS);
280 
281 	/* Rollback any active transaction. */
282 	if (F_ISSET(&session->txn, WT_TXN_RUNNING))
283 		WT_TRET(__session_rollback_transaction(wt_session, NULL));
284 
285 	/*
286 	 * Also release any pinned transaction ID from a non-transactional
287 	 * operation.
288 	 */
289 	if (conn->txn_global.states != NULL)
290 		__wt_txn_release_snapshot(session);
291 
292 	/* Close all open cursors. */
293 	WT_TRET(__session_close_cursors(session, &session->cursors));
294 	WT_TRET(__session_close_cached_cursors(session));
295 
296 	WT_ASSERT(session, session->ncursors == 0);
297 
298 	/* Discard cached handles. */
299 	__wt_session_close_cache(session);
300 
301 	/* Confirm we're not holding any hazard pointers. */
302 	__wt_hazard_close(session);
303 
304 	/* Discard metadata tracking. */
305 	__wt_meta_track_discard(session);
306 
307 	/* Free transaction information. */
308 	__wt_txn_destroy(session);
309 
310 	/*
311 	 * Close the file where we tracked long operations. Do this before
312 	 * releasing resources, as we do scratch buffer management when we
313 	 * flush optrack buffers to disk.
314 	 */
315 	if (F_ISSET(conn, WT_CONN_OPTRACK)) {
316 		if (session->optrackbuf_ptr > 0) {
317 			__wt_optrack_flush_buffer(session);
318 			WT_TRET(__wt_close(session, &session->optrack_fh));
319 		}
320 
321 		/* Free the operation tracking buffer */
322 		__wt_free(session, session->optrack_buf);
323 	}
324 
325 	/* Release common session resources. */
326 	WT_TRET(__wt_session_release_resources(session));
327 
328 	/* The API lock protects opening and closing of sessions. */
329 	__wt_spin_lock(session, &conn->api_lock);
330 
331 	/* Decrement the count of open sessions. */
332 	WT_STAT_CONN_DECR(session, session_open);
333 
334 	/*
335 	 * Sessions are re-used, clear the structure: the clear sets the active
336 	 * field to 0, which will exclude the hazard array from review by the
337 	 * eviction thread. Because some session fields are accessed by other
338 	 * threads, the structure must be cleared carefully.
339 	 *
340 	 * We don't need to publish here, because regardless of the active field
341 	 * being non-zero, the hazard pointer is always valid.
342 	 */
343 	__session_clear(session);
344 	session = conn->default_session;
345 
346 	/*
347 	 * Decrement the count of active sessions if that's possible: a session
348 	 * being closed may or may not be at the end of the array, step toward
349 	 * the beginning of the array until we reach an active session.
350 	 */
351 	while (conn->sessions[conn->session_cnt - 1].active == 0)
352 		if (--conn->session_cnt == 0)
353 			break;
354 
355 	__wt_spin_unlock(session, &conn->api_lock);
356 
357 	/* We no longer have a session, don't try to update it. */
358 	session = NULL;
359 
360 err:	API_END_RET_NOTFOUND_MAP(session, ret);
361 }
362 
363 /*
364  * __session_reconfigure --
365  *	WT_SESSION->reconfigure method.
366  */
367 static int
__session_reconfigure(WT_SESSION * wt_session,const char * config)368 __session_reconfigure(WT_SESSION *wt_session, const char *config)
369 {
370 	WT_CONFIG_ITEM cval;
371 	WT_DECL_RET;
372 	WT_SESSION_IMPL *session;
373 
374 	session = (WT_SESSION_IMPL *)wt_session;
375 	/*
376 	 * Indicated as allowed in prepared state, even though not allowed,
377 	 * so that running transaction check below take precedence.
378 	 */
379 	SESSION_API_CALL_PREPARE_ALLOWED(session, reconfigure, config, cfg);
380 
381 	/*
382 	 * Note that this method only checks keys that are passed in by the
383 	 * application: we don't want to reset other session settings to their
384 	 * default values.
385 	 */
386 	WT_UNUSED(cfg);
387 
388 	WT_ERR(__wt_txn_context_check(session, false));
389 
390 	WT_ERR(__wt_session_reset_cursors(session, false));
391 
392 	WT_ERR(__wt_txn_reconfigure(session, config));
393 
394 	ret = __wt_config_getones(session, config, "ignore_cache_size", &cval);
395 	if (ret == 0) {
396 		if (cval.val)
397 			F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
398 		else
399 			F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
400 	}
401 	WT_ERR_NOTFOUND_OK(ret);
402 
403 	ret = __wt_config_getones(session, config, "cache_cursors", &cval);
404 	if (ret == 0) {
405 		if (cval.val)
406 			F_SET(session, WT_SESSION_CACHE_CURSORS);
407 		else {
408 			F_CLR(session, WT_SESSION_CACHE_CURSORS);
409 			WT_ERR(__session_close_cached_cursors(session));
410 		}
411 	}
412 	WT_ERR_NOTFOUND_OK(ret);
413 
414 err:	API_END_RET_NOTFOUND_MAP(session, ret);
415 }
416 
417 /*
418  * __session_open_cursor_int --
419  *	Internal version of WT_SESSION::open_cursor, with second cursor arg.
420  */
421 static int
__session_open_cursor_int(WT_SESSION_IMPL * session,const char * uri,WT_CURSOR * owner,WT_CURSOR * other,const char * cfg[],WT_CURSOR ** cursorp)422 __session_open_cursor_int(WT_SESSION_IMPL *session, const char *uri,
423     WT_CURSOR *owner, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp)
424 {
425 	WT_COLGROUP *colgroup;
426 	WT_DATA_SOURCE *dsrc;
427 	WT_DECL_RET;
428 
429 	*cursorp = NULL;
430 
431 	/*
432 	 * Open specific cursor types we know about, or call the generic data
433 	 * source open function.
434 	 *
435 	 * Unwind a set of string comparisons into a switch statement hoping
436 	 * the compiler can make it fast, but list the common choices first
437 	 * instead of sorting so if/else patterns are still fast.
438 	 */
439 	switch (uri[0]) {
440 	/*
441 	 * Common cursor types.
442 	 */
443 	case 't':
444 		if (WT_PREFIX_MATCH(uri, "table:"))
445 			WT_RET(__wt_curtable_open(
446 			    session, uri, owner, cfg, cursorp));
447 		break;
448 	case 'c':
449 		if (WT_PREFIX_MATCH(uri, "colgroup:")) {
450 			/*
451 			 * Column groups are a special case: open a cursor on
452 			 * the underlying data source.
453 			 */
454 			WT_RET(__wt_schema_get_colgroup(
455 			    session, uri, false, NULL, &colgroup));
456 			WT_RET(__wt_open_cursor(
457 			    session, colgroup->source, owner, cfg, cursorp));
458 		} else if (WT_PREFIX_MATCH(uri, "config:"))
459 			WT_RET(__wt_curconfig_open(
460 			    session, uri, cfg, cursorp));
461 		break;
462 	case 'i':
463 		if (WT_PREFIX_MATCH(uri, "index:"))
464 			WT_RET(__wt_curindex_open(
465 			    session, uri, owner, cfg, cursorp));
466 		break;
467 	case 'j':
468 		if (WT_PREFIX_MATCH(uri, "join:"))
469 			WT_RET(__wt_curjoin_open(
470 			    session, uri, owner, cfg, cursorp));
471 		break;
472 	case 'l':
473 		if (WT_PREFIX_MATCH(uri, "lsm:"))
474 			WT_RET(__wt_clsm_open(
475 			    session, uri, owner, cfg, cursorp));
476 		else if (WT_PREFIX_MATCH(uri, "log:"))
477 			WT_RET(__wt_curlog_open(session, uri, cfg, cursorp));
478 		break;
479 
480 	/*
481 	 * Less common cursor types.
482 	 */
483 	case 'f':
484 		if (WT_PREFIX_MATCH(uri, "file:"))
485 			WT_RET(__wt_curfile_open(
486 			    session, uri, owner, cfg, cursorp));
487 		break;
488 	case 'm':
489 		if (WT_PREFIX_MATCH(uri, WT_METADATA_URI))
490 			WT_RET(__wt_curmetadata_open(
491 			    session, uri, owner, cfg, cursorp));
492 		break;
493 	case 'b':
494 		if (WT_PREFIX_MATCH(uri, "backup:"))
495 			WT_RET(__wt_curbackup_open(
496 			    session, uri, cfg, cursorp));
497 		break;
498 	case 's':
499 		if (WT_PREFIX_MATCH(uri, "statistics:"))
500 			WT_RET(__wt_curstat_open(session, uri, other, cfg,
501 			    cursorp));
502 		break;
503 	default:
504 		break;
505 	}
506 
507 	if (*cursorp == NULL &&
508 	    (dsrc = __wt_schema_get_source(session, uri)) != NULL)
509 		WT_RET(dsrc->open_cursor == NULL ?
510 		    __wt_object_unsupported(session, uri) :
511 		    __wt_curds_open(session, uri, owner, cfg, dsrc, cursorp));
512 
513 	if (*cursorp == NULL)
514 		return (__wt_bad_object_type(session, uri));
515 
516 	if (owner != NULL) {
517 		/*
518 		 * We support caching simple cursors that have no
519 		 * children. If this cursor is a child, we're not going
520 		 * to cache this child or its parent.
521 		 */
522 		F_CLR(owner, WT_CURSTD_CACHEABLE);
523 		F_CLR(*cursorp, WT_CURSTD_CACHEABLE);
524 	}
525 
526 	/*
527 	 * When opening simple tables, the table code calls this function on the
528 	 * underlying data source, in which case the application's URI has been
529 	 * copied.
530 	 */
531 	if ((*cursorp)->uri == NULL &&
532 	    (ret = __wt_strdup(session, uri, &(*cursorp)->uri)) != 0) {
533 		WT_TRET((*cursorp)->close(*cursorp));
534 		*cursorp = NULL;
535 	}
536 
537 	return (ret);
538 }
539 
540 /*
541  * __wt_open_cursor --
542  *	Internal version of WT_SESSION::open_cursor.
543  */
544 int
__wt_open_cursor(WT_SESSION_IMPL * session,const char * uri,WT_CURSOR * owner,const char * cfg[],WT_CURSOR ** cursorp)545 __wt_open_cursor(WT_SESSION_IMPL *session,
546     const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
547 {
548 	WT_DECL_RET;
549 
550 	/* We do not cache any subordinate tables/files cursors. */
551 	if (owner == NULL) {
552 		if ((ret = __wt_cursor_cache_get(
553 		    session, uri, NULL, cfg, cursorp)) == 0)
554 			return (0);
555 		WT_RET_NOTFOUND_OK(ret);
556 	}
557 
558 	return (__session_open_cursor_int(session, uri, owner, NULL, cfg,
559 	    cursorp));
560 }
561 
562 /*
563  * __session_open_cursor --
564  *	WT_SESSION->open_cursor method.
565  */
566 static int
__session_open_cursor(WT_SESSION * wt_session,const char * uri,WT_CURSOR * to_dup,const char * config,WT_CURSOR ** cursorp)567 __session_open_cursor(WT_SESSION *wt_session,
568     const char *uri, WT_CURSOR *to_dup, const char *config, WT_CURSOR **cursorp)
569 {
570 	WT_CURSOR *cursor;
571 	WT_DECL_RET;
572 	WT_SESSION_IMPL *session;
573 	bool statjoin;
574 
575 	cursor = *cursorp = NULL;
576 
577 	session = (WT_SESSION_IMPL *)wt_session;
578 	SESSION_API_CALL(session, open_cursor, config, cfg);
579 
580 	statjoin = (to_dup != NULL && uri != NULL &&
581 	    strcmp(uri, "statistics:join") == 0);
582 	if (!statjoin) {
583 		if ((to_dup == NULL && uri == NULL) ||
584 		    (to_dup != NULL && uri != NULL))
585 			WT_ERR_MSG(session, EINVAL,
586 			    "should be passed either a URI or a cursor to "
587 			    "duplicate, but not both");
588 
589 		if ((ret = __wt_cursor_cache_get(
590 		    session, uri, to_dup, cfg, &cursor)) == 0)
591 			goto done;
592 		WT_ERR_NOTFOUND_OK(ret);
593 
594 		if (to_dup != NULL) {
595 			uri = to_dup->uri;
596 			if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
597 			    !WT_PREFIX_MATCH(uri, "index:") &&
598 			    !WT_PREFIX_MATCH(uri, "file:") &&
599 			    !WT_PREFIX_MATCH(uri, "lsm:") &&
600 			    !WT_PREFIX_MATCH(uri, WT_METADATA_URI) &&
601 			    !WT_PREFIX_MATCH(uri, "table:") &&
602 			    __wt_schema_get_source(session, uri) == NULL)
603 				WT_ERR(__wt_bad_object_type(session, uri));
604 		}
605 	}
606 
607 	WT_ERR(__session_open_cursor_int(session, uri, NULL,
608 	    statjoin ? to_dup : NULL, cfg, &cursor));
609 
610 done:
611 	if (to_dup != NULL && !statjoin)
612 		WT_ERR(__wt_cursor_dup_position(to_dup, cursor));
613 
614 	*cursorp = cursor;
615 
616 	if (0) {
617 err:		if (cursor != NULL)
618 			WT_TRET(cursor->close(cursor));
619 	}
620 	/*
621 	 * Opening a cursor on a non-existent data source will set ret to
622 	 * either of ENOENT or WT_NOTFOUND at this point. However,
623 	 * applications may reasonably do this inside a transaction to check
624 	 * for the existence of a table or index.
625 	 *
626 	 * Failure in opening a cursor should not set an error on the
627 	 * transaction and WT_NOTFOUND will be mapped to ENOENT.
628 	 */
629 
630 	API_END_RET_NO_TXN_ERROR(session, ret);
631 }
632 
633 /*
634  * __session_alter --
635  *	Alter a table setting.
636  */
637 static int
__session_alter(WT_SESSION * wt_session,const char * uri,const char * config)638 __session_alter(WT_SESSION *wt_session, const char *uri, const char *config)
639 {
640 	WT_DECL_RET;
641 	WT_SESSION_IMPL *session;
642 
643 	session = (WT_SESSION_IMPL *)wt_session;
644 
645 	SESSION_API_CALL(session, alter, config, cfg);
646 
647 	/* In-memory ignores alter operations. */
648 	if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
649 		goto err;
650 
651 	/* Disallow objects in the WiredTiger name space. */
652 	WT_ERR(__wt_str_name_check(session, uri));
653 
654 	/*
655 	 * We replace the default configuration listing with the current
656 	 * configuration.  Otherwise the defaults for values that can be
657 	 * altered would override settings used by the user in create.
658 	 */
659 	cfg[0] = cfg[1];
660 	cfg[1] = NULL;
661 	WT_WITH_CHECKPOINT_LOCK(session,
662 	    WT_WITH_SCHEMA_LOCK(session,
663 		ret = __wt_schema_alter(session, uri, cfg)));
664 
665 err:
666 	if (ret != 0)
667 		WT_STAT_CONN_INCR(session, session_table_alter_fail);
668 	else
669 		WT_STAT_CONN_INCR(session, session_table_alter_success);
670 	API_END_RET_NOTFOUND_MAP(session, ret);
671 }
672 
673 /*
674  * __session_alter_readonly --
675  *	WT_SESSION->alter method; readonly version.
676  */
677 static int
__session_alter_readonly(WT_SESSION * wt_session,const char * uri,const char * config)678 __session_alter_readonly(
679     WT_SESSION *wt_session, const char *uri, const char *config)
680 {
681 	WT_DECL_RET;
682 	WT_SESSION_IMPL *session;
683 
684 	WT_UNUSED(uri);
685 	WT_UNUSED(config);
686 
687 	session = (WT_SESSION_IMPL *)wt_session;
688 	SESSION_API_CALL_NOCONF(session, alter);
689 
690 	WT_STAT_CONN_INCR(session, session_table_alter_fail);
691 	ret = __wt_session_notsup(session);
692 err:	API_END_RET(session, ret);
693 }
694 
695 /*
696  * __wt_session_create --
697  *	Internal version of WT_SESSION::create.
698  */
699 int
__wt_session_create(WT_SESSION_IMPL * session,const char * uri,const char * config)700 __wt_session_create(
701     WT_SESSION_IMPL *session, const char *uri, const char *config)
702 {
703 	WT_DECL_RET;
704 
705 	WT_WITH_SCHEMA_LOCK(session,
706 	    WT_WITH_TABLE_WRITE_LOCK(session,
707 		ret = __wt_schema_create(session, uri, config)));
708 	return (ret);
709 }
710 
711 /*
712  * __session_create --
713  *	WT_SESSION->create method.
714  */
715 static int
__session_create(WT_SESSION * wt_session,const char * uri,const char * config)716 __session_create(WT_SESSION *wt_session, const char *uri, const char *config)
717 {
718 	WT_CONFIG_ITEM cval;
719 	WT_DECL_RET;
720 	WT_SESSION_IMPL *session;
721 
722 	session = (WT_SESSION_IMPL *)wt_session;
723 	SESSION_API_CALL(session, create, config, cfg);
724 	WT_UNUSED(cfg);
725 
726 	/* Disallow objects in the WiredTiger name space. */
727 	WT_ERR(__wt_str_name_check(session, uri));
728 
729 	/*
730 	 * Type configuration only applies to tables, column groups and indexes.
731 	 * We don't want applications to attempt to layer LSM on top of their
732 	 * extended data-sources, and the fact we allow LSM as a valid URI is an
733 	 * invitation to that mistake: nip it in the bud.
734 	 */
735 	if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
736 	    !WT_PREFIX_MATCH(uri, "index:") &&
737 	    !WT_PREFIX_MATCH(uri, "table:")) {
738 		/*
739 		 * We can't disallow type entirely, a configuration string might
740 		 * innocently include it, for example, a dump/load pair.  If the
741 		 * underlying type is "file", it's OK ("file" is the underlying
742 		 * type for every type); if the URI type prefix and the type are
743 		 * the same, let it go.
744 		 */
745 		if ((ret =
746 		    __wt_config_getones(session, config, "type", &cval)) == 0 &&
747 		    !WT_STRING_MATCH("file", cval.str, cval.len) &&
748 		    (strncmp(uri, cval.str, cval.len) != 0 ||
749 		    uri[cval.len] != ':'))
750 			WT_ERR_MSG(session, EINVAL,
751 			    "%s: unsupported type configuration", uri);
752 		WT_ERR_NOTFOUND_OK(ret);
753 	}
754 
755 	ret = __wt_session_create(session, uri, config);
756 
757 err:
758 	if (ret != 0)
759 		WT_STAT_CONN_INCR(session, session_table_create_fail);
760 	else
761 		WT_STAT_CONN_INCR(session, session_table_create_success);
762 	API_END_RET_NOTFOUND_MAP(session, ret);
763 }
764 
765 /*
766  * __session_create_readonly --
767  *	WT_SESSION->create method; readonly version.
768  */
769 static int
__session_create_readonly(WT_SESSION * wt_session,const char * uri,const char * config)770 __session_create_readonly(
771     WT_SESSION *wt_session, const char *uri, const char *config)
772 {
773 	WT_DECL_RET;
774 	WT_SESSION_IMPL *session;
775 
776 	WT_UNUSED(uri);
777 	WT_UNUSED(config);
778 
779 	session = (WT_SESSION_IMPL *)wt_session;
780 	SESSION_API_CALL_NOCONF(session, create);
781 
782 	WT_STAT_CONN_INCR(session, session_table_create_fail);
783 	ret = __wt_session_notsup(session);
784 err:	API_END_RET(session, ret);
785 }
786 
787 /*
788  * __session_log_flush --
789  *	WT_SESSION->log_flush method.
790  */
791 static int
__session_log_flush(WT_SESSION * wt_session,const char * config)792 __session_log_flush(WT_SESSION *wt_session, const char *config)
793 {
794 	WT_CONFIG_ITEM cval;
795 	WT_CONNECTION_IMPL *conn;
796 	WT_DECL_RET;
797 	WT_SESSION_IMPL *session;
798 	uint32_t flags;
799 
800 	session = (WT_SESSION_IMPL *)wt_session;
801 	SESSION_API_CALL(session, log_flush, config, cfg);
802 	WT_STAT_CONN_INCR(session, log_flush);
803 
804 	conn = S2C(session);
805 	flags = 0;
806 	/*
807 	 * If logging is not enabled there is nothing to do.
808 	 */
809 	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
810 		WT_ERR_MSG(session, EINVAL, "logging not enabled");
811 
812 	WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval));
813 	if (WT_STRING_MATCH("background", cval.str, cval.len))
814 		flags = WT_LOG_BACKGROUND;
815 	else if (WT_STRING_MATCH("off", cval.str, cval.len))
816 		flags = WT_LOG_FLUSH;
817 	else if (WT_STRING_MATCH("on", cval.str, cval.len))
818 		flags = WT_LOG_FSYNC;
819 	ret = __wt_log_flush(session, flags);
820 
821 err:	API_END_RET(session, ret);
822 }
823 
824 /*
825  * __session_log_flush_readonly --
826  *	WT_SESSION->log_flush method; readonly version.
827  */
828 static int
__session_log_flush_readonly(WT_SESSION * wt_session,const char * config)829 __session_log_flush_readonly(WT_SESSION *wt_session, const char *config)
830 {
831 	WT_DECL_RET;
832 	WT_SESSION_IMPL *session;
833 
834 	WT_UNUSED(config);
835 
836 	session = (WT_SESSION_IMPL *)wt_session;
837 	SESSION_API_CALL_NOCONF(session, log_flush);
838 
839 	ret = __wt_session_notsup(session);
840 err:	API_END_RET(session, ret);
841 }
842 
843 /*
844  * __session_log_printf --
845  *	WT_SESSION->log_printf method.
846  */
847 static int
__session_log_printf(WT_SESSION * wt_session,const char * fmt,...)848 __session_log_printf(WT_SESSION *wt_session, const char *fmt, ...)
849     WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
850 {
851 	WT_DECL_RET;
852 	WT_SESSION_IMPL *session;
853 	va_list ap;
854 
855 	session = (WT_SESSION_IMPL *)wt_session;
856 	SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, log_printf);
857 
858 	va_start(ap, fmt);
859 	ret = __wt_log_vprintf(session, fmt, ap);
860 	va_end(ap);
861 
862 err:	API_END_RET(session, ret);
863 }
864 
865 /*
866  * __session_log_printf_readonly --
867  *	WT_SESSION->log_printf method; readonly version.
868  */
869 static int
__session_log_printf_readonly(WT_SESSION * wt_session,const char * fmt,...)870 __session_log_printf_readonly(WT_SESSION *wt_session, const char *fmt, ...)
871     WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
872 {
873 	WT_DECL_RET;
874 	WT_SESSION_IMPL *session;
875 
876 	WT_UNUSED(fmt);
877 
878 	session = (WT_SESSION_IMPL *)wt_session;
879 	SESSION_API_CALL_NOCONF(session, log_printf);
880 
881 	ret = __wt_session_notsup(session);
882 err:	API_END_RET(session, ret);
883 }
884 
885 /*
886  * __session_rebalance --
887  *	WT_SESSION->rebalance method.
888  */
889 static int
__session_rebalance(WT_SESSION * wt_session,const char * uri,const char * config)890 __session_rebalance(WT_SESSION *wt_session, const char *uri, const char *config)
891 {
892 	WT_DECL_RET;
893 	WT_SESSION_IMPL *session;
894 
895 	session = (WT_SESSION_IMPL *)wt_session;
896 
897 	SESSION_API_CALL(session, rebalance, config, cfg);
898 
899 	/* In-memory ignores rebalance operations. */
900 	if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
901 		goto err;
902 
903 	/* Block out checkpoints to avoid spurious EBUSY errors. */
904 	WT_WITH_CHECKPOINT_LOCK(session,
905 	    WT_WITH_SCHEMA_LOCK(session,
906 		ret = __wt_schema_worker(session, uri, __wt_bt_rebalance,
907 		NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_REBALANCE)));
908 
909 err:
910 	if (ret != 0)
911 		WT_STAT_CONN_INCR(session, session_table_rebalance_fail);
912 	else
913 		WT_STAT_CONN_INCR(session, session_table_rebalance_success);
914 	API_END_RET_NOTFOUND_MAP(session, ret);
915 }
916 
917 /*
918  * __session_rebalance_readonly --
919  *	WT_SESSION->rebalance method; readonly version.
920  */
921 static int
__session_rebalance_readonly(WT_SESSION * wt_session,const char * uri,const char * config)922 __session_rebalance_readonly(
923     WT_SESSION *wt_session, const char *uri, const char *config)
924 {
925 	WT_DECL_RET;
926 	WT_SESSION_IMPL *session;
927 
928 	WT_UNUSED(uri);
929 	WT_UNUSED(config);
930 
931 	session = (WT_SESSION_IMPL *)wt_session;
932 	SESSION_API_CALL_NOCONF(session, rebalance);
933 
934 	WT_STAT_CONN_INCR(session, session_table_rebalance_fail);
935 	ret = __wt_session_notsup(session);
936 err:	API_END_RET(session, ret);
937 }
938 
939 /*
940  * __session_rename --
941  *	WT_SESSION->rename method.
942  */
943 static int
__session_rename(WT_SESSION * wt_session,const char * uri,const char * newuri,const char * config)944 __session_rename(WT_SESSION *wt_session,
945     const char *uri, const char *newuri, const char *config)
946 {
947 	WT_DECL_RET;
948 	WT_SESSION_IMPL *session;
949 
950 	session = (WT_SESSION_IMPL *)wt_session;
951 	SESSION_API_CALL(session, rename, config, cfg);
952 
953 	/* Disallow objects in the WiredTiger name space. */
954 	WT_ERR(__wt_str_name_check(session, uri));
955 	WT_ERR(__wt_str_name_check(session, newuri));
956 
957 	WT_WITH_CHECKPOINT_LOCK(session,
958 	    WT_WITH_SCHEMA_LOCK(session,
959 		WT_WITH_TABLE_WRITE_LOCK(session,
960 		    ret = __wt_schema_rename(session, uri, newuri, cfg))));
961 err:
962 	if (ret != 0)
963 		WT_STAT_CONN_INCR(session, session_table_rename_fail);
964 	else
965 		WT_STAT_CONN_INCR(session, session_table_rename_success);
966 	API_END_RET_NOTFOUND_MAP(session, ret);
967 }
968 
969 /*
970  * __session_rename_readonly --
971  *	WT_SESSION->rename method; readonly version.
972  */
973 static int
__session_rename_readonly(WT_SESSION * wt_session,const char * uri,const char * newuri,const char * config)974 __session_rename_readonly(WT_SESSION *wt_session,
975     const char *uri, const char *newuri, const char *config)
976 {
977 	WT_DECL_RET;
978 	WT_SESSION_IMPL *session;
979 
980 	WT_UNUSED(uri);
981 	WT_UNUSED(newuri);
982 	WT_UNUSED(config);
983 
984 	session = (WT_SESSION_IMPL *)wt_session;
985 	SESSION_API_CALL_NOCONF(session, rename);
986 
987 	WT_STAT_CONN_INCR(session, session_table_rename_fail);
988 	ret = __wt_session_notsup(session);
989 err:	API_END_RET(session, ret);
990 }
991 
992 /*
993  * __session_reset --
994  *	WT_SESSION->reset method.
995  */
996 static int
__session_reset(WT_SESSION * wt_session)997 __session_reset(WT_SESSION *wt_session)
998 {
999 	WT_DECL_RET;
1000 	WT_SESSION_IMPL *session;
1001 
1002 	session = (WT_SESSION_IMPL *)wt_session;
1003 
1004 	SESSION_API_CALL_NOCONF(session, reset);
1005 
1006 	WT_ERR(__wt_txn_context_check(session, false));
1007 
1008 	WT_TRET(__wt_session_reset_cursors(session, true));
1009 
1010 	WT_TRET(__wt_session_cursor_cache_sweep(session));
1011 
1012 	/* Release common session resources. */
1013 	WT_TRET(__wt_session_release_resources(session));
1014 
1015 err:	API_END_RET_NOTFOUND_MAP(session, ret);
1016 }
1017 
1018 /*
1019  * __session_drop --
1020  *	WT_SESSION->drop method.
1021  */
1022 static int
__session_drop(WT_SESSION * wt_session,const char * uri,const char * config)1023 __session_drop(WT_SESSION *wt_session, const char *uri, const char *config)
1024 {
1025 	WT_CONFIG_ITEM cval;
1026 	WT_DECL_RET;
1027 	WT_SESSION_IMPL *session;
1028 	bool checkpoint_wait, lock_wait;
1029 
1030 	session = (WT_SESSION_IMPL *)wt_session;
1031 	SESSION_API_CALL(session, drop, config, cfg);
1032 
1033 	/* Disallow objects in the WiredTiger name space. */
1034 	WT_ERR(__wt_str_name_check(session, uri));
1035 
1036 	WT_ERR(__wt_config_gets_def(session, cfg, "checkpoint_wait", 1, &cval));
1037 	checkpoint_wait = cval.val != 0;
1038 	WT_ERR(__wt_config_gets_def(session, cfg, "lock_wait", 1, &cval));
1039 	lock_wait = cval.val != 0;
1040 
1041 	/*
1042 	 * Take the checkpoint lock if there is a need to prevent the drop
1043 	 * operation from failing with EBUSY due to an ongoing checkpoint.
1044 	 */
1045 	if (checkpoint_wait) {
1046 		if (lock_wait)
1047 			WT_WITH_CHECKPOINT_LOCK(session,
1048 			    WT_WITH_SCHEMA_LOCK(session,
1049 				WT_WITH_TABLE_WRITE_LOCK(session, ret =
1050 				    __wt_schema_drop(session, uri, cfg))));
1051 		else
1052 			WT_WITH_CHECKPOINT_LOCK_NOWAIT(session, ret,
1053 			    WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret,
1054 				WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret,
1055 				    ret =
1056 				    __wt_schema_drop(session, uri, cfg))));
1057 	} else {
1058 		if (lock_wait)
1059 			WT_WITH_SCHEMA_LOCK(session,
1060 			    WT_WITH_TABLE_WRITE_LOCK(session,
1061 				ret = __wt_schema_drop(session, uri, cfg)));
1062 		else
1063 			WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret,
1064 			    WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret,
1065 				ret = __wt_schema_drop(session, uri, cfg)));
1066 	}
1067 
1068 err:
1069 	if (ret != 0)
1070 		WT_STAT_CONN_INCR(session, session_table_drop_fail);
1071 	else
1072 		WT_STAT_CONN_INCR(session, session_table_drop_success);
1073 
1074 	/* Note: drop operations cannot be unrolled (yet?). */
1075 	API_END_RET_NOTFOUND_MAP(session, ret);
1076 }
1077 
1078 /*
1079  * __session_drop_readonly --
1080  *	WT_SESSION->drop method; readonly version.
1081  */
1082 static int
__session_drop_readonly(WT_SESSION * wt_session,const char * uri,const char * config)1083 __session_drop_readonly(
1084     WT_SESSION *wt_session, const char *uri, const char *config)
1085 {
1086 	WT_DECL_RET;
1087 	WT_SESSION_IMPL *session;
1088 
1089 	WT_UNUSED(uri);
1090 	WT_UNUSED(config);
1091 
1092 	session = (WT_SESSION_IMPL *)wt_session;
1093 	SESSION_API_CALL_NOCONF(session, drop);
1094 
1095 	WT_STAT_CONN_INCR(session, session_table_drop_fail);
1096 	ret = __wt_session_notsup(session);
1097 err:	API_END_RET(session, ret);
1098 }
1099 
1100 /*
1101  * __session_join --
1102  *	WT_SESSION->join method.
1103  */
1104 static int
__session_join(WT_SESSION * wt_session,WT_CURSOR * join_cursor,WT_CURSOR * ref_cursor,const char * config)1105 __session_join(WT_SESSION *wt_session, WT_CURSOR *join_cursor,
1106     WT_CURSOR *ref_cursor, const char *config)
1107 {
1108 	WT_CONFIG_ITEM cval;
1109 	WT_CURSOR *firstcg;
1110 	WT_CURSOR_INDEX *cindex;
1111 	WT_CURSOR_JOIN *cjoin;
1112 	WT_CURSOR_TABLE *ctable;
1113 	WT_DECL_RET;
1114 	WT_INDEX *idx;
1115 	WT_SESSION_IMPL *session;
1116 	WT_TABLE *table;
1117 	uint64_t count;
1118 	uint32_t bloom_bit_count, bloom_hash_count;
1119 	uint8_t flags, range;
1120 	bool nested;
1121 
1122 	session = (WT_SESSION_IMPL *)wt_session;
1123 	SESSION_API_CALL(session, join, config, cfg);
1124 
1125 	firstcg = NULL;
1126 	table = NULL;
1127 	nested = false;
1128 	count = 0;
1129 
1130 	if (!WT_PREFIX_MATCH(join_cursor->uri, "join:"))
1131 		WT_ERR_MSG(session, EINVAL, "not a join cursor");
1132 
1133 	if (WT_PREFIX_MATCH(ref_cursor->uri, "index:")) {
1134 		cindex = (WT_CURSOR_INDEX *)ref_cursor;
1135 		idx = cindex->index;
1136 		table = cindex->table;
1137 		firstcg = cindex->cg_cursors[0];
1138 	} else if (WT_PREFIX_MATCH(ref_cursor->uri, "table:")) {
1139 		idx = NULL;
1140 		ctable = (WT_CURSOR_TABLE *)ref_cursor;
1141 		table = ctable->table;
1142 		firstcg = ctable->cg_cursors[0];
1143 	} else if (WT_PREFIX_MATCH(ref_cursor->uri, "join:")) {
1144 		idx = NULL;
1145 		table = ((WT_CURSOR_JOIN *)ref_cursor)->table;
1146 		nested = true;
1147 	} else
1148 		WT_ERR_MSG(session, EINVAL,
1149 		    "ref_cursor must be an index, table or join cursor");
1150 
1151 	if (firstcg != NULL && !F_ISSET(firstcg, WT_CURSTD_KEY_SET))
1152 		WT_ERR_MSG(session, EINVAL,
1153 		    "requires reference cursor be positioned");
1154 	cjoin = (WT_CURSOR_JOIN *)join_cursor;
1155 	if (cjoin->table != table)
1156 		WT_ERR_MSG(session, EINVAL,
1157 		    "table for join cursor does not match table for "
1158 		    "ref_cursor");
1159 	if (F_ISSET(ref_cursor, WT_CURSTD_JOINED))
1160 		WT_ERR_MSG(session, EINVAL, "cursor already used in a join");
1161 
1162 	/* "ge" is the default */
1163 	range = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ;
1164 	flags = 0;
1165 	WT_ERR(__wt_config_gets(session, cfg, "compare", &cval));
1166 	if (cval.len != 0) {
1167 		if (WT_STRING_MATCH("gt", cval.str, cval.len))
1168 			range = WT_CURJOIN_END_GT;
1169 		else if (WT_STRING_MATCH("lt", cval.str, cval.len))
1170 			range = WT_CURJOIN_END_LT;
1171 		else if (WT_STRING_MATCH("le", cval.str, cval.len))
1172 			range = WT_CURJOIN_END_LE;
1173 		else if (WT_STRING_MATCH("eq", cval.str, cval.len))
1174 			range = WT_CURJOIN_END_EQ;
1175 		else if (!WT_STRING_MATCH("ge", cval.str, cval.len))
1176 			WT_ERR_MSG(session, EINVAL,
1177 			    "compare=%.*s not supported",
1178 			    (int)cval.len, cval.str);
1179 	}
1180 	WT_ERR(__wt_config_gets(session, cfg, "count", &cval));
1181 	if (cval.len != 0)
1182 		count = (uint64_t)cval.val;
1183 
1184 	WT_ERR(__wt_config_gets(session, cfg, "strategy", &cval));
1185 	if (cval.len != 0) {
1186 		if (WT_STRING_MATCH("bloom", cval.str, cval.len))
1187 			LF_SET(WT_CURJOIN_ENTRY_BLOOM);
1188 		else if (!WT_STRING_MATCH("default", cval.str, cval.len))
1189 			WT_ERR_MSG(session, EINVAL,
1190 			    "strategy=%.*s not supported",
1191 			    (int)cval.len, cval.str);
1192 	}
1193 	WT_ERR(__wt_config_gets(session, cfg, "bloom_bit_count", &cval));
1194 	if ((uint64_t)cval.val > UINT32_MAX)
1195 		WT_ERR_MSG(session, EINVAL, "bloom_bit_count: value too large");
1196 	bloom_bit_count = (uint32_t)cval.val;
1197 	WT_ERR(__wt_config_gets(session, cfg, "bloom_hash_count", &cval));
1198 	if ((uint64_t)cval.val > UINT32_MAX)
1199 		WT_ERR_MSG(session, EINVAL,
1200 		    "bloom_hash_count: value too large");
1201 	bloom_hash_count = (uint32_t)cval.val;
1202 	if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM) && count == 0)
1203 		WT_ERR_MSG(session, EINVAL,
1204 		    "count must be nonzero when strategy=bloom");
1205 	WT_ERR(__wt_config_gets_def(
1206 	    session, cfg, "bloom_false_positives", 0, &cval));
1207 	if (cval.val != 0)
1208 		LF_SET(WT_CURJOIN_ENTRY_FALSE_POSITIVES);
1209 
1210 	WT_ERR(__wt_config_gets(session, cfg, "operation", &cval));
1211 	if (cval.len != 0 && WT_STRING_MATCH("or", cval.str, cval.len))
1212 		LF_SET(WT_CURJOIN_ENTRY_DISJUNCTION);
1213 
1214 	if (nested && (count != 0 || range != WT_CURJOIN_END_EQ ||
1215 	    LF_ISSET(WT_CURJOIN_ENTRY_BLOOM)))
1216 		WT_ERR_MSG(session, EINVAL,
1217 		    "joining a nested join cursor is incompatible with "
1218 		    "setting \"strategy\", \"compare\" or \"count\"");
1219 
1220 	WT_ERR(__wt_curjoin_join(session, cjoin, idx, ref_cursor, flags,
1221 	    range, count, bloom_bit_count, bloom_hash_count));
1222 	/*
1223 	 * There's an implied ownership ordering that isn't
1224 	 * known when the cursors are created: the join cursor
1225 	 * must be closed before any of the indices.  Enforce
1226 	 * that here by reordering.
1227 	 */
1228 	if (TAILQ_FIRST(&session->cursors) != join_cursor) {
1229 		TAILQ_REMOVE(&session->cursors, join_cursor, q);
1230 		TAILQ_INSERT_HEAD(&session->cursors, join_cursor, q);
1231 	}
1232 	/* Disable the reference cursor for regular operations */
1233 	F_SET(ref_cursor, WT_CURSTD_JOINED);
1234 
1235 err:	API_END_RET_NOTFOUND_MAP(session, ret);
1236 }
1237 
1238 /*
1239  * __session_salvage --
1240  *	WT_SESSION->salvage method.
1241  */
1242 static int
__session_salvage(WT_SESSION * wt_session,const char * uri,const char * config)1243 __session_salvage(WT_SESSION *wt_session, const char *uri, const char *config)
1244 {
1245 	WT_DECL_RET;
1246 	WT_SESSION_IMPL *session;
1247 
1248 	session = (WT_SESSION_IMPL *)wt_session;
1249 
1250 	SESSION_API_CALL(session, salvage, config, cfg);
1251 
1252 	WT_ERR(__wt_inmem_unsupported_op(session, NULL));
1253 
1254 	/* Block out checkpoints to avoid spurious EBUSY errors. */
1255 	WT_WITH_CHECKPOINT_LOCK(session,
1256 	    WT_WITH_SCHEMA_LOCK(session,
1257 		ret = __wt_schema_worker(session, uri, __wt_salvage,
1258 		NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_SALVAGE)));
1259 
1260 err:
1261 	if (ret != 0)
1262 		WT_STAT_CONN_INCR(session, session_table_salvage_fail);
1263 	else
1264 		WT_STAT_CONN_INCR(session, session_table_salvage_success);
1265 	API_END_RET_NOTFOUND_MAP(session, ret);
1266 }
1267 
1268 /*
1269  * __session_salvage_readonly --
1270  *	WT_SESSION->salvage method; readonly version.
1271  */
1272 static int
__session_salvage_readonly(WT_SESSION * wt_session,const char * uri,const char * config)1273 __session_salvage_readonly(
1274     WT_SESSION *wt_session, const char *uri, const char *config)
1275 {
1276 	WT_DECL_RET;
1277 	WT_SESSION_IMPL *session;
1278 
1279 	WT_UNUSED(uri);
1280 	WT_UNUSED(config);
1281 
1282 	session = (WT_SESSION_IMPL *)wt_session;
1283 	SESSION_API_CALL_NOCONF(session, salvage);
1284 
1285 	WT_STAT_CONN_INCR(session, session_table_salvage_fail);
1286 	ret = __wt_session_notsup(session);
1287 err:	API_END_RET(session, ret);
1288 }
1289 
1290 /*
1291  * __wt_session_range_truncate --
1292  *	Session handling of a range truncate.
1293  */
1294 int
__wt_session_range_truncate(WT_SESSION_IMPL * session,const char * uri,WT_CURSOR * start,WT_CURSOR * stop)1295 __wt_session_range_truncate(WT_SESSION_IMPL *session,
1296     const char *uri, WT_CURSOR *start, WT_CURSOR *stop)
1297 {
1298 	WT_DECL_RET;
1299 	int cmp;
1300 	bool local_start;
1301 
1302 	local_start = false;
1303 	if (uri != NULL) {
1304 		WT_ASSERT(session, WT_PREFIX_MATCH(uri, "file:"));
1305 		/*
1306 		 * A URI file truncate becomes a range truncate where we
1307 		 * set a start cursor at the beginning.  We already
1308 		 * know the NULL stop goes to the end of the range.
1309 		 */
1310 		WT_ERR(__session_open_cursor(
1311 		    (WT_SESSION *)session, uri, NULL, NULL, &start));
1312 		local_start = true;
1313 		ret = start->next(start);
1314 		if (ret == WT_NOTFOUND) {
1315 			/*
1316 			 * If there are no elements, there is nothing
1317 			 * to do.
1318 			 */
1319 			ret = 0;
1320 			goto done;
1321 		}
1322 		WT_ERR(ret);
1323 	}
1324 
1325 	/*
1326 	 * Cursor truncate is only supported for some objects, check for a
1327 	 * supporting compare method.
1328 	 */
1329 	if (start != NULL && start->compare == NULL)
1330 		WT_ERR(__wt_bad_object_type(session, start->uri));
1331 	if (stop != NULL && stop->compare == NULL)
1332 		WT_ERR(__wt_bad_object_type(session, stop->uri));
1333 
1334 	/*
1335 	 * If both cursors set, check they're correctly ordered with respect to
1336 	 * each other.  We have to test this before any search, the search can
1337 	 * change the initial cursor position.
1338 	 *
1339 	 * Rather happily, the compare routine will also confirm the cursors
1340 	 * reference the same object and the keys are set.
1341 	 *
1342 	 * The test for a NULL start comparison function isn't necessary (we
1343 	 * checked it above), but it quiets clang static analysis complaints.
1344 	 */
1345 	if (start != NULL && stop != NULL && start->compare != NULL) {
1346 		WT_ERR(start->compare(start, stop, &cmp));
1347 		if (cmp > 0)
1348 			WT_ERR_MSG(session, EINVAL,
1349 			    "the start cursor position is after the stop "
1350 			    "cursor position");
1351 	}
1352 
1353 	/*
1354 	 * Truncate does not require keys actually exist so that applications
1355 	 * can discard parts of the object's name space without knowing exactly
1356 	 * what records currently appear in the object.  For this reason, do a
1357 	 * search-near, rather than a search.  Additionally, we have to correct
1358 	 * after calling search-near, to position the start/stop cursors on the
1359 	 * next record greater than/less than the original key. If we fail to
1360 	 * find a key in a search-near, there are no keys in the table. If we
1361 	 * fail to move forward or backward in a range, there are no keys in
1362 	 * the range. In either of those cases, we're done.
1363 	 */
1364 	if (start != NULL)
1365 		if ((ret = start->search_near(start, &cmp)) != 0 ||
1366 		    (cmp < 0 && (ret = start->next(start)) != 0)) {
1367 			WT_ERR_NOTFOUND_OK(ret);
1368 			goto done;
1369 		}
1370 	if (stop != NULL)
1371 		if ((ret = stop->search_near(stop, &cmp)) != 0 ||
1372 		    (cmp > 0 && (ret = stop->prev(stop)) != 0)) {
1373 			WT_ERR_NOTFOUND_OK(ret);
1374 			goto done;
1375 		}
1376 
1377 	/*
1378 	 * We always truncate in the forward direction because the underlying
1379 	 * data structures can move through pages faster forward than backward.
1380 	 * If we don't have a start cursor, create one and position it at the
1381 	 * first record.
1382 	 *
1383 	 * If start is NULL, stop must not be NULL, but static analyzers have
1384 	 * a hard time with that, test explicitly.
1385 	 */
1386 	if (start == NULL && stop != NULL) {
1387 		WT_ERR(__session_open_cursor(
1388 		    (WT_SESSION *)session, stop->uri, NULL, NULL, &start));
1389 		local_start = true;
1390 		WT_ERR(start->next(start));
1391 	}
1392 
1393 	/*
1394 	 * If the start/stop keys cross, we're done, the range must be empty.
1395 	 */
1396 	if (stop != NULL) {
1397 		WT_ERR(start->compare(start, stop, &cmp));
1398 		if (cmp > 0)
1399 			goto done;
1400 	}
1401 
1402 	WT_ERR(__wt_schema_range_truncate(session, start, stop));
1403 
1404 done:
1405 err:	/*
1406 	 * Close any locally-opened start cursor.
1407 	 *
1408 	 * Reset application cursors, they've possibly moved and the
1409 	 * application cannot use them.  Note that we can make it here with a
1410 	 * NULL start cursor (e.g., if the truncate range is empty).
1411 	 */
1412 	if (local_start)
1413 		WT_TRET(start->close(start));
1414 	else if (start != NULL)
1415 		WT_TRET(start->reset(start));
1416 	if (stop != NULL)
1417 		WT_TRET(stop->reset(stop));
1418 	return (ret);
1419 }
1420 
1421 /*
1422  * __session_truncate --
1423  *	WT_SESSION->truncate method.
1424  */
1425 static int
__session_truncate(WT_SESSION * wt_session,const char * uri,WT_CURSOR * start,WT_CURSOR * stop,const char * config)1426 __session_truncate(WT_SESSION *wt_session,
1427     const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
1428 {
1429 	WT_DECL_RET;
1430 	WT_SESSION_IMPL *session;
1431 
1432 	session = (WT_SESSION_IMPL *)wt_session;
1433 	SESSION_TXN_API_CALL(session, truncate, config, cfg);
1434 	WT_STAT_CONN_INCR(session, cursor_truncate);
1435 
1436 	/*
1437 	 * If the URI is specified, we don't need a start/stop, if start/stop
1438 	 * is specified, we don't need a URI.  One exception is the log URI
1439 	 * which may truncate (archive) log files for a backup cursor.
1440 	 *
1441 	 * If no URI is specified, and both cursors are specified, start/stop
1442 	 * must reference the same object.
1443 	 *
1444 	 * Any specified cursor must have been initialized.
1445 	 */
1446 	if ((uri == NULL && start == NULL && stop == NULL) ||
1447 	    (uri != NULL && !WT_PREFIX_MATCH(uri, "log:") &&
1448 	    (start != NULL || stop != NULL)))
1449 		WT_ERR_MSG(session, EINVAL,
1450 		    "the truncate method should be passed either a URI or "
1451 		    "start/stop cursors, but not both");
1452 
1453 	if (uri != NULL) {
1454 		/* Disallow objects in the WiredTiger name space. */
1455 		WT_ERR(__wt_str_name_check(session, uri));
1456 
1457 		if (WT_PREFIX_MATCH(uri, "log:")) {
1458 			/*
1459 			 * Verify the user only gave the URI prefix and not
1460 			 * a specific target name after that.
1461 			 */
1462 			if (strcmp(uri, "log:") != 0)
1463 				WT_ERR_MSG(session, EINVAL,
1464 				    "the truncate method should not specify any"
1465 				    "target after the log: URI prefix");
1466 			WT_ERR(__wt_log_truncate_files(session, start, false));
1467 		} else if (WT_PREFIX_MATCH(uri, "file:"))
1468 			WT_ERR(__wt_session_range_truncate(
1469 			    session, uri, start, stop));
1470 		else
1471 			/* Wait for checkpoints to avoid EBUSY errors. */
1472 			WT_WITH_CHECKPOINT_LOCK(session,
1473 			    WT_WITH_SCHEMA_LOCK(session,
1474 				ret = __wt_schema_truncate(session, uri, cfg)));
1475 	} else
1476 		WT_ERR(__wt_session_range_truncate(session, uri, start, stop));
1477 
1478 err:	TXN_API_END_RETRY(session, ret, 0);
1479 
1480 	if (ret != 0)
1481 		WT_STAT_CONN_INCR(session, session_table_truncate_fail);
1482 	else
1483 		WT_STAT_CONN_INCR(session, session_table_truncate_success);
1484 	/*
1485 	 * Only map WT_NOTFOUND to ENOENT if a URI was specified.
1486 	 */
1487 	return (ret == WT_NOTFOUND && uri != NULL ? ENOENT : ret);
1488 }
1489 
1490 /*
1491  * __session_truncate_readonly --
1492  *	WT_SESSION->truncate method; readonly version.
1493  */
1494 static int
__session_truncate_readonly(WT_SESSION * wt_session,const char * uri,WT_CURSOR * start,WT_CURSOR * stop,const char * config)1495 __session_truncate_readonly(WT_SESSION *wt_session,
1496     const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
1497 {
1498 	WT_DECL_RET;
1499 	WT_SESSION_IMPL *session;
1500 
1501 	WT_UNUSED(uri);
1502 	WT_UNUSED(start);
1503 	WT_UNUSED(stop);
1504 	WT_UNUSED(config);
1505 
1506 	session = (WT_SESSION_IMPL *)wt_session;
1507 	SESSION_API_CALL_NOCONF(session, truncate);
1508 
1509 	WT_STAT_CONN_INCR(session, session_table_truncate_fail);
1510 	ret = __wt_session_notsup(session);
1511 err:	API_END_RET(session, ret);
1512 }
1513 
1514 /*
1515  * __session_upgrade --
1516  *	WT_SESSION->upgrade method.
1517  */
1518 static int
__session_upgrade(WT_SESSION * wt_session,const char * uri,const char * config)1519 __session_upgrade(WT_SESSION *wt_session, const char *uri, const char *config)
1520 {
1521 	WT_DECL_RET;
1522 	WT_SESSION_IMPL *session;
1523 
1524 	session = (WT_SESSION_IMPL *)wt_session;
1525 
1526 	SESSION_API_CALL(session, upgrade, config, cfg);
1527 
1528 	WT_ERR(__wt_inmem_unsupported_op(session, NULL));
1529 
1530 	/* Block out checkpoints to avoid spurious EBUSY errors. */
1531 	WT_WITH_CHECKPOINT_LOCK(session,
1532 	    WT_WITH_SCHEMA_LOCK(session,
1533 		ret = __wt_schema_worker(session, uri, __wt_upgrade,
1534 		NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_UPGRADE)));
1535 
1536 err:	API_END_RET_NOTFOUND_MAP(session, ret);
1537 }
1538 
1539 /*
1540  * __session_upgrade_readonly --
1541  *	WT_SESSION->upgrade method; readonly version.
1542  */
1543 static int
__session_upgrade_readonly(WT_SESSION * wt_session,const char * uri,const char * config)1544 __session_upgrade_readonly(
1545     WT_SESSION *wt_session, const char *uri, const char *config)
1546 {
1547 	WT_DECL_RET;
1548 	WT_SESSION_IMPL *session;
1549 
1550 	WT_UNUSED(uri);
1551 	WT_UNUSED(config);
1552 
1553 	session = (WT_SESSION_IMPL *)wt_session;
1554 	SESSION_API_CALL_NOCONF(session, upgrade);
1555 
1556 	ret = __wt_session_notsup(session);
1557 err:	API_END_RET(session, ret);
1558 }
1559 
1560 /*
1561  * __session_verify --
1562  *	WT_SESSION->verify method.
1563  */
1564 static int
__session_verify(WT_SESSION * wt_session,const char * uri,const char * config)1565 __session_verify(WT_SESSION *wt_session, const char *uri, const char *config)
1566 {
1567 	WT_DECL_RET;
1568 	WT_SESSION_IMPL *session;
1569 
1570 	session = (WT_SESSION_IMPL *)wt_session;
1571 
1572 	SESSION_API_CALL(session, verify, config, cfg);
1573 
1574 	WT_ERR(__wt_inmem_unsupported_op(session, NULL));
1575 
1576 	/* Block out checkpoints to avoid spurious EBUSY errors. */
1577 	WT_WITH_CHECKPOINT_LOCK(session,
1578 	    WT_WITH_SCHEMA_LOCK(session,
1579 		ret = __wt_schema_worker(session, uri, __wt_verify,
1580 		NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY)));
1581 
1582 err:
1583 	if (ret != 0)
1584 		WT_STAT_CONN_INCR(session, session_table_verify_fail);
1585 	else
1586 		WT_STAT_CONN_INCR(session, session_table_verify_success);
1587 	API_END_RET_NOTFOUND_MAP(session, ret);
1588 }
1589 
1590 /*
1591  * __session_begin_transaction --
1592  *	WT_SESSION->begin_transaction method.
1593  */
1594 static int
__session_begin_transaction(WT_SESSION * wt_session,const char * config)1595 __session_begin_transaction(WT_SESSION *wt_session, const char *config)
1596 {
1597 	WT_DECL_RET;
1598 	WT_SESSION_IMPL *session;
1599 
1600 	session = (WT_SESSION_IMPL *)wt_session;
1601 	/*
1602 	 * Indicated as allowed in prepared state, even though not allowed,
1603 	 * so that running transaction check below take precedence.
1604 	 */
1605 	SESSION_API_CALL_PREPARE_ALLOWED(
1606 	    session, begin_transaction, config, cfg);
1607 	WT_STAT_CONN_INCR(session, txn_begin);
1608 
1609 	WT_ERR(__wt_txn_context_check(session, false));
1610 
1611 	ret = __wt_txn_begin(session, cfg);
1612 
1613 err:	API_END_RET(session, ret);
1614 }
1615 
1616 /*
1617  * __session_commit_transaction --
1618  *	WT_SESSION->commit_transaction method.
1619  */
1620 static int
__session_commit_transaction(WT_SESSION * wt_session,const char * config)1621 __session_commit_transaction(WT_SESSION *wt_session, const char *config)
1622 {
1623 	WT_DECL_RET;
1624 	WT_SESSION_IMPL *session;
1625 	WT_TXN *txn;
1626 
1627 	session = (WT_SESSION_IMPL *)wt_session;
1628 	SESSION_API_CALL_PREPARE_ALLOWED(
1629 	    session, commit_transaction, config, cfg);
1630 	WT_STAT_CONN_INCR(session, txn_commit);
1631 
1632 	txn = &session->txn;
1633 	if (F_ISSET(txn, WT_TXN_PREPARE)) {
1634 		WT_STAT_CONN_INCR(session, txn_prepare_commit);
1635 		WT_STAT_CONN_DECR(session, txn_prepare_active);
1636 	}
1637 
1638 	WT_ERR(__wt_txn_context_check(session, true));
1639 
1640 	if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0)
1641 		WT_ERR_MSG(session, EINVAL,
1642 		    "failed transaction requires rollback%s%s",
1643 		    txn->rollback_reason == NULL ? "" : ": ",
1644 		    txn->rollback_reason == NULL ? "" : txn->rollback_reason);
1645 
1646 	if (ret == 0)
1647 		ret = __wt_txn_commit(session, cfg);
1648 	else {
1649 		WT_TRET(__wt_session_reset_cursors(session, false));
1650 		WT_TRET(__wt_txn_rollback(session, cfg));
1651 	}
1652 
1653 err:	API_END_RET(session, ret);
1654 }
1655 
1656 /*
1657  * __session_prepare_transaction --
1658  *	WT_SESSION->prepare_transaction method.
1659  */
1660 static int
__session_prepare_transaction(WT_SESSION * wt_session,const char * config)1661 __session_prepare_transaction(WT_SESSION *wt_session, const char *config)
1662 {
1663 	WT_DECL_RET;
1664 	WT_SESSION_IMPL *session;
1665 	WT_TXN *txn;
1666 
1667 	session = (WT_SESSION_IMPL *)wt_session;
1668 	SESSION_API_CALL(session, prepare_transaction, config, cfg);
1669 	WT_STAT_CONN_INCR(session, txn_prepare);
1670 	WT_STAT_CONN_INCR(session, txn_prepare_active);
1671 
1672 	WT_ERR(__wt_txn_context_check(session, true));
1673 
1674 	/*
1675 	 * A failed transaction cannot be prepared, as it cannot guarantee
1676 	 * a subsequent commit.
1677 	 */
1678 	txn = &session->txn;
1679 	if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0)
1680 		WT_ERR_MSG(session, EINVAL,
1681 		    "failed transaction requires rollback%s%s",
1682 		    txn->rollback_reason == NULL ? "" : ": ",
1683 		    txn->rollback_reason == NULL ? "" : txn->rollback_reason);
1684 
1685 	WT_ERR(__wt_txn_prepare(session, cfg));
1686 
1687 err:	API_END_RET(session, ret);
1688 
1689 }
1690 
1691 /*
1692  * __session_prepare_transaction_readonly --
1693  *	WT_SESSION->prepare_transaction method; readonly version.
1694  */
1695 static int
__session_prepare_transaction_readonly(WT_SESSION * wt_session,const char * config)1696 __session_prepare_transaction_readonly(
1697     WT_SESSION *wt_session, const char *config)
1698 {
1699 	WT_DECL_RET;
1700 	WT_SESSION_IMPL *session;
1701 
1702 	WT_UNUSED(config);
1703 
1704 	session = (WT_SESSION_IMPL *)wt_session;
1705 	SESSION_API_CALL_NOCONF(session, prepare_transaction);
1706 
1707 	ret = __wt_session_notsup(session);
1708 err:	API_END_RET(session, ret);
1709 }
1710 
1711 /*
1712  * __session_rollback_transaction --
1713  *	WT_SESSION->rollback_transaction method.
1714  */
1715 static int
__session_rollback_transaction(WT_SESSION * wt_session,const char * config)1716 __session_rollback_transaction(WT_SESSION *wt_session, const char *config)
1717 {
1718 	WT_DECL_RET;
1719 	WT_SESSION_IMPL *session;
1720 	WT_TXN *txn;
1721 
1722 	session = (WT_SESSION_IMPL *)wt_session;
1723 	SESSION_API_CALL_PREPARE_ALLOWED(
1724 	    session, rollback_transaction, config, cfg);
1725 	WT_STAT_CONN_INCR(session, txn_rollback);
1726 
1727 	txn = &session->txn;
1728 	if (F_ISSET(txn, WT_TXN_PREPARE)) {
1729 		WT_STAT_CONN_INCR(session, txn_prepare_rollback);
1730 		WT_STAT_CONN_DECR(session, txn_prepare_active);
1731 	}
1732 
1733 	WT_ERR(__wt_txn_context_check(session, true));
1734 
1735 	WT_TRET(__wt_session_reset_cursors(session, false));
1736 
1737 	WT_TRET(__wt_txn_rollback(session, cfg));
1738 
1739 err:	API_END_RET(session, ret);
1740 }
1741 
1742 /*
1743  * __session_timestamp_transaction --
1744  *	WT_SESSION->timestamp_transaction method.
1745  */
1746 static int
__session_timestamp_transaction(WT_SESSION * wt_session,const char * config)1747 __session_timestamp_transaction(WT_SESSION *wt_session, const char *config)
1748 {
1749 	WT_DECL_RET;
1750 	WT_SESSION_IMPL *session;
1751 
1752 	session = (WT_SESSION_IMPL *)wt_session;
1753 #ifdef HAVE_DIAGNOSTIC
1754 	SESSION_API_CALL_PREPARE_ALLOWED(session,
1755 	    timestamp_transaction, config, cfg);
1756 #else
1757 	SESSION_API_CALL_PREPARE_ALLOWED(session,
1758 	    timestamp_transaction, NULL, cfg);
1759 	cfg[1] = config;
1760 #endif
1761 	WT_TRET(__wt_txn_set_timestamp(session, cfg));
1762 err:	API_END_RET(session, ret);
1763 }
1764 
1765 /*
1766  * __session_query_timestamp --
1767  *	WT_SESSION->query_timestamp method.
1768  */
1769 static int
__session_query_timestamp(WT_SESSION * wt_session,char * hex_timestamp,const char * config)1770 __session_query_timestamp(
1771     WT_SESSION *wt_session, char *hex_timestamp, const char *config)
1772 {
1773 	WT_DECL_RET;
1774 	WT_SESSION_IMPL *session;
1775 
1776 	session = (WT_SESSION_IMPL *)wt_session;
1777 	SESSION_API_CALL_PREPARE_ALLOWED(session,
1778 	    query_timestamp, config, cfg);
1779 	WT_TRET(__wt_txn_query_timestamp(session, hex_timestamp, cfg, false));
1780 err:	API_END_RET(session, ret);
1781 }
1782 
1783 /*
1784  * __session_transaction_pinned_range --
1785  *	WT_SESSION->transaction_pinned_range method.
1786  */
1787 static int
__session_transaction_pinned_range(WT_SESSION * wt_session,uint64_t * prange)1788 __session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange)
1789 {
1790 	WT_DECL_RET;
1791 	WT_SESSION_IMPL *session;
1792 	WT_TXN_STATE *txn_state;
1793 	uint64_t pinned;
1794 
1795 	session = (WT_SESSION_IMPL *)wt_session;
1796 	SESSION_API_CALL_NOCONF_PREPARE_NOT_ALLOWED(session, pinned_range);
1797 
1798 	txn_state = WT_SESSION_TXN_STATE(session);
1799 
1800 	/* Assign pinned to the lesser of id or snap_min */
1801 	if (txn_state->id != WT_TXN_NONE &&
1802 	    WT_TXNID_LT(txn_state->id, txn_state->pinned_id))
1803 		pinned = txn_state->id;
1804 	else
1805 		pinned = txn_state->pinned_id;
1806 
1807 	if (pinned == WT_TXN_NONE)
1808 		*prange = 0;
1809 	else
1810 		*prange = S2C(session)->txn_global.current - pinned;
1811 
1812 err:	API_END_RET(session, ret);
1813 }
1814 
1815 /*
1816  * __transaction_sync_run_chk --
1817  *	Check to decide if the transaction sync call should continue running.
1818  */
1819 static bool
__transaction_sync_run_chk(WT_SESSION_IMPL * session)1820 __transaction_sync_run_chk(WT_SESSION_IMPL *session)
1821 {
1822 	WT_CONNECTION_IMPL *conn;
1823 
1824 	conn = S2C(session);
1825 
1826 	return (FLD_ISSET(conn->flags, WT_CONN_SERVER_LOG));
1827 }
1828 
1829 /*
1830  * __session_transaction_sync --
1831  *	WT_SESSION->transaction_sync method.
1832  */
1833 static int
__session_transaction_sync(WT_SESSION * wt_session,const char * config)1834 __session_transaction_sync(WT_SESSION *wt_session, const char *config)
1835 {
1836 	WT_CONFIG_ITEM cval;
1837 	WT_CONNECTION_IMPL *conn;
1838 	WT_DECL_RET;
1839 	WT_LOG *log;
1840 	WT_SESSION_IMPL *session;
1841 	uint64_t remaining_usec, timeout_ms, waited_ms;
1842 	uint64_t time_start, time_stop;
1843 
1844 	session = (WT_SESSION_IMPL *)wt_session;
1845 	/*
1846 	 * Indicated as allowed in prepared state, even though not allowed,
1847 	 * so that running transaction check below take precedence.
1848 	 */
1849 	SESSION_API_CALL_PREPARE_ALLOWED(
1850 	    session, transaction_sync, config, cfg);
1851 	WT_STAT_CONN_INCR(session, txn_sync);
1852 
1853 	conn = S2C(session);
1854 	WT_ERR(__wt_txn_context_check(session, false));
1855 
1856 	/*
1857 	 * If logging is not enabled there is nothing to do.
1858 	 */
1859 	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
1860 		WT_ERR_MSG(session, EINVAL, "logging not enabled");
1861 
1862 	log = conn->log;
1863 
1864 	/*
1865 	 * If there is no background sync LSN in this session, there
1866 	 * is nothing to do.
1867 	 */
1868 	if (WT_IS_INIT_LSN(&session->bg_sync_lsn))
1869 		goto err;
1870 
1871 	/*
1872 	 * If our LSN is smaller than the current sync LSN then our
1873 	 * transaction is stable.  We're done.
1874 	 */
1875 	if (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) <= 0)
1876 		goto err;
1877 
1878 	/*
1879 	 * Our LSN is not yet stable.  Wait and check again depending on the
1880 	 * timeout.
1881 	 */
1882 	WT_ERR(__wt_config_gets_def(session,
1883 	    cfg, "timeout_ms", (int)WT_SESSION_BG_SYNC_MSEC, &cval));
1884 	timeout_ms = (uint64_t)cval.val;
1885 
1886 	if (timeout_ms == 0)
1887 		WT_ERR(ETIMEDOUT);
1888 
1889 	/*
1890 	 * Keep checking the LSNs until we find it is stable or we reach
1891 	 * our timeout, or there's some other reason to quit.
1892 	 */
1893 	time_start = __wt_clock(session);
1894 	while (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) > 0) {
1895 		if (!__transaction_sync_run_chk(session))
1896 			WT_ERR(ETIMEDOUT);
1897 
1898 		__wt_cond_signal(session, conn->log_file_cond);
1899 		time_stop = __wt_clock(session);
1900 		waited_ms = WT_CLOCKDIFF_MS(time_stop, time_start);
1901 		if (waited_ms < timeout_ms) {
1902 			remaining_usec = (timeout_ms - waited_ms) * WT_THOUSAND;
1903 			__wt_cond_wait(session, log->log_sync_cond,
1904 			    remaining_usec, __transaction_sync_run_chk);
1905 		} else
1906 			WT_ERR(ETIMEDOUT);
1907 	}
1908 
1909 err:	API_END_RET(session, ret);
1910 }
1911 
1912 /*
1913  * __session_transaction_sync_readonly --
1914  *	WT_SESSION->transaction_sync method; readonly version.
1915  */
1916 static int
__session_transaction_sync_readonly(WT_SESSION * wt_session,const char * config)1917 __session_transaction_sync_readonly(WT_SESSION *wt_session, const char *config)
1918 {
1919 	WT_DECL_RET;
1920 	WT_SESSION_IMPL *session;
1921 
1922 	WT_UNUSED(config);
1923 
1924 	session = (WT_SESSION_IMPL *)wt_session;
1925 	SESSION_API_CALL_NOCONF(session, transaction_sync);
1926 
1927 	ret = __wt_session_notsup(session);
1928 err:	API_END_RET(session, ret);
1929 }
1930 
1931 /*
1932  * __session_checkpoint --
1933  *	WT_SESSION->checkpoint method.
1934  */
1935 static int
__session_checkpoint(WT_SESSION * wt_session,const char * config)1936 __session_checkpoint(WT_SESSION *wt_session, const char *config)
1937 {
1938 	WT_DECL_RET;
1939 	WT_SESSION_IMPL *session;
1940 
1941 	session = (WT_SESSION_IMPL *)wt_session;
1942 
1943 	WT_STAT_CONN_INCR(session, txn_checkpoint);
1944 	/*
1945 	 * Indicated as allowed in prepared state, even though not allowed,
1946 	 * so that running transaction check below take precedence.
1947 	 */
1948 	SESSION_API_CALL_PREPARE_ALLOWED(session, checkpoint, config, cfg);
1949 
1950 	WT_ERR(__wt_inmem_unsupported_op(session, NULL));
1951 
1952 	/*
1953 	 * Checkpoints require a snapshot to write a transactionally consistent
1954 	 * snapshot of the data.
1955 	 *
1956 	 * We can't use an application's transaction: if it has uncommitted
1957 	 * changes, they will be written in the checkpoint and may appear after
1958 	 * a crash.
1959 	 *
1960 	 * Use a real snapshot transaction: we don't want any chance of the
1961 	 * snapshot being updated during the checkpoint.  Eviction is prevented
1962 	 * from evicting anything newer than this because we track the oldest
1963 	 * transaction ID in the system that is not visible to all readers.
1964 	 */
1965 	WT_ERR(__wt_txn_context_check(session, false));
1966 
1967 	ret = __wt_txn_checkpoint(session, cfg, true);
1968 
1969 	/*
1970 	 * Release common session resources (for example, checkpoint may acquire
1971 	 * significant reconciliation structures/memory).
1972 	 */
1973 	WT_TRET(__wt_session_release_resources(session));
1974 
1975 err:	API_END_RET_NOTFOUND_MAP(session, ret);
1976 }
1977 
1978 /*
1979  * __session_checkpoint_readonly --
1980  *	WT_SESSION->checkpoint method; readonly version.
1981  */
1982 static int
__session_checkpoint_readonly(WT_SESSION * wt_session,const char * config)1983 __session_checkpoint_readonly(WT_SESSION *wt_session, const char *config)
1984 {
1985 	WT_DECL_RET;
1986 	WT_SESSION_IMPL *session;
1987 
1988 	WT_UNUSED(config);
1989 
1990 	session = (WT_SESSION_IMPL *)wt_session;
1991 	SESSION_API_CALL_NOCONF(session, checkpoint);
1992 
1993 	ret = __wt_session_notsup(session);
1994 err:	API_END_RET(session, ret);
1995 }
1996 
1997 /*
1998  * __session_snapshot --
1999  *	WT_SESSION->snapshot method.
2000  */
2001 static int
__session_snapshot(WT_SESSION * wt_session,const char * config)2002 __session_snapshot(WT_SESSION *wt_session, const char *config)
2003 {
2004 	WT_DECL_RET;
2005 	WT_SESSION_IMPL *session;
2006 	WT_TXN_GLOBAL *txn_global;
2007 	bool has_create, has_drop;
2008 
2009 	has_create = has_drop = false;
2010 	session = (WT_SESSION_IMPL *)wt_session;
2011 	txn_global = &S2C(session)->txn_global;
2012 
2013 	SESSION_API_CALL(session, snapshot, config, cfg);
2014 
2015 	WT_ERR(__wt_txn_named_snapshot_config(
2016 	    session, cfg, &has_create, &has_drop));
2017 
2018 	__wt_writelock(session, &txn_global->nsnap_rwlock);
2019 
2020 	/* Drop any snapshots to be removed first. */
2021 	if (has_drop)
2022 		WT_ERR(__wt_txn_named_snapshot_drop(session, cfg));
2023 
2024 	/* Start the named snapshot if requested. */
2025 	if (has_create)
2026 		WT_ERR(__wt_txn_named_snapshot_begin(session, cfg));
2027 
2028 err:	__wt_writeunlock(session, &txn_global->nsnap_rwlock);
2029 
2030 	API_END_RET_NOTFOUND_MAP(session, ret);
2031 }
2032 
2033 /*
2034  * __wt_session_strerror --
2035  *	WT_SESSION->strerror method.
2036  */
2037 const char *
__wt_session_strerror(WT_SESSION * wt_session,int error)2038 __wt_session_strerror(WT_SESSION *wt_session, int error)
2039 {
2040 	WT_SESSION_IMPL *session;
2041 
2042 	session = (WT_SESSION_IMPL *)wt_session;
2043 
2044 	return (__wt_strerror(session, error, NULL, 0));
2045 }
2046 
2047 /*
2048  * __wt_session_breakpoint --
2049  *	A place to put a breakpoint, if you need one, or call some check
2050  * code.
2051  */
2052 int
__wt_session_breakpoint(WT_SESSION * wt_session)2053 __wt_session_breakpoint(WT_SESSION *wt_session)
2054 {
2055 	WT_UNUSED(wt_session);
2056 
2057 	return (0);
2058 }
2059 
2060 /*
2061  * __open_session --
2062  *	Allocate a session handle.
2063  */
2064 static int
__open_session(WT_CONNECTION_IMPL * conn,WT_EVENT_HANDLER * event_handler,const char * config,WT_SESSION_IMPL ** sessionp)2065 __open_session(WT_CONNECTION_IMPL *conn,
2066     WT_EVENT_HANDLER *event_handler, const char *config,
2067     WT_SESSION_IMPL **sessionp)
2068 {
2069 	static const WT_SESSION stds = {
2070 		NULL,
2071 		NULL,
2072 		__session_close,
2073 		__session_reconfigure,
2074 		__wt_session_strerror,
2075 		__session_open_cursor,
2076 		__session_alter,
2077 		__session_create,
2078 		__wt_session_compact,
2079 		__session_drop,
2080 		__session_join,
2081 		__session_log_flush,
2082 		__session_log_printf,
2083 		__session_rebalance,
2084 		__session_rename,
2085 		__session_reset,
2086 		__session_salvage,
2087 		__session_truncate,
2088 		__session_upgrade,
2089 		__session_verify,
2090 		__session_begin_transaction,
2091 		__session_commit_transaction,
2092 		__session_prepare_transaction,
2093 		__session_rollback_transaction,
2094 		__session_timestamp_transaction,
2095 		__session_query_timestamp,
2096 		__session_checkpoint,
2097 		__session_snapshot,
2098 		__session_transaction_pinned_range,
2099 		__session_transaction_sync,
2100 		__wt_session_breakpoint
2101 	}, stds_readonly = {
2102 		NULL,
2103 		NULL,
2104 		__session_close,
2105 		__session_reconfigure,
2106 		__wt_session_strerror,
2107 		__session_open_cursor,
2108 		__session_alter_readonly,
2109 		__session_create_readonly,
2110 		__wt_session_compact_readonly,
2111 		__session_drop_readonly,
2112 		__session_join,
2113 		__session_log_flush_readonly,
2114 		__session_log_printf_readonly,
2115 		__session_rebalance_readonly,
2116 		__session_rename_readonly,
2117 		__session_reset,
2118 		__session_salvage_readonly,
2119 		__session_truncate_readonly,
2120 		__session_upgrade_readonly,
2121 		__session_verify,
2122 		__session_begin_transaction,
2123 		__session_commit_transaction,
2124 		__session_prepare_transaction_readonly,
2125 		__session_rollback_transaction,
2126 		__session_timestamp_transaction,
2127 		__session_query_timestamp,
2128 		__session_checkpoint_readonly,
2129 		__session_snapshot,
2130 		__session_transaction_pinned_range,
2131 		__session_transaction_sync_readonly,
2132 		__wt_session_breakpoint
2133 	};
2134 	WT_DECL_RET;
2135 	WT_SESSION_IMPL *session, *session_ret;
2136 	uint32_t i;
2137 
2138 	*sessionp = NULL;
2139 
2140 	session = conn->default_session;
2141 	session_ret = NULL;
2142 
2143 	__wt_spin_lock(session, &conn->api_lock);
2144 
2145 	/*
2146 	 * Make sure we don't try to open a new session after the application
2147 	 * closes the connection.  This is particularly intended to catch
2148 	 * cases where server threads open sessions.
2149 	 */
2150 	WT_ASSERT(session, !F_ISSET(conn, WT_CONN_CLOSING));
2151 
2152 	/* Find the first inactive session slot. */
2153 	for (session_ret = conn->sessions,
2154 	    i = 0; i < conn->session_size; ++session_ret, ++i)
2155 		if (!session_ret->active)
2156 			break;
2157 	if (i == conn->session_size)
2158 		WT_ERR_MSG(session, WT_ERROR,
2159 		    "out of sessions, configured for %" PRIu32 " (including "
2160 		    "internal sessions)",
2161 		    conn->session_size);
2162 
2163 	/*
2164 	 * If the active session count is increasing, update it.  We don't worry
2165 	 * about correcting the session count on error, as long as we don't mark
2166 	 * this session as active, we'll clean it up on close.
2167 	 */
2168 	if (i >= conn->session_cnt)	/* Defend against off-by-one errors. */
2169 		conn->session_cnt = i + 1;
2170 
2171 	session_ret->iface =
2172 	    F_ISSET(conn, WT_CONN_READONLY) ? stds_readonly : stds;
2173 	session_ret->iface.connection = &conn->iface;
2174 
2175 	session_ret->name = NULL;
2176 	session_ret->id = i;
2177 
2178 	if (WT_SESSION_FIRST_USE(session_ret))
2179 		__wt_random_init(&session_ret->rnd);
2180 
2181 	__wt_event_handler_set(session_ret,
2182 	    event_handler == NULL ? session->event_handler : event_handler);
2183 
2184 	TAILQ_INIT(&session_ret->cursors);
2185 	TAILQ_INIT(&session_ret->dhandles);
2186 
2187 	/*
2188 	 * If we don't have them, allocate the cursor and dhandle hash arrays.
2189 	 * Allocate the table hash array as well.
2190 	 */
2191 	if (session_ret->cursor_cache == NULL)
2192 		WT_ERR(__wt_calloc_def(
2193 		    session, WT_HASH_ARRAY_SIZE, &session_ret->cursor_cache));
2194 	if (session_ret->dhhash == NULL)
2195 		WT_ERR(__wt_calloc_def(
2196 		    session, WT_HASH_ARRAY_SIZE, &session_ret->dhhash));
2197 
2198 	/* Initialize the dhandle hash array. */
2199 	for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
2200 		TAILQ_INIT(&session_ret->dhhash[i]);
2201 
2202 	/* Initialize the cursor cache hash buckets and sweep trigger. */
2203 	for (i = 0; i < WT_HASH_ARRAY_SIZE; i++)
2204 		TAILQ_INIT(&session_ret->cursor_cache[i]);
2205 	session_ret->cursor_sweep_countdown = WT_SESSION_CURSOR_SWEEP_COUNTDOWN;
2206 
2207 	/* Initialize transaction support: default to read-committed. */
2208 	session_ret->isolation = WT_ISO_READ_COMMITTED;
2209 	WT_ERR(__wt_txn_init(session, session_ret));
2210 
2211 	/*
2212 	 * The session's hazard pointer memory isn't discarded during normal
2213 	 * session close because access to it isn't serialized.  Allocate the
2214 	 * first time we open this session.
2215 	 */
2216 	if (WT_SESSION_FIRST_USE(session_ret)) {
2217 		WT_ERR(__wt_calloc_def(session,
2218 		    WT_SESSION_INITIAL_HAZARD_SLOTS, &session_ret->hazard));
2219 		session_ret->hazard_size = WT_SESSION_INITIAL_HAZARD_SLOTS;
2220 		session_ret->hazard_inuse = 0;
2221 		session_ret->nhazard = 0;
2222 	}
2223 
2224 	/* Cache the offset of this session's statistics bucket. */
2225 	session_ret->stat_bucket = WT_STATS_SLOT_ID(session);
2226 
2227 	/* Allocate the buffer for operation tracking */
2228 	if (F_ISSET(conn, WT_CONN_OPTRACK)) {
2229 		WT_ERR(__wt_malloc(
2230 		    session, WT_OPTRACK_BUFSIZE, &session_ret->optrack_buf));
2231 		session_ret->optrackbuf_ptr = 0;
2232 	}
2233 
2234 	/* Set the default value for session flags. */
2235 	if (F_ISSET(conn, WT_CONN_CACHE_CURSORS))
2236 		F_SET(session_ret, WT_SESSION_CACHE_CURSORS);
2237 
2238 	/*
2239 	 * Configuration: currently, the configuration for open_session is the
2240 	 * same as session.reconfigure, so use that function.
2241 	 */
2242 	if (config != NULL)
2243 		WT_ERR(
2244 		    __session_reconfigure((WT_SESSION *)session_ret, config));
2245 
2246 	/*
2247 	 * Publish: make the entry visible to server threads.  There must be a
2248 	 * barrier for two reasons, to ensure structure fields are set before
2249 	 * any other thread will consider the session, and to push the session
2250 	 * count to ensure the eviction thread can't review too few slots.
2251 	 */
2252 	WT_PUBLISH(session_ret->active, 1);
2253 
2254 	WT_STATIC_ASSERT(offsetof(WT_SESSION_IMPL, iface) == 0);
2255 	*sessionp = session_ret;
2256 
2257 	WT_STAT_CONN_INCR(session, session_open);
2258 
2259 err:	__wt_spin_unlock(session, &conn->api_lock);
2260 	return (ret);
2261 }
2262 
2263 /*
2264  * __wt_open_session --
2265  *	Allocate a session handle.
2266  */
2267 int
__wt_open_session(WT_CONNECTION_IMPL * conn,WT_EVENT_HANDLER * event_handler,const char * config,bool open_metadata,WT_SESSION_IMPL ** sessionp)2268 __wt_open_session(WT_CONNECTION_IMPL *conn,
2269     WT_EVENT_HANDLER *event_handler, const char *config,
2270     bool open_metadata, WT_SESSION_IMPL **sessionp)
2271 {
2272 	WT_DECL_RET;
2273 	WT_SESSION *wt_session;
2274 	WT_SESSION_IMPL *session;
2275 
2276 	*sessionp = NULL;
2277 
2278 	/* Acquire a session. */
2279 	WT_RET(__open_session(conn, event_handler, config, &session));
2280 
2281 	/*
2282 	 * Acquiring the metadata handle requires the schema lock; we've seen
2283 	 * problems in the past where a session has acquired the schema lock
2284 	 * unexpectedly, relatively late in the run, and deadlocked. Be
2285 	 * defensive, get it now.  The metadata file may not exist when the
2286 	 * connection first creates its default session or the shared cache
2287 	 * pool creates its sessions, let our caller decline this work.
2288 	 */
2289 	if (open_metadata) {
2290 		WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
2291 		if ((ret = __wt_metadata_cursor(session, NULL)) != 0) {
2292 			wt_session = &session->iface;
2293 			WT_TRET(wt_session->close(wt_session, NULL));
2294 			return (ret);
2295 		}
2296 	}
2297 
2298 	*sessionp = session;
2299 	return (0);
2300 }
2301 
2302 /*
2303  * __wt_open_internal_session --
2304  *	Allocate a session for WiredTiger's use.
2305  */
2306 int
__wt_open_internal_session(WT_CONNECTION_IMPL * conn,const char * name,bool open_metadata,uint32_t session_flags,WT_SESSION_IMPL ** sessionp)2307 __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name,
2308     bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp)
2309 {
2310 	WT_SESSION_IMPL *session;
2311 
2312 	*sessionp = NULL;
2313 
2314 	/* Acquire a session. */
2315 	WT_RET(__wt_open_session(conn, NULL, NULL, open_metadata, &session));
2316 	session->name = name;
2317 
2318 	/*
2319 	 * Public sessions are automatically closed during WT_CONNECTION->close.
2320 	 * If the session handles for internal threads were to go on the public
2321 	 * list, there would be complex ordering issues during close.  Set a
2322 	 * flag to avoid this: internal sessions are not closed automatically.
2323 	 */
2324 	F_SET(session, session_flags | WT_SESSION_INTERNAL);
2325 
2326 	*sessionp = session;
2327 	return (0);
2328 }
2329