/*-
 * Copyright (c) 2014-2018 MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

9 /*
10  * WT_DATA_HANDLE_CACHE --
11  *	Per-session cache of handles to avoid synchronization when opening
12  *	cursors.
13  */
14 struct __wt_data_handle_cache {
15 	WT_DATA_HANDLE *dhandle;
16 
17 	TAILQ_ENTRY(__wt_data_handle_cache) q;
18 	TAILQ_ENTRY(__wt_data_handle_cache) hashq;
19 };
20 
21 /*
22  * WT_HAZARD --
23  *	A hazard pointer.
24  */
25 struct __wt_hazard {
26 	WT_REF *ref;			/* Page reference */
27 #ifdef HAVE_DIAGNOSTIC
28 	const char *func;		/* Function/line hazard acquired */
29 	int	    line;
30 #endif
31 };
32 
/*
 * Get the connection implementation for a session: the session's public
 * interface holds the connection pointer, which is cast to the implementation
 * type.
 */
#define	S2C(session)	  ((WT_CONNECTION_IMPL *)(session)->iface.connection)

/*
 * Get the btree for a session.
 *
 * S2BT dereferences the session's current data handle unconditionally, so the
 * handle must be set. S2BT_SAFE yields NULL when no data handle is set; note it
 * evaluates its argument twice, so the argument must not have side effects.
 */
#define	S2BT(session)	   ((WT_BTREE *)(session)->dhandle->handle)
#define	S2BT_SAFE(session) ((session)->dhandle == NULL ? NULL : S2BT(session))

40 typedef TAILQ_HEAD(__wt_cursor_list, __wt_cursor)	WT_CURSOR_LIST;
41 
/* Number of cursors cached to trigger cursor sweep. */
#define	WT_SESSION_CURSOR_SWEEP_COUNTDOWN	20

/* Minimum number of buckets to visit during cursor sweep. */
#define	WT_SESSION_CURSOR_SWEEP_MIN		5

/* Maximum number of buckets to visit during cursor sweep. */
#define	WT_SESSION_CURSOR_SWEEP_MAX		32
50 /*
51  * WT_SESSION_IMPL --
52  *	Implementation of WT_SESSION.
53  */
54 struct __wt_session_impl {
55 	WT_SESSION iface;
56 
57 	void	*lang_private;		/* Language specific private storage */
58 
59 	u_int active;			/* Non-zero if the session is in-use */
60 
61 	const char *name;		/* Name */
62 	const char *lastop;		/* Last operation */
63 	uint32_t id;			/* UID, offset in session array */
64 
65 	WT_EVENT_HANDLER *event_handler;/* Application's event handlers */
66 
67 	WT_DATA_HANDLE *dhandle;	/* Current data handle */
68 
69 	/*
70 	 * Each session keeps a cache of data handles. The set of handles can
71 	 * grow quite large so we maintain both a simple list and a hash table
72 	 * of lists. The hash table key is based on a hash of the data handle's
73 	 * URI.  The hash table list is kept in allocated memory that lives
74 	 * across session close - so it is declared further down.
75 	 */
76 					/* Session handle reference list */
77 	TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles;
78 	time_t last_sweep;		/* Last sweep for dead handles */
79 	struct timespec last_epoch;	/* Last epoch time returned */
80 
81 	WT_CURSOR_LIST cursors;		/* Cursors closed with the session */
82 	uint32_t cursor_sweep_position;	/* Position in cursor_cache for sweep */
83 	uint32_t cursor_sweep_countdown;/* Countdown to cursor sweep */
84 	time_t last_cursor_sweep;	/* Last sweep for dead cursors */
85 
86 	WT_CURSOR_BACKUP *bkp_cursor;	/* Hot backup cursor */
87 
88 	WT_COMPACT_STATE *compact;	/* Compaction information */
89 	enum { WT_COMPACT_NONE=0,
90 	    WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state;
91 
92 	WT_CURSOR	*las_cursor;	/* Lookaside table cursor */
93 
94 	WT_CURSOR *meta_cursor;		/* Metadata file */
95 	void	  *meta_track;		/* Metadata operation tracking */
96 	void	  *meta_track_next;	/* Current position */
97 	void	  *meta_track_sub;	/* Child transaction / save point */
98 	size_t	   meta_track_alloc;	/* Currently allocated */
99 	int	   meta_track_nest;	/* Nesting level of meta transaction */
100 #define	WT_META_TRACKING(session)	((session)->meta_track_next != NULL)
101 
102 	/* Current rwlock for callback. */
103 	WT_RWLOCK *current_rwlock;
104 	uint8_t current_rwticket;
105 
106 	WT_ITEM	**scratch;		/* Temporary memory for any function */
107 	u_int	  scratch_alloc;	/* Currently allocated */
108 	size_t	  scratch_cached;	/* Scratch bytes cached */
109 #ifdef HAVE_DIAGNOSTIC
110 	/*
111 	 * Variables used to look for violations of the contract that a
112 	 * session is only used by a single session at once.
113 	 */
114 	volatile uintmax_t api_tid;
115 	volatile uint32_t api_enter_refcnt;
116 	/*
117 	 * It's hard to figure out from where a buffer was allocated after it's
118 	 * leaked, so in diagnostic mode we track them; DIAGNOSTIC can't simply
119 	 * add additional fields to WT_ITEM structures because they are visible
120 	 * to applications, create a parallel structure instead.
121 	 */
122 	struct __wt_scratch_track {
123 		const char *func;	/* Allocating function, line */
124 		int line;
125 	} *scratch_track;
126 #endif
127 
128 	WT_ITEM err;			/* Error buffer */
129 
130 	WT_TXN_ISOLATION isolation;
131 	WT_TXN	txn;			/* Transaction state */
132 #define	WT_SESSION_BG_SYNC_MSEC		1200000
133 	WT_LSN	bg_sync_lsn;		/* Background sync operation LSN. */
134 	u_int	ncursors;		/* Count of active file cursors. */
135 
136 	void	*block_manager;		/* Block-manager support */
137 	int	(*block_manager_cleanup)(WT_SESSION_IMPL *);
138 
139 					/* Checkpoint handles */
140 	WT_DATA_HANDLE **ckpt_handle;	/* Handle list */
141 	u_int   ckpt_handle_next;	/* Next empty slot */
142 	size_t  ckpt_handle_allocated;	/* Bytes allocated */
143 
144 	uint64_t cache_wait_us; /* Wait time for cache for current operation */
145 
146 	/*
147 	 * Operations acting on handles.
148 	 *
149 	 * The preferred pattern is to gather all of the required handles at
150 	 * the beginning of an operation, then drop any other locks, perform
151 	 * the operation, then release the handles.  This cannot be easily
152 	 * merged with the list of checkpoint handles because some operations
153 	 * (such as compact) do checkpoints internally.
154 	 */
155 	WT_DATA_HANDLE **op_handle;	/* Handle list */
156 	u_int   op_handle_next;		/* Next empty slot */
157 	size_t  op_handle_allocated;	/* Bytes allocated */
158 
159 	void	*reconcile;		/* Reconciliation support */
160 	int	(*reconcile_cleanup)(WT_SESSION_IMPL *);
161 
162 	/* Sessions have an associated statistics bucket based on its ID. */
163 	u_int	stat_bucket;		/* Statistics bucket offset */
164 
165 /* AUTOMATIC FLAG VALUE GENERATION START */
166 #define	WT_SESSION_CACHE_CURSORS		0x000001u
167 #define	WT_SESSION_CAN_WAIT			0x000002u
168 #define	WT_SESSION_IGNORE_CACHE_SIZE		0x000004u
169 #define	WT_SESSION_INTERNAL			0x000008u
170 #define	WT_SESSION_LOCKED_CHECKPOINT		0x000010u
171 #define	WT_SESSION_LOCKED_HANDLE_LIST_READ	0x000020u
172 #define	WT_SESSION_LOCKED_HANDLE_LIST_WRITE	0x000040u
173 #define	WT_SESSION_LOCKED_METADATA		0x000080u
174 #define	WT_SESSION_LOCKED_PASS			0x000100u
175 #define	WT_SESSION_LOCKED_SCHEMA		0x000200u
176 #define	WT_SESSION_LOCKED_SLOT			0x000400u
177 #define	WT_SESSION_LOCKED_TABLE_READ		0x000800u
178 #define	WT_SESSION_LOCKED_TABLE_WRITE		0x001000u
179 #define	WT_SESSION_LOCKED_TURTLE		0x002000u
180 #define	WT_SESSION_LOGGING_INMEM		0x004000u
181 #define	WT_SESSION_LOOKASIDE_CURSOR		0x008000u
182 #define	WT_SESSION_NO_DATA_HANDLES		0x010000u
183 #define	WT_SESSION_NO_LOGGING			0x020000u
184 #define	WT_SESSION_NO_RECONCILE			0x040000u
185 #define	WT_SESSION_NO_SCHEMA_LOCK		0x080000u
186 #define	WT_SESSION_QUIET_CORRUPT_FILE		0x100000u
187 #define	WT_SESSION_READ_WONT_NEED		0x200000u
188 #define	WT_SESSION_SCHEMA_TXN			0x400000u
189 #define	WT_SESSION_SERVER_ASYNC			0x800000u
190 /* AUTOMATIC FLAG VALUE GENERATION STOP */
191 	uint32_t flags;
192 
193 	/*
194 	 * All of the following fields live at the end of the structure so it's
195 	 * easier to clear everything but the fields that persist.
196 	 */
197 #define	WT_SESSION_CLEAR_SIZE	(offsetof(WT_SESSION_IMPL, rnd))
198 
199 	/*
200 	 * The random number state persists past session close because we don't
201 	 * want to repeatedly use the same values for skiplist depth when the
202 	 * application isn't caching sessions.
203 	 */
204 	WT_RAND_STATE rnd;		/* Random number generation state */
205 
206 	/*
207 	 * Hash tables are allocated lazily as sessions are used to keep the
208 	 * size of this structure from growing too large.
209 	 */
210 	WT_CURSOR_LIST *cursor_cache;	/* Hash table of cached cursors */
211 
212 					/* Hashed handle reference list array */
213 	TAILQ_HEAD(__dhandles_hash, __wt_data_handle_cache) *dhhash;
214 
215 					/* Generations manager */
216 #define	WT_GEN_CHECKPOINT	0	/* Checkpoint generation */
217 #define	WT_GEN_COMMIT		1	/* Commit generation */
218 #define	WT_GEN_EVICT		2	/* Eviction generation */
219 #define	WT_GEN_HAZARD		3	/* Hazard pointer */
220 #define	WT_GEN_SPLIT		4	/* Page splits */
221 #define	WT_GENERATIONS		5	/* Total generation manager entries */
222 	volatile uint64_t generations[WT_GENERATIONS];
223 
224 	/*
225 	 * Session memory persists past session close because it's accessed by
226 	 * threads of control other than the thread owning the session. For
227 	 * example, btree splits and hazard pointers can "free" memory that's
228 	 * still in use. In order to eventually free it, it's stashed here with
229 	 * with its generation number; when no thread is reading in generation,
230 	 * the memory can be freed for real.
231 	 */
232 	struct __wt_session_stash {
233 		struct __wt_stash {
234 			void	*p;	/* Memory, length */
235 			size_t	 len;
236 			uint64_t gen;	/* Generation */
237 		} *list;
238 		size_t  cnt;		/* Array entries */
239 		size_t  alloc;		/* Allocated bytes */
240 	} stash[WT_GENERATIONS];
241 
242 	/*
243 	 * Hazard pointers.
244 	 *
245 	 * Hazard information persists past session close because it's accessed
246 	 * by threads of control other than the thread owning the session.
247 	 *
248 	 * Use the non-NULL state of the hazard field to know if the session has
249 	 * previously been initialized.
250 	 */
251 #define	WT_SESSION_FIRST_USE(s)						\
252 	((s)->hazard == NULL)
253 
254 	/*
255 	 * The hazard pointer array grows as necessary, initialize with 250
256 	 * slots.
257 	 */
258 #define	WT_SESSION_INITIAL_HAZARD_SLOTS	250
259 	uint32_t   hazard_size;		/* Hazard pointer array slots */
260 	uint32_t   hazard_inuse;	/* Hazard pointer array slots in-use */
261 	uint32_t   nhazard;		/* Count of active hazard pointers */
262 	WT_HAZARD *hazard;		/* Hazard pointer array */
263 
264 	/*
265 	 * Operation tracking.
266 	 */
267 	WT_OPTRACK_RECORD *optrack_buf;
268 	u_int optrackbuf_ptr;
269 	uint64_t optrack_offset;
270 	WT_FH *optrack_fh;
271 };
272