1 /*-
2  * Copyright (c) 2014-2018 MongoDB, Inc.
3  * Copyright (c) 2008-2014 WiredTiger, Inc.
4  *	All rights reserved.
5  *
6  * See the file LICENSE for redistribution information.
7  */
8 
/*
 * Reserved transaction ID values.
 */
/* No transaction is running in the session. */
#define	WT_TXN_NONE	0
/* The first transaction to run. */
#define	WT_TXN_FIRST	1
/* The update was rolled back and should be ignored. */
#define	WT_TXN_ABORTED	UINT64_MAX
12 
/*
 * Bit flags with the WT_TXN_LOG_CKPT_ prefix. Each value is a distinct single
 * bit. The values sit between AUTOMATIC FLAG VALUE GENERATION markers, so
 * they are expected to be regenerated by tooling rather than edited by hand.
 * NOTE(review): presumably these identify the checkpoint step being logged --
 * confirm against the code that consumes them.
 */
/* AUTOMATIC FLAG VALUE GENERATION START */
#define	WT_TXN_LOG_CKPT_CLEANUP	0x01u
#define	WT_TXN_LOG_CKPT_PREPARE	0x02u
#define	WT_TXN_LOG_CKPT_START	0x04u
#define	WT_TXN_LOG_CKPT_STOP	0x08u
#define	WT_TXN_LOG_CKPT_SYNC	0x10u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
20 
/*
 * Bit flags with the WT_TXN_OLDEST_ prefix. Each value is a distinct single
 * bit. The values sit between AUTOMATIC FLAG VALUE GENERATION markers, so
 * they are expected to be regenerated by tooling rather than edited by hand.
 * NOTE(review): presumably these tune how the oldest transaction ID is
 * updated -- confirm against the code that consumes them.
 */
/* AUTOMATIC FLAG VALUE GENERATION START */
#define	WT_TXN_OLDEST_STRICT	0x1u
#define	WT_TXN_OLDEST_WAIT	0x2u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
25 
/*
 * Transaction ID ordering.
 *
 * A plain comparison copes with the edge cases: WT_TXN_ABORTED is the largest
 * possible ID (so it is never visible to a running transaction) and
 * WT_TXN_NONE is smaller than any possible ID (so it is visible to all
 * running transactions).
 *
 * Each argument is evaluated exactly once.
 */
#define	WT_TXNID_LE(t1, t2)	((t1) <= (t2))
#define	WT_TXNID_LT(t1, t2)	((t1) < (t2))
38 
/*
 * Address of the session's entry in the txn_global.states array reached
 * through S2C(s); the entry is selected by the session's id.
 */
#define	WT_SESSION_TXN_STATE(s)						\
	(S2C(s)->txn_global.states + (s)->id)
40 
/*
 * True when the session's id is non-zero and matches the checkpoint session ID
 * recorded in txn_global.checkpoint_id. A session with id 0 never matches, and
 * checkpoint_id is not read at all in that case.
 */
#define	WT_SESSION_IS_CHECKPOINT(s)					\
	(0 != (s)->id && S2C(s)->txn_global.checkpoint_id == (s)->id)
43 
/*
 * WT_WITH_TXN_ISOLATION --
 *	Perform an operation at the specified isolation level.
 *
 * s is the session, iso the isolation level to force and op the code to run.
 * The session's and the transaction's isolation levels are saved, both are set
 * to iso while op runs, and both are put back afterwards. txn.forced_iso is
 * incremented for the duration of op.
 *
 * This is fiddly: we can't cope with operations that begin transactions
 * (leaving an ID allocated), and operations must not move our published
 * snap_min forwards (or updates we need could be freed while this operation is
 * in progress).  Check for those cases: the bugs they cause are hard to debug.
 * After the check, the session's metadata_pinned and pinned_id are reset to
 * the values they had on entry.
 *
 * The macro's own variables carry a wt_iso_ prefix so identifiers appearing in
 * op (for example a caller's txn_state or saved_state) refer to the caller's
 * variables rather than being captured by the macro's.
 *
 * Caveats: s is evaluated more than once, so it must be free of side effects.
 * op is expanded inside the macro's do/while block; leaving it early (break,
 * continue, goto or return) skips the restore steps.
 */
#define	WT_WITH_TXN_ISOLATION(s, iso, op) do {				\
	WT_TXN_ISOLATION wt_iso_saved_session = (s)->isolation;		\
	WT_TXN_ISOLATION wt_iso_saved_txn = (s)->txn.isolation;		\
	WT_TXN_STATE *wt_iso_state = WT_SESSION_TXN_STATE(s);		\
	WT_TXN_STATE wt_iso_saved_state = *wt_iso_state;		\
	(s)->txn.forced_iso++;						\
	(s)->isolation = (s)->txn.isolation = (iso);			\
	op;								\
	(s)->isolation = wt_iso_saved_session;				\
	(s)->txn.isolation = wt_iso_saved_txn;				\
	WT_ASSERT((s), (s)->txn.forced_iso > 0);			\
	(s)->txn.forced_iso--;						\
	WT_ASSERT((s), wt_iso_state->id == wt_iso_saved_state.id &&	\
	    (wt_iso_state->metadata_pinned ==				\
	    wt_iso_saved_state.metadata_pinned ||			\
	    wt_iso_saved_state.metadata_pinned == WT_TXN_NONE) &&	\
	    (wt_iso_state->pinned_id == wt_iso_saved_state.pinned_id ||	\
	    wt_iso_saved_state.pinned_id == WT_TXN_NONE));		\
	wt_iso_state->metadata_pinned =					\
	    wt_iso_saved_state.metadata_pinned;				\
	wt_iso_state->pinned_id = wt_iso_saved_state.pinned_id;		\
} while (0)
72 
/*
 * __wt_named_snapshot --
 *	A snapshot stored under a name. Entries are linked through q; the
 *	global transaction structure keeps the queue head (nsnaph).
 */
struct __wt_named_snapshot {
	const char *name;			/* Snapshot name */

	TAILQ_ENTRY(__wt_named_snapshot) q;	/* Queue linkage */

	/*
	 * NOTE(review): these presumably hold the values captured from the
	 * transaction that created the snapshot (the per-session transaction
	 * structure has like-named fields) -- confirm against the code that
	 * fills them in.
	 */
	uint64_t id;
	uint64_t pinned_id;
	uint64_t snap_min;
	uint64_t snap_max;
	uint64_t *snapshot;
	uint32_t snapshot_count;
};
82 
/*
 * __wt_txn_state --
 *	Transaction state kept per session. The global transaction structure
 *	holds an array of these (states), indexed by session id through
 *	WT_SESSION_TXN_STATE, plus a separate instance for the checkpoint.
 *	WT_WITH_TXN_ISOLATION snapshots all three fields before running its
 *	operation and checks them afterwards.
 *
 *	NOTE(review): the WT_CACHE_LINE_PAD_BEGIN/END macros are defined
 *	elsewhere; presumably they pad each entry out to a cache line --
 *	confirm.
 */
struct __wt_txn_state {
	WT_CACHE_LINE_PAD_BEGIN
	volatile uint64_t id;
	volatile uint64_t pinned_id;
	volatile uint64_t metadata_pinned;

	WT_CACHE_LINE_PAD_END
};
91 
/*
 * __wt_txn_global --
 *	Global transaction state, reached from a session via
 *	S2C(session)->txn_global: transaction ID counters, global timestamps,
 *	the locks protecting them, the timestamp-ordered transaction queues,
 *	checkpoint and named-snapshot tracking, and the array of per-session
 *	transaction states.
 */
struct __wt_txn_global {
	volatile uint64_t current;	/* Current transaction ID. */

	/* The oldest running transaction ID (may race). */
	volatile uint64_t last_running;

	/*
	 * The oldest transaction ID that is not yet visible to some
	 * transaction in the system.
	 */
	volatile uint64_t oldest_id;

	/*
	 * Global timestamps. WT_DECL_TIMESTAMP is defined elsewhere.
	 * NOTE(review): presumably each has_* flag below records whether the
	 * like-named timestamp has been set, and the *_is_pinned flags whether
	 * that timestamp is the pinned one -- confirm.
	 */
	WT_DECL_TIMESTAMP(commit_timestamp)
	WT_DECL_TIMESTAMP(last_ckpt_timestamp)
	WT_DECL_TIMESTAMP(meta_ckpt_timestamp)
	WT_DECL_TIMESTAMP(oldest_timestamp)
	WT_DECL_TIMESTAMP(pinned_timestamp)
	WT_DECL_TIMESTAMP(recovery_timestamp)
	WT_DECL_TIMESTAMP(stable_timestamp)
	bool has_commit_timestamp;
	bool has_oldest_timestamp;
	bool has_pinned_timestamp;
	bool has_stable_timestamp;
	bool oldest_is_pinned;
	bool stable_is_pinned;

	/*
	 * NOTE(review): undocumented in the original; presumably serializes
	 * transaction ID allocation -- confirm.
	 */
	WT_SPINLOCK id_lock;

	/* Protects the active transaction states. */
	WT_RWLOCK rwlock;

	/* Protects logging, checkpoints and transaction visibility. */
	WT_RWLOCK visibility_rwlock;

	/*
	 * List of transactions sorted by commit timestamp. Transactions are
	 * linked through their commit_timestampq entry.
	 */
	WT_RWLOCK commit_timestamp_rwlock;
	TAILQ_HEAD(__wt_txn_cts_qh, __wt_txn) commit_timestamph;
	uint32_t commit_timestampq_len;

	/*
	 * List of transactions sorted by read timestamp. Transactions are
	 * linked through their read_timestampq entry.
	 */
	WT_RWLOCK read_timestamp_rwlock;
	TAILQ_HEAD(__wt_txn_rts_qh, __wt_txn) read_timestamph;
	uint32_t read_timestampq_len;

	/*
	 * Track information about the running checkpoint. The transaction
	 * snapshots used when checkpointing are special. Checkpoints can run
	 * for a long time so we keep them out of regular visibility checks.
	 * Eviction and checkpoint operations know when they need to be aware
	 * of checkpoint transactions.
	 *
	 * We rely on the fact that (a) the only table a checkpoint updates is
	 * the metadata; and (b) once checkpoint has finished reading a table,
	 * it won't revisit it.
	 */
	volatile bool	  checkpoint_running;	/* Checkpoint running */
	volatile uint32_t checkpoint_id;	/* Checkpoint's session ID */
	WT_TXN_STATE	  checkpoint_state;	/* Checkpoint's txn state */
	WT_DECL_TIMESTAMP(checkpoint_timestamp)	/* Checkpoint's timestamp */

	volatile uint64_t metadata_pinned;	/* Oldest ID for metadata */

	/*
	 * Named snapshot state. nsnaph heads the queue of named snapshots,
	 * which are linked through their q entry.
	 */
	WT_RWLOCK nsnap_rwlock;
	volatile uint64_t nsnap_oldest_id;
	TAILQ_HEAD(__wt_nsnap_qh, __wt_named_snapshot) nsnaph;

	WT_TXN_STATE *states;		/* Per-session transaction states */
};
161 
/*
 * WT_TXN_ISOLATION --
 *	Isolation levels. Both the session and its transaction carry a value of
 *	this type (see WT_WITH_TXN_ISOLATION, which saves and restores both).
 */
typedef enum __wt_txn_isolation {
	WT_ISO_READ_COMMITTED = 0,
	WT_ISO_READ_UNCOMMITTED = 1,
	WT_ISO_SNAPSHOT = 2
} WT_TXN_ISOLATION;
167 
168 /*
169  * WT_TXN_OP --
170  *	A transactional operation.  Each transaction builds an in-memory array
171  *	of these operations as it runs, then uses the array to either write log
172  *	records during commit or undo the operations during rollback.
173  */
174 struct __wt_txn_op {
175 	WT_BTREE *btree;
176 	enum {
177 		WT_TXN_OP_NONE=0,
178 		WT_TXN_OP_BASIC_COL,
179 		WT_TXN_OP_BASIC_ROW,
180 		WT_TXN_OP_INMEM_COL,
181 		WT_TXN_OP_INMEM_ROW,
182 		WT_TXN_OP_REF_DELETE,
183 		WT_TXN_OP_TRUNCATE_COL,
184 		WT_TXN_OP_TRUNCATE_ROW
185 	} type;
186 	union {
187 		/* WT_TXN_OP_BASIC_ROW, WT_TXN_OP_INMEM_ROW */
188 		struct {
189 			WT_UPDATE *upd;
190 			WT_ITEM key;
191 		} op_row;
192 
193 		/* WT_TXN_OP_BASIC_COL, WT_TXN_OP_INMEM_COL */
194 		struct {
195 			WT_UPDATE *upd;
196 			uint64_t recno;
197 		} op_col;
198 /*
199  * upd is pointing to same memory in both op_row and op_col, so for simplicity
200  * just chose op_row upd
201  */
202 #undef op_upd
203 #define	op_upd	op_row.upd
204 
205 		/* WT_TXN_OP_REF_DELETE */
206 		WT_REF *ref;
207 		/* WT_TXN_OP_TRUNCATE_COL */
208 		struct {
209 			uint64_t start, stop;
210 		} truncate_col;
211 		/* WT_TXN_OP_TRUNCATE_ROW */
212 		struct {
213 			WT_ITEM start, stop;
214 			enum {
215 				WT_TXN_TRUNC_ALL,
216 				WT_TXN_TRUNC_BOTH,
217 				WT_TXN_TRUNC_START,
218 				WT_TXN_TRUNC_STOP
219 			} mode;
220 		} truncate_row;
221 	} u;
222 };
223 
224 /*
225  * WT_TXN --
226  *	Per-session transaction context.
227  */
228 struct __wt_txn {
229 	uint64_t id;
230 
231 	WT_TXN_ISOLATION isolation;
232 
233 	uint32_t forced_iso;	/* Isolation is currently forced. */
234 
235 	/*
236 	 * Snapshot data:
237 	 *	ids < snap_min are visible,
238 	 *	ids > snap_max are invisible,
239 	 *	everything else is visible unless it is in the snapshot.
240 	 */
241 	uint64_t snap_min, snap_max;
242 	uint64_t *snapshot;
243 	uint32_t snapshot_count;
244 	uint32_t txn_logsync;	/* Log sync configuration */
245 
246 	/*
247 	 * Timestamp copied into updates created by this transaction.
248 	 *
249 	 * In some use cases, this can be updated while the transaction is
250 	 * running.
251 	 */
252 	WT_DECL_TIMESTAMP(commit_timestamp)
253 
254 	/*
255 	 * Set to the first commit timestamp used in the transaction and fixed
256 	 * while the transaction is on the public list of committed timestamps.
257 	 */
258 	WT_DECL_TIMESTAMP(first_commit_timestamp)
259 
260 	/*
261 	 * Timestamp copied into updates created by this transaction, when this
262 	 * transaction is prepared.
263 	 */
264 	WT_DECL_TIMESTAMP(prepare_timestamp)
265 
266 	/* Read updates committed as of this timestamp. */
267 	WT_DECL_TIMESTAMP(read_timestamp)
268 
269 	TAILQ_ENTRY(__wt_txn) commit_timestampq;
270 	TAILQ_ENTRY(__wt_txn) read_timestampq;
271 	bool clear_commit_q;	/* Set if need to clear from the commit queue */
272 	bool clear_read_q;	/* Set if need to clear from the read queue */
273 
274 	/* Array of modifications by this transaction. */
275 	WT_TXN_OP      *mod;
276 	size_t		mod_alloc;
277 	u_int		mod_count;
278 
279 	/* Scratch buffer for in-memory log records. */
280 	WT_ITEM	       *logrec;
281 
282 	/* Requested notification when transactions are resolved. */
283 	WT_TXN_NOTIFY *notify;
284 
285 	/* Checkpoint status. */
286 	WT_LSN		ckpt_lsn;
287 	uint32_t	ckpt_nsnapshot;
288 	WT_ITEM		*ckpt_snapshot;
289 	bool		full_ckpt;
290 
291 	const char *rollback_reason;		/* If rollback, the reason */
292 
293 /* AUTOMATIC FLAG VALUE GENERATION START */
294 #define	WT_TXN_AUTOCOMMIT	0x00001u
295 #define	WT_TXN_ERROR		0x00002u
296 #define	WT_TXN_HAS_ID		0x00004u
297 #define	WT_TXN_HAS_SNAPSHOT	0x00008u
298 #define	WT_TXN_HAS_TS_COMMIT	0x00010u
299 #define	WT_TXN_HAS_TS_READ	0x00020u
300 #define	WT_TXN_IGNORE_PREPARE	0x00040u
301 #define	WT_TXN_NAMED_SNAPSHOT	0x00080u
302 #define	WT_TXN_PREPARE		0x00100u
303 #define	WT_TXN_PUBLIC_TS_COMMIT	0x00200u
304 #define	WT_TXN_PUBLIC_TS_READ	0x00400u
305 #define	WT_TXN_READONLY		0x00800u
306 #define	WT_TXN_RUNNING		0x01000u
307 #define	WT_TXN_SYNC_SET		0x02000u
308 #define	WT_TXN_TS_COMMIT_ALWAYS	0x04000u
309 #define	WT_TXN_TS_COMMIT_KEYS	0x08000u
310 #define	WT_TXN_TS_COMMIT_NEVER	0x10000u
311 #define	WT_TXN_UPDATE	        0x20000u
312 /* AUTOMATIC FLAG VALUE GENERATION STOP */
313 	uint32_t flags;
314 };
315