/*-
 * Copyright (c) 2014-2018 MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

#define	WT_TXN_NONE	0		/* No txn running in a session. */
#define	WT_TXN_FIRST	1		/* First transaction to run. */
#define	WT_TXN_ABORTED	UINT64_MAX	/* Update rolled back, ignore. */

/*
 * Checkpoint-related transaction log flags, one distinct bit each.
 *
 * NOTE(review): the START/STOP markers suggest the values are maintained by a
 * generation tool; confirm before editing them by hand.
 */
/* AUTOMATIC FLAG VALUE GENERATION START */
#define	WT_TXN_LOG_CKPT_CLEANUP	0x01u
#define	WT_TXN_LOG_CKPT_PREPARE	0x02u
#define	WT_TXN_LOG_CKPT_START	0x04u
#define	WT_TXN_LOG_CKPT_STOP	0x08u
#define	WT_TXN_LOG_CKPT_SYNC	0x10u
/* AUTOMATIC FLAG VALUE GENERATION STOP */

/*
 * Flags related to the oldest transaction ID, one distinct bit each.
 *
 * NOTE(review): the consumers of these flags are outside this file; confirm
 * their exact meaning against the callers.
 */
/* AUTOMATIC FLAG VALUE GENERATION START */
#define	WT_TXN_OLDEST_STRICT	0x1u
#define	WT_TXN_OLDEST_WAIT	0x2u
/* AUTOMATIC FLAG VALUE GENERATION STOP */

/*
 * Transaction ID comparison dealing with edge cases.
 *
 * WT_TXN_ABORTED is the largest possible ID (never visible to a running
 * transaction), WT_TXN_NONE is smaller than any possible ID (visible to all
 * running transactions).
 *
 * Both macros are plain "<=" / "<" comparisons: the edge cases are handled by
 * the values chosen for the constants above (WT_TXN_NONE is 0, WT_TXN_ABORTED
 * is UINT64_MAX). Each argument is evaluated exactly once.
 */
#define	WT_TXNID_LE(t1, t2)						\
	((t1) <= (t2))

#define	WT_TXNID_LT(t1, t2)						\
	((t1) < (t2))

/*
 * WT_SESSION_TXN_STATE --
 *	Return a pointer to the session's entry in the global "states" array,
 * indexed by the session's id. S2C is defined elsewhere.
 */
#define	WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id])

/*
 * WT_SESSION_IS_CHECKPOINT --
 *	True if the session's id is non-zero and matches the global
 * checkpoint_id (the checkpoint's session ID).
 */
#define	WT_SESSION_IS_CHECKPOINT(s)					\
	((s)->id != 0 && (s)->id == S2C(s)->txn_global.checkpoint_id)

/*
 * Perform an operation at the specified isolation level.
 *
 * This is fiddly: we can't cope with operations that begin transactions
 * (leaving an ID allocated), and operations must not move our published
 * snap_min forwards (or updates we need could be freed while this operation is
 * in progress).  Check for those cases: the bugs they cause are hard to debug.
 *
 * The macro saves the session's and the transaction's isolation levels plus a
 * copy of the session's transaction state, increments forced_iso for the
 * duration of "op", and then restores both isolation levels. Afterwards it
 * asserts that the transaction state's id is unchanged and that
 * metadata_pinned and pinned_id are either unchanged or were WT_TXN_NONE
 * before the operation, and finally writes the saved metadata_pinned and
 * pinned_id values back.
 *
 * Caveats: "s" is evaluated more than once, and the expansion declares the
 * block-local variables saved_iso, saved_txn_iso, txn_state and saved_state,
 * so "op" must not refer to caller variables with those names (they would
 * bind to the macro's locals instead).
 */
#define	WT_WITH_TXN_ISOLATION(s, iso, op) do {				\
	WT_TXN_ISOLATION saved_iso = (s)->isolation;			\
	WT_TXN_ISOLATION saved_txn_iso = (s)->txn.isolation;		\
	WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(s);		\
	WT_TXN_STATE saved_state = *txn_state;				\
	(s)->txn.forced_iso++;						\
	(s)->isolation = (s)->txn.isolation = (iso);			\
	op;								\
	(s)->isolation = saved_iso;					\
	(s)->txn.isolation = saved_txn_iso;				\
	WT_ASSERT((s), (s)->txn.forced_iso > 0);			\
	(s)->txn.forced_iso--;						\
	WT_ASSERT((s), txn_state->id == saved_state.id &&		\
	    (txn_state->metadata_pinned == saved_state.metadata_pinned ||\
	    saved_state.metadata_pinned == WT_TXN_NONE) &&		\
	    (txn_state->pinned_id == saved_state.pinned_id ||		\
	    saved_state.pinned_id == WT_TXN_NONE));			\
	txn_state->metadata_pinned = saved_state.metadata_pinned;	\
	txn_state->pinned_id = saved_state.pinned_id;			\
} while (0)

/*
 * __wt_named_snapshot --
 *	A snapshot stored under a name and linked on a queue; the queue head is
 * the nsnaph field of struct __wt_txn_global.
 */
struct __wt_named_snapshot {
	const char *name;

	TAILQ_ENTRY(__wt_named_snapshot) q;	/* Queue linkage */

	/*
	 * NOTE(review): these fields share their names with the snapshot
	 * fields of struct __wt_txn and the per-session state; presumably they
	 * hold the values captured when the snapshot was named -- confirm
	 * against the named snapshot code.
	 */
	uint64_t id, pinned_id, snap_min, snap_max;
	uint64_t *snapshot;
	uint32_t snapshot_count;
};

/*
 * __wt_txn_state --
 *	Per-session transaction state, stored in the "states" array of the
 * global transaction structure (see WT_SESSION_TXN_STATE). All fields are
 * volatile. The structure is bracketed by the WT_CACHE_LINE_PAD_BEGIN/END
 * macros, which are defined elsewhere (presumably padding the structure to a
 * cache line -- confirm).
 */
struct __wt_txn_state {
	WT_CACHE_LINE_PAD_BEGIN
	volatile uint64_t id;
	volatile uint64_t pinned_id;
	volatile uint64_t metadata_pinned;

	WT_CACHE_LINE_PAD_END
};

/*
 * __wt_txn_global --
 *	Transaction state shared by all sessions: ID allocation, global
 * timestamps, the locks protecting them, checkpoint and named snapshot
 * tracking, and the array of per-session states.
 */
struct __wt_txn_global {
	volatile uint64_t current;	/* Current transaction ID. */

	/* The oldest running transaction ID (may race). */
	volatile uint64_t last_running;

	/*
	 * The oldest transaction ID that is not yet visible to some
	 * transaction in the system.
	 */
	volatile uint64_t oldest_id;

	/*
	 * Global timestamps, declared via WT_DECL_TIMESTAMP (defined
	 * elsewhere), followed by flags recording which are set or pinned.
	 */
	WT_DECL_TIMESTAMP(commit_timestamp)
	WT_DECL_TIMESTAMP(last_ckpt_timestamp)
	WT_DECL_TIMESTAMP(meta_ckpt_timestamp)
	WT_DECL_TIMESTAMP(oldest_timestamp)
	WT_DECL_TIMESTAMP(pinned_timestamp)
	WT_DECL_TIMESTAMP(recovery_timestamp)
	WT_DECL_TIMESTAMP(stable_timestamp)
	bool has_commit_timestamp;
	bool has_oldest_timestamp;
	bool has_pinned_timestamp;
	bool has_stable_timestamp;
	bool oldest_is_pinned;
	bool stable_is_pinned;

	WT_SPINLOCK id_lock;

	/* Protects the active transaction states. */
	WT_RWLOCK rwlock;

	/* Protects logging, checkpoints and transaction visibility. */
	WT_RWLOCK visibility_rwlock;

	/* List of transactions sorted by commit timestamp. */
	WT_RWLOCK commit_timestamp_rwlock;
	TAILQ_HEAD(__wt_txn_cts_qh, __wt_txn) commit_timestamph;
	uint32_t commit_timestampq_len;

	/* List of transactions sorted by read timestamp. */
	WT_RWLOCK read_timestamp_rwlock;
	TAILQ_HEAD(__wt_txn_rts_qh, __wt_txn) read_timestamph;
	uint32_t read_timestampq_len;

	/*
	 * Track information about the running checkpoint. The transaction
	 * snapshots used when checkpointing are special. Checkpoints can run
	 * for a long time so we keep them out of regular visibility checks.
	 * Eviction and checkpoint operations know when they need to be aware
	 * of checkpoint transactions.
	 *
	 * We rely on the fact that (a) the only table a checkpoint updates is
	 * the metadata; and (b) once checkpoint has finished reading a table,
	 * it won't revisit it.
	 */
	volatile bool	  checkpoint_running;	/* Checkpoint running */
	volatile uint32_t checkpoint_id;	/* Checkpoint's session ID */
	WT_TXN_STATE	  checkpoint_state;	/* Checkpoint's txn state */
	WT_DECL_TIMESTAMP(checkpoint_timestamp) /* Checkpoint's timestamp */

	volatile uint64_t metadata_pinned;	/* Oldest ID for metadata */

	/* Named snapshot state. */
	WT_RWLOCK nsnap_rwlock;
	volatile uint64_t nsnap_oldest_id;
	TAILQ_HEAD(__wt_nsnap_qh, __wt_named_snapshot) nsnaph;

	WT_TXN_STATE *states;		/* Per-session transaction states */
};

/*
 * WT_TXN_ISOLATION --
 *	Isolation levels; used for the isolation field of struct __wt_txn and
 * by WT_WITH_TXN_ISOLATION.
 */
typedef enum __wt_txn_isolation {
	WT_ISO_READ_COMMITTED,
	WT_ISO_READ_UNCOMMITTED,
	WT_ISO_SNAPSHOT
} WT_TXN_ISOLATION;

/*
 * WT_TXN_OP --
 *	A transactional operation.  Each transaction builds an in-memory array
 * of these operations as it runs, then uses the array to either write log
 * records during commit or undo the operations during rollback.
 */
struct __wt_txn_op {
	WT_BTREE *btree;
	/* Operation type; the comments in the union name the member each uses. */
	enum {
		WT_TXN_OP_NONE=0,
		WT_TXN_OP_BASIC_COL,
		WT_TXN_OP_BASIC_ROW,
		WT_TXN_OP_INMEM_COL,
		WT_TXN_OP_INMEM_ROW,
		WT_TXN_OP_REF_DELETE,
		WT_TXN_OP_TRUNCATE_COL,
		WT_TXN_OP_TRUNCATE_ROW
	} type;
	union {
		/* WT_TXN_OP_BASIC_ROW, WT_TXN_OP_INMEM_ROW */
		struct {
			WT_UPDATE *upd;
			WT_ITEM key;
		} op_row;

		/* WT_TXN_OP_BASIC_COL, WT_TXN_OP_INMEM_COL */
		struct {
			WT_UPDATE *upd;
			uint64_t recno;
		} op_col;
/*
 * upd is the first member of both op_row and op_col, so it refers to the same
 * memory in either union member; for simplicity, op_upd is defined as an alias
 * for op_row.upd.
 */
#undef	op_upd
#define	op_upd	op_row.upd

		/* WT_TXN_OP_REF_DELETE */
		WT_REF *ref;
		/* WT_TXN_OP_TRUNCATE_COL */
		struct {
			uint64_t start, stop;
		} truncate_col;
		/* WT_TXN_OP_TRUNCATE_ROW */
		struct {
			WT_ITEM start, stop;
			enum {
				WT_TXN_TRUNC_ALL,
				WT_TXN_TRUNC_BOTH,
				WT_TXN_TRUNC_START,
				WT_TXN_TRUNC_STOP
			} mode;
		} truncate_row;
	} u;
};

/*
 * WT_TXN --
 *	Per-session transaction context.
 */
struct __wt_txn {
	uint64_t id;

	WT_TXN_ISOLATION isolation;

	uint32_t forced_iso;	/* Isolation is currently forced. */

	/*
	 * Snapshot data:
	 *	ids < snap_min are visible,
	 *	ids > snap_max are invisible,
	 *	everything else is visible unless it is in the snapshot.
	 */
	uint64_t snap_min, snap_max;
	uint64_t *snapshot;
	uint32_t snapshot_count;
	uint32_t txn_logsync;	/* Log sync configuration */

	/*
	 * Timestamp copied into updates created by this transaction.
	 *
	 * In some use cases, this can be updated while the transaction is
	 * running.
	 */
	WT_DECL_TIMESTAMP(commit_timestamp)

	/*
	 * Set to the first commit timestamp used in the transaction and fixed
	 * while the transaction is on the public list of committed timestamps.
	 */
	WT_DECL_TIMESTAMP(first_commit_timestamp)

	/*
	 * Timestamp copied into updates created by this transaction, when this
	 * transaction is prepared.
	 */
	WT_DECL_TIMESTAMP(prepare_timestamp)

	/* Read updates committed as of this timestamp. */
	WT_DECL_TIMESTAMP(read_timestamp)

	/* Linkage for the global commit and read timestamp queues. */
	TAILQ_ENTRY(__wt_txn) commit_timestampq;
	TAILQ_ENTRY(__wt_txn) read_timestampq;
	bool clear_commit_q;	/* Set if need to clear from the commit queue */
	bool clear_read_q;	/* Set if need to clear from the read queue */

	/* Array of modifications by this transaction. */
	WT_TXN_OP      *mod;
	size_t		mod_alloc;
	u_int		mod_count;

	/* Scratch buffer for in-memory log records. */
	WT_ITEM	       *logrec;

	/* Requested notification when transactions are resolved. */
	WT_TXN_NOTIFY *notify;

	/* Checkpoint status. */
	WT_LSN		ckpt_lsn;
	uint32_t	ckpt_nsnapshot;
	WT_ITEM		*ckpt_snapshot;
	bool		full_ckpt;

	const char *rollback_reason;	/* If rollback, the reason */

/* AUTOMATIC FLAG VALUE GENERATION START */
#define	WT_TXN_AUTOCOMMIT	0x00001u
#define	WT_TXN_ERROR		0x00002u
#define	WT_TXN_HAS_ID		0x00004u
#define	WT_TXN_HAS_SNAPSHOT	0x00008u
#define	WT_TXN_HAS_TS_COMMIT	0x00010u
#define	WT_TXN_HAS_TS_READ	0x00020u
#define	WT_TXN_IGNORE_PREPARE	0x00040u
#define	WT_TXN_NAMED_SNAPSHOT	0x00080u
#define	WT_TXN_PREPARE		0x00100u
#define	WT_TXN_PUBLIC_TS_COMMIT	0x00200u
#define	WT_TXN_PUBLIC_TS_READ	0x00400u
#define	WT_TXN_READONLY		0x00800u
#define	WT_TXN_RUNNING		0x01000u
#define	WT_TXN_SYNC_SET		0x02000u
#define	WT_TXN_TS_COMMIT_ALWAYS	0x04000u
#define	WT_TXN_TS_COMMIT_KEYS	0x08000u
#define	WT_TXN_TS_COMMIT_NEVER	0x10000u
#define	WT_TXN_UPDATE		0x20000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
	uint32_t flags;		/* Bitwise OR of the WT_TXN_* flags above */
};