1 /*-
2 * Copyright (c) 2014-2018 MongoDB, Inc.
3 * Copyright (c) 2008-2014 WiredTiger, Inc.
4 * All rights reserved.
5 *
6 * See the file LICENSE for redistribution information.
7 */
8
9 #include "wt_internal.h"
10
11 #ifdef HAVE_TIMESTAMPS
12 /*
13 * __wt_timestamp_to_hex_string --
14 * Convert a timestamp to hex string representation.
15 */
16 int
__wt_timestamp_to_hex_string(WT_SESSION_IMPL * session,char * hex_timestamp,const wt_timestamp_t * ts_src)17 __wt_timestamp_to_hex_string(
18 WT_SESSION_IMPL *session, char *hex_timestamp, const wt_timestamp_t *ts_src)
19 {
20 wt_timestamp_t ts;
21
22 __wt_timestamp_set(&ts, ts_src);
23
24 if (__wt_timestamp_iszero(&ts)) {
25 hex_timestamp[0] = '0';
26 hex_timestamp[1] = '\0';
27 return (0);
28 }
29
30 #if WT_TIMESTAMP_SIZE == 8
31 {
32 char *p, v;
33
34 for (p = hex_timestamp; ts.val != 0; ts.val >>= 4)
35 *p++ = (char)__wt_hex((u_char)(ts.val & 0x0f));
36 *p = '\0';
37
38 /* Reverse the string. */
39 for (--p; p > hex_timestamp;) {
40 v = *p;
41 *p-- = *hex_timestamp;
42 *hex_timestamp++ = v;
43 }
44 WT_UNUSED(session);
45 }
46 #else
47 {
48 WT_ITEM hexts;
49 size_t len;
50 uint8_t *tsp;
51
52 /* Avoid memory allocation: set up an item guaranteed large enough. */
53 hexts.data = hexts.mem = hex_timestamp;
54 hexts.memsize = 2 * WT_TIMESTAMP_SIZE + 1;
55 /* Trim leading zeros. */
56 for (tsp = ts.ts, len = WT_TIMESTAMP_SIZE;
57 len > 0 && *tsp == 0;
58 ++tsp, --len)
59 ;
60 WT_RET(__wt_raw_to_hex(session, tsp, len, &hexts));
61 }
62 #endif
63 return (0);
64 }
65
66 /*
67 * __wt_verbose_timestamp --
68 * Output a verbose message along with the specified timestamp.
69 */
70 void
__wt_verbose_timestamp(WT_SESSION_IMPL * session,const wt_timestamp_t * ts,const char * msg)71 __wt_verbose_timestamp(WT_SESSION_IMPL *session,
72 const wt_timestamp_t *ts, const char *msg)
73 {
74 char timestamp_buf[2 * WT_TIMESTAMP_SIZE + 1];
75
76 if (!WT_VERBOSE_ISSET(session, WT_VERB_TIMESTAMP) ||
77 (__wt_timestamp_to_hex_string(session, timestamp_buf, ts) != 0))
78 return;
79
80 __wt_verbose(session,
81 WT_VERB_TIMESTAMP, "Timestamp %s : %s", timestamp_buf, msg);
82 }
83
84 /*
85 * __wt_txn_parse_timestamp_raw --
86 * Decodes and sets a timestamp. Don't do any checking.
87 */
88 int
__wt_txn_parse_timestamp_raw(WT_SESSION_IMPL * session,const char * name,wt_timestamp_t * timestamp,WT_CONFIG_ITEM * cval)89 __wt_txn_parse_timestamp_raw(WT_SESSION_IMPL *session, const char *name,
90 wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval)
91 {
92 __wt_timestamp_set_zero(timestamp);
93
94 if (cval->len == 0)
95 return (0);
96
97 /* Protect against unexpectedly long hex strings. */
98 if (cval->len > 2 * WT_TIMESTAMP_SIZE)
99 WT_RET_MSG(session, EINVAL,
100 "%s timestamp too long '%.*s'",
101 name, (int)cval->len, cval->str);
102
103 #if WT_TIMESTAMP_SIZE == 8
104 {
105 static const int8_t hextable[] = {
106 -1, -1, -1, -1, -1, -1, -1, -1,
107 -1, -1, -1, -1, -1, -1, -1, -1,
108 -1, -1, -1, -1, -1, -1, -1, -1,
109 -1, -1, -1, -1, -1, -1, -1, -1,
110 -1, -1, -1, -1, -1, -1, -1, -1,
111 -1, -1, -1, -1, -1, -1, -1, -1,
112 0, 1, 2, 3, 4, 5, 6, 7,
113 8, 9, -1, -1, -1, -1, -1, -1,
114 -1, 10, 11, 12, 13, 14, 15, -1,
115 -1, -1, -1, -1, -1, -1, -1, -1,
116 -1, -1, -1, -1, -1, -1, -1, -1,
117 -1, -1, -1, -1, -1, -1, -1, -1,
118 -1, 10, 11, 12, 13, 14, 15, -1
119 };
120 wt_timestamp_t ts;
121 size_t len;
122 int hex_val;
123 const char *hex_itr;
124
125 for (ts.val = 0, hex_itr = cval->str, len = cval->len; len > 0; --len) {
126 if ((size_t)*hex_itr < WT_ELEMENTS(hextable))
127 hex_val = hextable[(size_t)*hex_itr++];
128 else
129 hex_val = -1;
130 if (hex_val < 0)
131 WT_RET_MSG(session, EINVAL,
132 "Failed to parse %s timestamp '%.*s'",
133 name, (int)cval->len, cval->str);
134 ts.val = (ts.val << 4) | (uint64_t)hex_val;
135 }
136 __wt_timestamp_set(timestamp, &ts);
137 }
138 #else
139 {
140 WT_DECL_RET;
141 WT_ITEM ts;
142 wt_timestamp_t tsbuf;
143 size_t hexlen;
144 const char *hexts;
145 char padbuf[2 * WT_TIMESTAMP_SIZE + 1];
146
147 /*
148 * The decoding function assumes it is decoding data produced by dump
149 * and so requires an even number of hex digits.
150 */
151 if ((cval->len & 1) == 0) {
152 hexts = cval->str;
153 hexlen = cval->len;
154 } else {
155 padbuf[0] = '0';
156 memcpy(padbuf + 1, cval->str, cval->len);
157 hexts = padbuf;
158 hexlen = cval->len + 1;
159 }
160
161 /* Avoid memory allocation to decode timestamps. */
162 ts.data = ts.mem = tsbuf.ts;
163 ts.memsize = sizeof(tsbuf.ts);
164
165 if ((ret = __wt_nhex_to_raw(session, hexts, hexlen, &ts)) != 0)
166 WT_RET_MSG(session, ret, "Failed to parse %s timestamp '%.*s'",
167 name, (int)cval->len, cval->str);
168 WT_ASSERT(session, ts.size <= WT_TIMESTAMP_SIZE);
169
170 /* Copy the raw value to the end of the timestamp. */
171 memcpy(timestamp->ts + WT_TIMESTAMP_SIZE - ts.size,
172 ts.data, ts.size);
173 }
174 #endif
175 return (0);
176 }
177
178 /*
179 * __wt_txn_parse_timestamp --
180 * Decodes and sets a timestamp checking it is non-zero.
181 */
182 int
__wt_txn_parse_timestamp(WT_SESSION_IMPL * session,const char * name,wt_timestamp_t * timestamp,WT_CONFIG_ITEM * cval)183 __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name,
184 wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval)
185 {
186 WT_RET(__wt_txn_parse_timestamp_raw(session, name, timestamp, cval));
187 if (cval->len != 0 && __wt_timestamp_iszero(timestamp))
188 WT_RET_MSG(session, EINVAL,
189 "Failed to parse %s timestamp '%.*s': zero not permitted",
190 name, (int)cval->len, cval->str);
191
192 return (0);
193 }
194
195 /*
196 * __txn_get_pinned_timestamp --
197 * Calculate the current pinned timestamp.
198 */
199 static int
__txn_get_pinned_timestamp(WT_SESSION_IMPL * session,wt_timestamp_t * tsp,bool include_checkpoint,bool include_oldest)200 __txn_get_pinned_timestamp(
201 WT_SESSION_IMPL *session, wt_timestamp_t *tsp, bool include_checkpoint,
202 bool include_oldest)
203 {
204 WT_CONNECTION_IMPL *conn;
205 WT_DECL_TIMESTAMP(tmp_ts)
206 WT_TXN *txn;
207 WT_TXN_GLOBAL *txn_global;
208
209 conn = S2C(session);
210 txn_global = &conn->txn_global;
211
212 if (include_oldest && !txn_global->has_oldest_timestamp)
213 return (WT_NOTFOUND);
214
215 __wt_readlock(session, &txn_global->rwlock);
216 if (include_oldest)
217 __wt_timestamp_set(&tmp_ts, &txn_global->oldest_timestamp);
218 else
219 __wt_timestamp_set_zero(&tmp_ts);
220
221 /* Check for a running checkpoint */
222 if (include_checkpoint &&
223 !__wt_timestamp_iszero(&txn_global->checkpoint_timestamp) &&
224 (__wt_timestamp_iszero(&tmp_ts) ||
225 __wt_timestamp_cmp(&txn_global->checkpoint_timestamp, &tmp_ts) <
226 0))
227 __wt_timestamp_set(&tmp_ts, &txn_global->checkpoint_timestamp);
228 __wt_readunlock(session, &txn_global->rwlock);
229
230 /* Look for the oldest ordinary reader. */
231 __wt_readlock(session, &txn_global->read_timestamp_rwlock);
232 TAILQ_FOREACH(txn, &txn_global->read_timestamph, read_timestampq) {
233 /*
234 * Skip any transactions on the queue that are not active.
235 */
236 if (txn->clear_read_q)
237 continue;
238 /*
239 * A zero timestamp is possible here only when the oldest
240 * timestamp is not accounted for.
241 */
242 if (__wt_timestamp_iszero(&tmp_ts) ||
243 __wt_timestamp_cmp(&txn->read_timestamp, &tmp_ts) < 0)
244 __wt_timestamp_set(&tmp_ts, &txn->read_timestamp);
245 /*
246 * We break on the first active txn on the list.
247 */
248 break;
249 }
250 __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
251
252 if (!include_oldest && __wt_timestamp_iszero(&tmp_ts))
253 return (WT_NOTFOUND);
254 __wt_timestamp_set(tsp, &tmp_ts);
255
256 return (0);
257 }
258
259 /*
260 * __txn_global_query_timestamp --
261 * Query a timestamp on the global transaction.
262 */
263 static int
__txn_global_query_timestamp(WT_SESSION_IMPL * session,wt_timestamp_t * tsp,const char * cfg[])264 __txn_global_query_timestamp(
265 WT_SESSION_IMPL *session, wt_timestamp_t *tsp, const char *cfg[])
266 {
267 WT_CONFIG_ITEM cval;
268 WT_CONNECTION_IMPL *conn;
269 WT_TXN *txn;
270 WT_TXN_GLOBAL *txn_global;
271 wt_timestamp_t ts, tmpts;
272
273 conn = S2C(session);
274 txn_global = &conn->txn_global;
275
276 WT_STAT_CONN_INCR(session, txn_query_ts);
277 WT_RET(__wt_config_gets(session, cfg, "get", &cval));
278 if (WT_STRING_MATCH("all_committed", cval.str, cval.len)) {
279 if (!txn_global->has_commit_timestamp)
280 return (WT_NOTFOUND);
281 WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
282 __wt_timestamp_set(&ts, &txn_global->commit_timestamp));
283 WT_ASSERT(session, !__wt_timestamp_iszero(&ts));
284
285 /* Skip the lock if there are no running transactions. */
286 if (TAILQ_EMPTY(&txn_global->commit_timestamph))
287 goto done;
288
289 /* Compare with the oldest running transaction. */
290 __wt_readlock(session, &txn_global->commit_timestamp_rwlock);
291 TAILQ_FOREACH(txn, &txn_global->commit_timestamph,
292 commit_timestampq) {
293 if (txn->clear_commit_q)
294 continue;
295
296 __wt_timestamp_set(
297 &tmpts, &txn->first_commit_timestamp);
298 WT_ASSERT(session, !__wt_timestamp_iszero(&tmpts));
299 __wt_timestamp_subone(&tmpts);
300
301 if (__wt_timestamp_cmp(&tmpts, &ts) < 0)
302 __wt_timestamp_set(&ts, &tmpts);
303 break;
304 }
305 __wt_readunlock(session, &txn_global->commit_timestamp_rwlock);
306 } else if (WT_STRING_MATCH("last_checkpoint", cval.str, cval.len))
307 /* Read-only value forever. No lock needed. */
308 __wt_timestamp_set(&ts, &txn_global->last_ckpt_timestamp);
309 else if (WT_STRING_MATCH("oldest", cval.str, cval.len)) {
310 if (!txn_global->has_oldest_timestamp)
311 return (WT_NOTFOUND);
312 WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
313 __wt_timestamp_set(&ts, &txn_global->oldest_timestamp));
314 } else if (WT_STRING_MATCH("oldest_reader", cval.str, cval.len))
315 WT_RET(__txn_get_pinned_timestamp(session, &ts, true, false));
316 else if (WT_STRING_MATCH("pinned", cval.str, cval.len))
317 WT_RET(__txn_get_pinned_timestamp(session, &ts, true, true));
318 else if (WT_STRING_MATCH("recovery", cval.str, cval.len))
319 /* Read-only value forever. No lock needed. */
320 __wt_timestamp_set(&ts, &txn_global->recovery_timestamp);
321 else if (WT_STRING_MATCH("stable", cval.str, cval.len)) {
322 if (!txn_global->has_stable_timestamp)
323 return (WT_NOTFOUND);
324 WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
325 __wt_timestamp_set(&ts, &txn_global->stable_timestamp));
326 } else
327 WT_RET_MSG(session, EINVAL,
328 "unknown timestamp query %.*s", (int)cval.len, cval.str);
329
330 done: __wt_timestamp_set(tsp, &ts);
331 return (0);
332 }
333
334 /*
335 * __txn_query_timestamp --
336 * Query a timestamp within this session's transaction.
337 */
338 static int
__txn_query_timestamp(WT_SESSION_IMPL * session,wt_timestamp_t * tsp,const char * cfg[])339 __txn_query_timestamp(
340 WT_SESSION_IMPL *session, wt_timestamp_t *tsp, const char *cfg[])
341 {
342 WT_CONFIG_ITEM cval;
343 WT_TXN *txn;
344
345 txn = &session->txn;
346
347 WT_STAT_CONN_INCR(session, session_query_ts);
348 if (!F_ISSET(txn, WT_TXN_RUNNING))
349 return (WT_NOTFOUND);
350
351 WT_RET(__wt_config_gets(session, cfg, "get", &cval));
352 if (WT_STRING_MATCH("commit", cval.str, cval.len))
353 __wt_timestamp_set(tsp, &txn->commit_timestamp);
354 else if (WT_STRING_MATCH("first_commit", cval.str, cval.len))
355 __wt_timestamp_set(tsp, &txn->first_commit_timestamp);
356 else if (WT_STRING_MATCH("prepare", cval.str, cval.len))
357 __wt_timestamp_set(tsp, &txn->prepare_timestamp);
358 else if (WT_STRING_MATCH("read", cval.str, cval.len))
359 __wt_timestamp_set(tsp, &txn->read_timestamp);
360 else
361 WT_RET_MSG(session, EINVAL,
362 "unknown timestamp query %.*s", (int)cval.len, cval.str);
363
364 return (0);
365 }
366 #endif
367
368 /*
369 * __wt_txn_query_timestamp --
370 * Query a timestamp. The caller may query the global transaction or the
371 * session's transaction.
372 */
373 int
__wt_txn_query_timestamp(WT_SESSION_IMPL * session,char * hex_timestamp,const char * cfg[],bool global_txn)374 __wt_txn_query_timestamp(WT_SESSION_IMPL *session,
375 char *hex_timestamp, const char *cfg[], bool global_txn)
376 {
377 #ifdef HAVE_TIMESTAMPS
378 wt_timestamp_t ts;
379
380 if (global_txn)
381 WT_RET(__txn_global_query_timestamp(session, &ts, cfg));
382 else
383 WT_RET(__txn_query_timestamp(session, &ts, cfg));
384
385 return (__wt_timestamp_to_hex_string(session, hex_timestamp, &ts));
386 #else
387 WT_UNUSED(hex_timestamp);
388 WT_UNUSED(cfg);
389 WT_UNUSED(global_txn);
390
391 WT_RET_MSG(session, ENOTSUP,
392 "requires a version of WiredTiger built with timestamp support");
393 #endif
394 }
395
396 #ifdef HAVE_TIMESTAMPS
397 /*
398 * __wt_txn_update_pinned_timestamp --
399 * Update the pinned timestamp (the oldest timestamp that has to be
400 * maintained for current or future readers).
401 */
402 int
__wt_txn_update_pinned_timestamp(WT_SESSION_IMPL * session,bool force)403 __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force)
404 {
405 WT_DECL_RET;
406 WT_TXN_GLOBAL *txn_global;
407 wt_timestamp_t active_timestamp, last_pinned_timestamp;
408 wt_timestamp_t oldest_timestamp, pinned_timestamp;
409
410 txn_global = &S2C(session)->txn_global;
411
412 /* Skip locking and scanning when the oldest timestamp is pinned. */
413 if (txn_global->oldest_is_pinned)
414 return (0);
415
416 WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
417 __wt_timestamp_set(
418 &oldest_timestamp, &txn_global->oldest_timestamp));
419
420 /* Scan to find the global pinned timestamp. */
421 if ((ret = __txn_get_pinned_timestamp(
422 session, &active_timestamp, false, true)) != 0)
423 return (ret == WT_NOTFOUND ? 0 : ret);
424
425 if (__wt_timestamp_cmp(&oldest_timestamp, &active_timestamp) < 0)
426 __wt_timestamp_set(&pinned_timestamp, &oldest_timestamp);
427 else
428 __wt_timestamp_set(&pinned_timestamp, &active_timestamp);
429
430 if (txn_global->has_pinned_timestamp && !force) {
431 WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
432 __wt_timestamp_set(
433 &last_pinned_timestamp, &txn_global->pinned_timestamp));
434
435 if (__wt_timestamp_cmp(
436 &pinned_timestamp, &last_pinned_timestamp) <= 0)
437 return (0);
438 }
439
440 __wt_writelock(session, &txn_global->rwlock);
441 if (!txn_global->has_pinned_timestamp || force || __wt_timestamp_cmp(
442 &txn_global->pinned_timestamp, &pinned_timestamp) < 0) {
443 __wt_timestamp_set(
444 &txn_global->pinned_timestamp, &pinned_timestamp);
445 txn_global->has_pinned_timestamp = true;
446 txn_global->oldest_is_pinned = __wt_timestamp_cmp(
447 &txn_global->pinned_timestamp,
448 &txn_global->oldest_timestamp) == 0;
449 txn_global->stable_is_pinned = __wt_timestamp_cmp(
450 &txn_global->pinned_timestamp,
451 &txn_global->stable_timestamp) == 0;
452 __wt_verbose_timestamp(session,
453 &pinned_timestamp, "Updated pinned timestamp");
454 }
455 __wt_writeunlock(session, &txn_global->rwlock);
456
457 return (0);
458 }
459 #endif
460
461 /*
462 * __wt_txn_global_set_timestamp --
463 * Set a global transaction timestamp.
464 */
465 int
__wt_txn_global_set_timestamp(WT_SESSION_IMPL * session,const char * cfg[])466 __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
467 {
468 WT_CONFIG_ITEM commit_cval, oldest_cval, stable_cval;
469 bool has_commit, has_oldest, has_stable;
470
471 WT_STAT_CONN_INCR(session, txn_set_ts);
472 WT_RET(__wt_config_gets_def(session,
473 cfg, "commit_timestamp", 0, &commit_cval));
474 has_commit = commit_cval.len != 0;
475 if (has_commit)
476 WT_STAT_CONN_INCR(session, txn_set_ts_commit);
477
478 WT_RET(__wt_config_gets_def(session,
479 cfg, "oldest_timestamp", 0, &oldest_cval));
480 has_oldest = oldest_cval.len != 0;
481 if (has_oldest)
482 WT_STAT_CONN_INCR(session, txn_set_ts_oldest);
483
484 WT_RET(__wt_config_gets_def(session,
485 cfg, "stable_timestamp", 0, &stable_cval));
486 has_stable = stable_cval.len != 0;
487 if (has_stable)
488 WT_STAT_CONN_INCR(session, txn_set_ts_stable);
489
490 /* If no timestamp was supplied, there's nothing to do. */
491 if (!has_commit && !has_oldest && !has_stable)
492 return (0);
493
494 #ifdef HAVE_TIMESTAMPS
495 {
496 WT_CONFIG_ITEM cval;
497 WT_TXN_GLOBAL *txn_global;
498 wt_timestamp_t commit_ts, oldest_ts, stable_ts;
499 wt_timestamp_t last_oldest_ts, last_stable_ts;
500 char hex_timestamp[2][2 * WT_TIMESTAMP_SIZE + 1];
501 bool force;
502
503 txn_global = &S2C(session)->txn_global;
504
505 /*
506 * Parsing will initialize the timestamp to zero even if
507 * it is not configured.
508 */
509 WT_RET(__wt_txn_parse_timestamp(
510 session, "commit", &commit_ts, &commit_cval));
511 WT_RET(__wt_txn_parse_timestamp(
512 session, "oldest", &oldest_ts, &oldest_cval));
513 WT_RET(__wt_txn_parse_timestamp(
514 session, "stable", &stable_ts, &stable_cval));
515
516 WT_RET(__wt_config_gets_def(session,
517 cfg, "force", 0, &cval));
518 force = cval.val != 0;
519
520 if (force)
521 goto set;
522
523 __wt_readlock(session, &txn_global->rwlock);
524
525 __wt_timestamp_set(&last_oldest_ts, &txn_global->oldest_timestamp);
526 __wt_timestamp_set(&last_stable_ts, &txn_global->stable_timestamp);
527
528 /*
529 * First do error checking on the timestamp values. The
530 * oldest timestamp must always be less than or equal to
531 * the stable timestamp. If we're only setting one
532 * then compare against the system timestamp. If we're
533 * setting both then compare the passed in values.
534 */
535 if (!has_commit && txn_global->has_commit_timestamp)
536 __wt_timestamp_set(&commit_ts, &txn_global->commit_timestamp);
537 if (!has_oldest && txn_global->has_oldest_timestamp)
538 __wt_timestamp_set(&oldest_ts, &last_oldest_ts);
539 if (!has_stable && txn_global->has_stable_timestamp)
540 __wt_timestamp_set(&stable_ts, &last_stable_ts);
541
542 /*
543 * If a commit timestamp was supplied, check that it is no older than
544 * either the stable timestamp or the oldest timestamp.
545 */
546 if (has_commit && (has_oldest || txn_global->has_oldest_timestamp) &&
547 __wt_timestamp_cmp(&oldest_ts, &commit_ts) > 0) {
548 __wt_readunlock(session, &txn_global->rwlock);
549 WT_RET(__wt_timestamp_to_hex_string(
550 session, hex_timestamp[0], &oldest_ts));
551 WT_RET(__wt_timestamp_to_hex_string(
552 session, hex_timestamp[1], &commit_ts));
553 WT_RET_MSG(session, EINVAL,
554 "set_timestamp: oldest timestamp %s must not be later than "
555 "commit timestamp %s", hex_timestamp[0], hex_timestamp[1]);
556 }
557
558 if (has_commit && (has_stable || txn_global->has_stable_timestamp) &&
559 __wt_timestamp_cmp(&stable_ts, &commit_ts) > 0) {
560 __wt_readunlock(session, &txn_global->rwlock);
561 WT_RET(__wt_timestamp_to_hex_string(
562 session, hex_timestamp[0], &stable_ts));
563 WT_RET(__wt_timestamp_to_hex_string(
564 session, hex_timestamp[1], &commit_ts));
565 WT_RET_MSG(session, EINVAL,
566 "set_timestamp: stable timestamp %s must not be later than "
567 "commit timestamp %s", hex_timestamp[0], hex_timestamp[1]);
568 }
569
570 /*
571 * The oldest and stable timestamps must always satisfy the condition
572 * that oldest <= stable.
573 */
574 if ((has_oldest || has_stable) &&
575 (has_oldest || txn_global->has_oldest_timestamp) &&
576 (has_stable || txn_global->has_stable_timestamp) &&
577 __wt_timestamp_cmp(&oldest_ts, &stable_ts) > 0) {
578 __wt_readunlock(session, &txn_global->rwlock);
579 WT_RET(__wt_timestamp_to_hex_string(
580 session, hex_timestamp[0], &oldest_ts));
581 WT_RET(__wt_timestamp_to_hex_string(
582 session, hex_timestamp[1], &stable_ts));
583 WT_RET_MSG(session, EINVAL,
584 "set_timestamp: oldest timestamp %s must not be later than "
585 "stable timestamp %s", hex_timestamp[0], hex_timestamp[1]);
586 }
587
588 __wt_readunlock(session, &txn_global->rwlock);
589
590 /* Check if we are actually updating anything. */
591 if (has_oldest && txn_global->has_oldest_timestamp &&
592 __wt_timestamp_cmp(&oldest_ts, &last_oldest_ts) <= 0)
593 has_oldest = false;
594
595 if (has_stable && txn_global->has_stable_timestamp &&
596 __wt_timestamp_cmp(&stable_ts, &last_stable_ts) <= 0)
597 has_stable = false;
598
599 if (!has_commit && !has_oldest && !has_stable)
600 return (0);
601
602 set: __wt_writelock(session, &txn_global->rwlock);
603 /*
604 * This method can be called from multiple threads, check that we are
605 * moving the global timestamps forwards.
606 *
607 * The exception is the commit timestamp, where the application can
608 * move it backwards (in fact, it only really makes sense to explicitly
609 * move it backwards because it otherwise tracks the largest
610 * commit_timestamp so it moves forward whenever transactions are
611 * assigned timestamps).
612 */
613 if (has_commit) {
614 __wt_timestamp_set(&txn_global->commit_timestamp, &commit_ts);
615 txn_global->has_commit_timestamp = true;
616 WT_STAT_CONN_INCR(session, txn_set_ts_commit_upd);
617 __wt_verbose_timestamp(session, &commit_ts,
618 "Updated global commit timestamp");
619 }
620
621 if (has_oldest && (!txn_global->has_oldest_timestamp ||
622 force || __wt_timestamp_cmp(
623 &oldest_ts, &txn_global->oldest_timestamp) > 0)) {
624 __wt_timestamp_set(&txn_global->oldest_timestamp, &oldest_ts);
625 WT_STAT_CONN_INCR(session, txn_set_ts_oldest_upd);
626 txn_global->has_oldest_timestamp = true;
627 txn_global->oldest_is_pinned = false;
628 __wt_verbose_timestamp(session, &oldest_ts,
629 "Updated global oldest timestamp");
630 }
631
632 if (has_stable && (!txn_global->has_stable_timestamp ||
633 force || __wt_timestamp_cmp(
634 &stable_ts, &txn_global->stable_timestamp) > 0)) {
635 __wt_timestamp_set(&txn_global->stable_timestamp, &stable_ts);
636 WT_STAT_CONN_INCR(session, txn_set_ts_stable_upd);
637 txn_global->has_stable_timestamp = true;
638 txn_global->stable_is_pinned = false;
639 __wt_verbose_timestamp(session, &stable_ts,
640 "Updated global stable timestamp");
641 }
642 __wt_writeunlock(session, &txn_global->rwlock);
643
644 if (has_oldest || has_stable)
645 WT_RET(__wt_txn_update_pinned_timestamp(session, force));
646 }
647 return (0);
648 #else
649 WT_RET_MSG(session, ENOTSUP, "set_timestamp requires a "
650 "version of WiredTiger built with timestamp support");
651 #endif
652 }
653
654 #ifdef HAVE_TIMESTAMPS
655 /*
656 * __wt_timestamp_validate --
657 * Validate a timestamp to be not older than the global oldest and global
658 * stable and running transaction commit timestamp and running transaction
659 * prepare timestamp.
660 */
661 int
__wt_timestamp_validate(WT_SESSION_IMPL * session,const char * name,wt_timestamp_t * ts,WT_CONFIG_ITEM * cval)662 __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name,
663 wt_timestamp_t *ts, WT_CONFIG_ITEM *cval)
664 {
665 WT_TXN *txn = &session->txn;
666 WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
667 wt_timestamp_t oldest_ts, stable_ts;
668 char hex_timestamp[2 * WT_TIMESTAMP_SIZE + 1];
669 bool has_oldest_ts, has_stable_ts;
670
671 /*
672 * Added this redundant initialization to circumvent build failure.
673 */
674 __wt_timestamp_set_zero(&oldest_ts);
675 __wt_timestamp_set_zero(&stable_ts);
676 /*
677 * Compare against the oldest and the stable timestamp. Return an error
678 * if the given timestamp is older than oldest and/or stable timestamp.
679 */
680 WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
681 if ((has_oldest_ts = txn_global->has_oldest_timestamp))
682 __wt_timestamp_set(&oldest_ts, &txn_global->oldest_timestamp);
683 if ((has_stable_ts = txn_global->has_stable_timestamp))
684 __wt_timestamp_set(&stable_ts, &txn_global->stable_timestamp));
685
686 if (has_oldest_ts && __wt_timestamp_cmp(ts, &oldest_ts) < 0) {
687 WT_RET(__wt_timestamp_to_hex_string(session, hex_timestamp,
688 &oldest_ts));
689 WT_RET_MSG(session, EINVAL,
690 "%s timestamp %.*s older than oldest timestamp %s",
691 name, (int)cval->len, cval->str, hex_timestamp);
692 }
693 if (has_stable_ts && __wt_timestamp_cmp(ts, &stable_ts) < 0) {
694 WT_RET(__wt_timestamp_to_hex_string(session, hex_timestamp,
695 &stable_ts));
696 WT_RET_MSG(session, EINVAL,
697 "%s timestamp %.*s older than stable timestamp %s",
698 name, (int)cval->len, cval->str, hex_timestamp);
699 }
700
701 /*
702 * Compare against the commit timestamp of the current transaction.
703 * Return an error if the given timestamp is older than the first
704 * commit timestamp.
705 */
706 if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
707 __wt_timestamp_cmp(ts, &txn->first_commit_timestamp) < 0) {
708 WT_RET(__wt_timestamp_to_hex_string(
709 session, hex_timestamp, &txn->first_commit_timestamp));
710 WT_RET_MSG(session, EINVAL,
711 "%s timestamp %.*s older than the first "
712 "commit timestamp %s for this transaction",
713 name, (int)cval->len, cval->str, hex_timestamp);
714 }
715
716 /*
717 * Compare against the prepare timestamp of the current transaction.
718 * Return an error if the given timestamp is older than the prepare
719 * timestamp.
720 */
721 if (F_ISSET(txn, WT_TXN_PREPARE) &&
722 __wt_timestamp_cmp(ts, &txn->prepare_timestamp) < 0) {
723 WT_RET(__wt_timestamp_to_hex_string(
724 session, hex_timestamp, &txn->prepare_timestamp));
725 WT_RET_MSG(session, EINVAL,
726 "%s timestamp %.*s older than the prepare timestamp %s "
727 "for this transaction",
728 name, (int)cval->len, cval->str, hex_timestamp);
729 }
730
731 return (0);
732 }
733 #endif
734
735 /*
736 * __wt_txn_set_timestamp --
737 * Parse a request to set a timestamp in a transaction.
738 */
739 int
__wt_txn_set_timestamp(WT_SESSION_IMPL * session,const char * cfg[])740 __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
741 {
742 WT_CONFIG_ITEM cval;
743 WT_DECL_RET;
744
745 /* Look for a commit timestamp. */
746 ret = __wt_config_gets_def(session, cfg, "commit_timestamp", 0, &cval);
747 WT_RET_NOTFOUND_OK(ret);
748 if (ret == 0 && cval.len != 0) {
749 #ifdef HAVE_TIMESTAMPS
750 WT_TXN *txn = &session->txn;
751 wt_timestamp_t ts;
752
753 WT_TRET(__wt_txn_context_check(session, true));
754 WT_RET(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
755 WT_RET(__wt_timestamp_validate(session, "commit", &ts, &cval));
756 __wt_timestamp_set(&txn->commit_timestamp, &ts);
757 __wt_txn_set_commit_timestamp(session);
758 #else
759 WT_RET_MSG(session, ENOTSUP, "commit_timestamp requires a "
760 "version of WiredTiger built with timestamp support");
761 #endif
762 } else
763 /*
764 * We allow setting the commit timestamp after a prepare
765 * but no other timestamp.
766 */
767 WT_RET(__wt_txn_context_prepare_check(session));
768
769 /* Look for a read timestamp. */
770 WT_RET(__wt_txn_parse_read_timestamp(session, cfg));
771
772 return (0);
773 }
774
775 /*
776 * __wt_txn_parse_prepare_timestamp --
777 * Parse a request to set a transaction's prepare_timestamp.
778 */
779 int
__wt_txn_parse_prepare_timestamp(WT_SESSION_IMPL * session,const char * cfg[],wt_timestamp_t * timestamp)780 __wt_txn_parse_prepare_timestamp(
781 WT_SESSION_IMPL *session, const char *cfg[], wt_timestamp_t *timestamp)
782 {
783 WT_CONFIG_ITEM cval;
784
785 WT_RET(__wt_config_gets_def(session,
786 cfg, "prepare_timestamp", 0, &cval));
787 if (cval.len > 0) {
788 #ifdef HAVE_TIMESTAMPS
789 WT_TXN *prev;
790 WT_TXN_GLOBAL *txn_global;
791 wt_timestamp_t oldest_ts;
792 char hex_timestamp[2 * WT_TIMESTAMP_SIZE + 1];
793
794 txn_global = &S2C(session)->txn_global;
795
796 if (F_ISSET(&session->txn, WT_TXN_HAS_TS_COMMIT))
797 WT_RET_MSG(session, EINVAL,
798 "commit timestamp should not have been set before "
799 "prepare transaction");
800
801 WT_RET(__wt_txn_parse_timestamp(
802 session, "prepare", timestamp, &cval));
803
804 /*
805 * Prepare timestamp must be later/greater than latest active
806 * read timestamp.
807 */
808 __wt_readlock(session, &txn_global->read_timestamp_rwlock);
809 prev = TAILQ_LAST(&txn_global->read_timestamph,
810 __wt_txn_rts_qh);
811 while (prev != NULL) {
812 /*
813 * Skip any transactions that are not active.
814 */
815 if (prev->clear_read_q) {
816 prev = TAILQ_PREV(
817 prev, __wt_txn_rts_qh, read_timestampq);
818 continue;
819 }
820 if (__wt_timestamp_cmp(
821 &prev->read_timestamp, timestamp) >= 0) {
822 __wt_readunlock(session,
823 &txn_global->read_timestamp_rwlock);
824 WT_RET(__wt_timestamp_to_hex_string(session,
825 hex_timestamp, &prev->read_timestamp));
826 WT_RET_MSG(session, EINVAL,
827 "prepare timestamp %.*s not later than "
828 "an active read timestamp %s ",
829 (int)cval.len, cval.str, hex_timestamp);
830 }
831 break;
832 }
833 __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
834
835 /*
836 * If there are no active readers, prepare timestamp must not
837 * be older than oldest timestamp.
838 */
839 if (prev == NULL) {
840 WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
841 __wt_timestamp_set(&oldest_ts,
842 &txn_global->oldest_timestamp));
843
844 if (__wt_timestamp_cmp(timestamp, &oldest_ts) < 0) {
845 WT_RET(__wt_timestamp_to_hex_string(session,
846 hex_timestamp, &oldest_ts));
847 WT_RET_MSG(session, EINVAL,
848 "prepare timestamp %.*s is older than the "
849 "oldest timestamp %s ", (int)cval.len,
850 cval.str, hex_timestamp);
851 }
852 }
853 #else
854 WT_UNUSED(timestamp);
855 WT_RET_MSG(session, EINVAL, "prepare_timestamp requires a "
856 "version of WiredTiger built with timestamp support");
857 #endif
858 } else
859 WT_RET_MSG(session, EINVAL, "prepare timestamp is required");
860
861 return (0);
862 }
863 /*
864 * __wt_txn_parse_read_timestamp --
865 * Parse a request to set a transaction's read_timestamp.
866 */
867 int
__wt_txn_parse_read_timestamp(WT_SESSION_IMPL * session,const char * cfg[])868 __wt_txn_parse_read_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
869 {
870 WT_CONFIG_ITEM cval;
871 WT_TXN *txn;
872
873 txn = &session->txn;
874
875 WT_RET(__wt_config_gets_def(session, cfg, "read_timestamp", 0, &cval));
876 if (cval.len > 0) {
877 #ifdef HAVE_TIMESTAMPS
878 wt_timestamp_t ts;
879 WT_TXN_GLOBAL *txn_global;
880 char hex_timestamp[2][2 * WT_TIMESTAMP_SIZE + 1];
881 bool round_to_oldest;
882
883 txn_global = &S2C(session)->txn_global;
884 WT_RET(__wt_txn_parse_timestamp(session, "read", &ts, &cval));
885
886 /* Read timestamps imply / require snapshot isolation. */
887 if (!F_ISSET(txn, WT_TXN_RUNNING))
888 txn->isolation = WT_ISO_SNAPSHOT;
889 else if (txn->isolation != WT_ISO_SNAPSHOT)
890 WT_RET_MSG(session, EINVAL, "setting a read_timestamp"
891 " requires a transaction running at snapshot"
892 " isolation");
893
894 /* Read timestamps can't change once set. */
895 if (F_ISSET(txn, WT_TXN_HAS_TS_READ))
896 WT_RET_MSG(session, EINVAL, "a read_timestamp"
897 " may only be set once per transaction");
898
899 /*
900 * Read the configuration here to reduce the span of the
901 * critical section.
902 */
903 WT_RET(__wt_config_gets_def(session,
904 cfg, "round_to_oldest", 0, &cval));
905 round_to_oldest = cval.val;
906 /*
907 * This code is not using the timestamp validate function to
908 * avoid a race between checking and setting transaction
909 * timestamp.
910 */
911 WT_RET(__wt_timestamp_to_hex_string(session,
912 hex_timestamp[0], &ts));
913 __wt_readlock(session, &txn_global->rwlock);
914 if (__wt_timestamp_cmp(
915 &ts, &txn_global->oldest_timestamp) < 0) {
916 WT_RET(__wt_timestamp_to_hex_string(session,
917 hex_timestamp[1], &txn_global->oldest_timestamp));
918 /*
919 * If given read timestamp is earlier than oldest
920 * timestamp then round the read timestamp to
921 * oldest timestamp.
922 */
923 if (round_to_oldest)
924 __wt_timestamp_set(&txn->read_timestamp,
925 &txn_global->oldest_timestamp);
926 else {
927 __wt_readunlock(session, &txn_global->rwlock);
928 WT_RET_MSG(session, EINVAL, "read timestamp "
929 "%s older than oldest timestamp %s",
930 hex_timestamp[0], hex_timestamp[1]);
931 }
932 } else {
933 __wt_timestamp_set(&txn->read_timestamp, &ts);
934 /*
935 * Reset to avoid a verbose message as read
936 * timestamp is not rounded to oldest timestamp.
937 */
938 round_to_oldest = false;
939 }
940
941 __wt_txn_set_read_timestamp(session);
942 __wt_readunlock(session, &txn_global->rwlock);
943 if (round_to_oldest) {
944 /*
945 * This message is generated here to reduce the span of
946 * critical section.
947 */
948 __wt_verbose(session, WT_VERB_TIMESTAMP, "Read "
949 "timestamp %s : Rounded to oldest timestamp %s",
950 hex_timestamp[0], hex_timestamp[1]);
951 }
952
953 /*
954 * If we already have a snapshot, it may be too early to match
955 * the timestamp (including the one we just read, if rounding
956 * to oldest). Get a new one.
957 */
958 if (F_ISSET(txn, WT_TXN_RUNNING))
959 __wt_txn_get_snapshot(session);
960
961 #else
962 WT_UNUSED(txn);
963 WT_RET_MSG(session, EINVAL, "read_timestamp requires a "
964 "version of WiredTiger built with timestamp support");
965 #endif
966 }
967
968 return (0);
969 }
970
971 #ifdef HAVE_TIMESTAMPS
972 /*
973 * __wt_txn_set_commit_timestamp --
974 * Publish a transaction's commit timestamp.
975 */
976 void
__wt_txn_set_commit_timestamp(WT_SESSION_IMPL * session)977 __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session)
978 {
979 WT_TXN *qtxn, *txn, *txn_tmp;
980 WT_TXN_GLOBAL *txn_global;
981 wt_timestamp_t ts;
982 uint64_t walked;
983
984 txn = &session->txn;
985 txn_global = &S2C(session)->txn_global;
986
987 if (F_ISSET(txn, WT_TXN_PUBLIC_TS_COMMIT))
988 return;
989
990 /*
991 * Copy the current commit timestamp (which can change while the
992 * transaction is running) into the first_commit_timestamp, which is
993 * fixed.
994 */
995 __wt_timestamp_set(&ts, &txn->commit_timestamp);
996
997 __wt_writelock(session, &txn_global->commit_timestamp_rwlock);
998 /*
999 * If our transaction is on the queue remove it first. The timestamp
1000 * may move earlier so we otherwise might not remove ourselves before
1001 * finding where to insert ourselves (which would result in a list
1002 * loop) and we don't want to walk more of the list than needed.
1003 */
1004 if (txn->clear_commit_q) {
1005 TAILQ_REMOVE(&txn_global->commit_timestamph,
1006 txn, commit_timestampq);
1007 WT_PUBLISH(txn->clear_commit_q, false);
1008 --txn_global->commit_timestampq_len;
1009 }
1010 /*
1011 * Walk the list to look for where to insert our own transaction
1012 * and remove any transactions that are not active. We stop when
1013 * we get to the location where we want to insert.
1014 */
1015 if (TAILQ_EMPTY(&txn_global->commit_timestamph)) {
1016 TAILQ_INSERT_HEAD(
1017 &txn_global->commit_timestamph, txn, commit_timestampq);
1018 WT_STAT_CONN_INCR(session, txn_commit_queue_empty);
1019 } else {
1020 /* Walk from the start, removing cleared entries. */
1021 walked = 0;
1022 TAILQ_FOREACH_SAFE(qtxn, &txn_global->commit_timestamph,
1023 commit_timestampq, txn_tmp) {
1024 ++walked;
1025 /*
1026 * Stop on the first entry that we cannot clear.
1027 */
1028 if (!qtxn->clear_commit_q)
1029 break;
1030
1031 TAILQ_REMOVE(&txn_global->commit_timestamph,
1032 qtxn, commit_timestampq);
1033 WT_PUBLISH(qtxn->clear_commit_q, false);
1034 --txn_global->commit_timestampq_len;
1035 }
1036
1037 /*
1038 * Now walk backwards from the end to find the correct position
1039 * for the insert.
1040 */
1041 qtxn = TAILQ_LAST(
1042 &txn_global->commit_timestamph, __wt_txn_cts_qh);
1043 while (qtxn != NULL && __wt_timestamp_cmp(
1044 &qtxn->first_commit_timestamp, &ts) > 0) {
1045 ++walked;
1046 qtxn = TAILQ_PREV(
1047 qtxn, __wt_txn_cts_qh, commit_timestampq);
1048 }
1049 if (qtxn == NULL) {
1050 TAILQ_INSERT_HEAD(&txn_global->commit_timestamph,
1051 txn, commit_timestampq);
1052 WT_STAT_CONN_INCR(session, txn_commit_queue_head);
1053 } else
1054 TAILQ_INSERT_AFTER(&txn_global->commit_timestamph,
1055 qtxn, txn, commit_timestampq);
1056 WT_STAT_CONN_INCRV(session, txn_commit_queue_walked, walked);
1057 }
1058 __wt_timestamp_set(&txn->first_commit_timestamp, &ts);
1059 ++txn_global->commit_timestampq_len;
1060 WT_STAT_CONN_INCR(session, txn_commit_queue_inserts);
1061 txn->clear_commit_q = false;
1062 F_SET(txn, WT_TXN_HAS_TS_COMMIT | WT_TXN_PUBLIC_TS_COMMIT);
1063 __wt_writeunlock(session, &txn_global->commit_timestamp_rwlock);
1064 }
1065
1066 /*
1067 * __wt_txn_clear_commit_timestamp --
1068 * Clear a transaction's published commit timestamp.
1069 */
1070 void
__wt_txn_clear_commit_timestamp(WT_SESSION_IMPL * session)1071 __wt_txn_clear_commit_timestamp(WT_SESSION_IMPL *session)
1072 {
1073 WT_TXN *txn;
1074 uint32_t flags;
1075
1076 txn = &session->txn;
1077
1078 if (!F_ISSET(txn, WT_TXN_PUBLIC_TS_COMMIT))
1079 return;
1080 flags = txn->flags;
1081 LF_CLR(WT_TXN_PUBLIC_TS_COMMIT);
1082
1083 /*
1084 * Notify other threads that our transaction is inactive and can be
1085 * cleaned up safely from the commit timestamp queue whenever the next
1086 * thread walks the queue. We do not need to remove it now.
1087 */
1088 WT_PUBLISH(txn->clear_commit_q, true);
1089 WT_PUBLISH(txn->flags, flags);
1090 }
1091
1092 /*
1093 * __wt_txn_set_read_timestamp --
1094 * Publish a transaction's read timestamp.
1095 */
1096 void
__wt_txn_set_read_timestamp(WT_SESSION_IMPL * session)1097 __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session)
1098 {
1099 WT_TXN *qtxn, *txn, *txn_tmp;
1100 WT_TXN_GLOBAL *txn_global;
1101 uint64_t walked;
1102
1103 txn = &session->txn;
1104 txn_global = &S2C(session)->txn_global;
1105
1106 if (F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
1107 return;
1108
1109 __wt_writelock(session, &txn_global->read_timestamp_rwlock);
1110 /*
1111 * If our transaction is on the queue remove it first. The timestamp
1112 * may move earlier so we otherwise might not remove ourselves before
1113 * finding where to insert ourselves (which would result in a list
1114 * loop) and we don't want to walk more of the list than needed.
1115 */
1116 if (txn->clear_read_q) {
1117 TAILQ_REMOVE(&txn_global->read_timestamph,
1118 txn, read_timestampq);
1119 WT_PUBLISH(txn->clear_read_q, false);
1120 --txn_global->read_timestampq_len;
1121 }
1122 /*
1123 * Walk the list to look for where to insert our own transaction
1124 * and remove any transactions that are not active. We stop when
1125 * we get to the location where we want to insert.
1126 */
1127 if (TAILQ_EMPTY(&txn_global->read_timestamph)) {
1128 TAILQ_INSERT_HEAD(
1129 &txn_global->read_timestamph, txn, read_timestampq);
1130 WT_STAT_CONN_INCR(session, txn_read_queue_empty);
1131 } else {
1132 /* Walk from the start, removing cleared entries. */
1133 walked = 0;
1134 TAILQ_FOREACH_SAFE(qtxn, &txn_global->read_timestamph,
1135 read_timestampq, txn_tmp) {
1136 ++walked;
1137 if (!qtxn->clear_read_q)
1138 break;
1139
1140 TAILQ_REMOVE(&txn_global->read_timestamph,
1141 qtxn, read_timestampq);
1142 WT_PUBLISH(qtxn->clear_read_q, false);
1143 --txn_global->read_timestampq_len;
1144 }
1145
1146 /*
1147 * Now walk backwards from the end to find the correct position
1148 * for the insert.
1149 */
1150 qtxn = TAILQ_LAST(
1151 &txn_global->read_timestamph, __wt_txn_rts_qh);
1152 while (qtxn != NULL &&
1153 __wt_timestamp_cmp(&qtxn->read_timestamp,
1154 &txn->read_timestamp) > 0) {
1155 ++walked;
1156 qtxn = TAILQ_PREV(
1157 qtxn, __wt_txn_rts_qh, read_timestampq);
1158 }
1159 if (qtxn == NULL) {
1160 TAILQ_INSERT_HEAD(&txn_global->read_timestamph,
1161 txn, read_timestampq);
1162 WT_STAT_CONN_INCR(session, txn_read_queue_head);
1163 } else
1164 TAILQ_INSERT_AFTER(&txn_global->read_timestamph,
1165 qtxn, txn, read_timestampq);
1166 WT_STAT_CONN_INCRV(session, txn_read_queue_walked, walked);
1167 }
1168 /*
1169 * We do not set the read timestamp here. It has been set in the caller
1170 * because special processing for round to oldest.
1171 */
1172 ++txn_global->read_timestampq_len;
1173 WT_STAT_CONN_INCR(session, txn_read_queue_inserts);
1174 txn->clear_read_q = false;
1175 F_SET(txn, WT_TXN_HAS_TS_READ | WT_TXN_PUBLIC_TS_READ);
1176 __wt_writeunlock(session, &txn_global->read_timestamp_rwlock);
1177 }
1178
1179 /*
1180 * __wt_txn_clear_read_timestamp --
1181 * Clear a transaction's published read timestamp.
1182 */
1183 void
__wt_txn_clear_read_timestamp(WT_SESSION_IMPL * session)1184 __wt_txn_clear_read_timestamp(WT_SESSION_IMPL *session)
1185 {
1186 WT_TXN *txn;
1187 uint32_t flags;
1188
1189 txn = &session->txn;
1190
1191 if (!F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
1192 return;
1193
1194 #ifdef HAVE_DIAGNOSTIC
1195 {
1196 WT_TXN_GLOBAL *txn_global;
1197 wt_timestamp_t pinned_ts;
1198
1199 txn_global = &S2C(session)->txn_global;
1200 WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
1201 __wt_timestamp_set(&pinned_ts, &txn_global->pinned_timestamp));
1202 WT_ASSERT(session,
1203 __wt_timestamp_cmp(&txn->read_timestamp, &pinned_ts) >= 0);
1204 }
1205 #endif
1206 flags = txn->flags;
1207 LF_CLR(WT_TXN_PUBLIC_TS_READ);
1208
1209 /*
1210 * Notify other threads that our transaction is inactive and can be
1211 * cleaned up safely from the read timestamp queue whenever the
1212 * next thread walks the queue. We do not need to remove it now.
1213 */
1214 WT_PUBLISH(txn->clear_read_q, true);
1215 WT_PUBLISH(txn->flags, flags);
1216 }
1217 #endif
1218
1219 /*
1220 * __wt_txn_clear_timestamp_queues --
1221 * We're about to clear the session and overwrite the txn structure.
1222 * Remove ourselves from the commit timestamp queue and the read
1223 * timestamp queue if we're on either of them.
1224 */
1225 void
__wt_txn_clear_timestamp_queues(WT_SESSION_IMPL * session)1226 __wt_txn_clear_timestamp_queues(WT_SESSION_IMPL *session)
1227 {
1228 WT_TXN *txn;
1229 WT_TXN_GLOBAL *txn_global;
1230
1231 txn = &session->txn;
1232 txn_global = &S2C(session)->txn_global;
1233
1234 if (!txn->clear_commit_q && !txn->clear_read_q)
1235 return;
1236
1237 if (txn->clear_commit_q) {
1238 __wt_writelock(session, &txn_global->commit_timestamp_rwlock);
1239 /*
1240 * Recheck after acquiring the lock.
1241 */
1242 if (txn->clear_commit_q) {
1243 TAILQ_REMOVE(&txn_global->commit_timestamph,
1244 txn, commit_timestampq);
1245 --txn_global->commit_timestampq_len;
1246 txn->clear_commit_q = false;
1247 }
1248 __wt_writeunlock(session, &txn_global->commit_timestamp_rwlock);
1249 }
1250 if (txn->clear_read_q) {
1251 __wt_writelock(session, &txn_global->read_timestamp_rwlock);
1252 /*
1253 * Recheck after acquiring the lock.
1254 */
1255 if (txn->clear_read_q) {
1256 TAILQ_REMOVE(
1257 &txn_global->read_timestamph, txn, read_timestampq);
1258 --txn_global->read_timestampq_len;
1259 txn->clear_read_q = false;
1260 }
1261 __wt_writeunlock(session, &txn_global->read_timestamp_rwlock);
1262 }
1263 }
1264