1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     PerconaFT is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     PerconaFT is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ----------------------------------------
23 
24     PerconaFT is free software: you can redistribute it and/or modify
25     it under the terms of the GNU Affero General Public License, version 3,
26     as published by the Free Software Foundation.
27 
28     PerconaFT is distributed in the hope that it will be useful,
29     but WITHOUT ANY WARRANTY; without even the implied warranty of
30     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31     GNU Affero General Public License for more details.
32 
33     You should have received a copy of the GNU Affero General Public License
34     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36 
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38 
39 #include <db.h>
40 
41 #include <portability/toku_race_tools.h>
42 #include <portability/toku_atomic.h>
43 
44 #include <ft/cachetable/checkpoint.h>
45 #include <ft/log_header.h>
46 #include <ft/txn/txn_manager.h>
47 
48 
49 #include "ydb-internal.h"
50 #include "ydb_txn.h"
51 #include "ydb_row_lock.h"
52 
toku_txn_id64(DB_TXN * txn)53 static uint64_t toku_txn_id64(DB_TXN * txn) {
54     HANDLE_PANICKED_ENV(txn->mgrp);
55     return toku_txn_get_root_id(db_txn_struct_i(txn)->tokutxn);
56 }
57 
toku_txn_release_locks(DB_TXN * txn)58 static void toku_txn_release_locks(DB_TXN *txn) {
59     // Prevent access to the locktree map while releasing.
60     // It is possible for lock escalation to attempt to
61     // modify this data structure while the txn commits.
62     toku_mutex_lock(&db_txn_struct_i(txn)->txn_mutex);
63 
64     size_t num_ranges = db_txn_struct_i(txn)->lt_map.size();
65     for (size_t i = 0; i < num_ranges; i++) {
66         txn_lt_key_ranges ranges;
67         int r = db_txn_struct_i(txn)->lt_map.fetch(i, &ranges);
68         invariant_zero(r);
69         toku_db_release_lt_key_ranges(txn, &ranges);
70     }
71 
72     toku_mutex_unlock(&db_txn_struct_i(txn)->txn_mutex);
73 }
74 
toku_txn_destroy(DB_TXN * txn)75 static void toku_txn_destroy(DB_TXN *txn) {
76     db_txn_struct_i(txn)->lt_map.destroy();
77     toku_txn_destroy_txn(db_txn_struct_i(txn)->tokutxn);
78     toku_mutex_destroy(&db_txn_struct_i(txn)->txn_mutex);
79     toku_free(txn);
80 }
81 
toku_txn_commit(DB_TXN * txn,uint32_t flags,TXN_PROGRESS_POLL_FUNCTION poll,void * poll_extra,bool release_mo_lock,bool low_priority)82 static int toku_txn_commit(DB_TXN * txn, uint32_t flags,
83                            TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra,
84                            bool release_mo_lock, bool low_priority) {
85     HANDLE_PANICKED_ENV(txn->mgrp);
86     //Recursively kill off children
87     if (db_txn_struct_i(txn)->child) {
88         //commit of child sets the child pointer to NULL
89         int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, flags, NULL, NULL, false, false);
90         if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) {
91             env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent commit.\n");
92         }
93         //In a panicked env, the child may not be removed from the list.
94         HANDLE_PANICKED_ENV(txn->mgrp);
95     }
96     assert(!db_txn_struct_i(txn)->child);
97     //Remove from parent
98     if (txn->parent) {
99         assert(db_txn_struct_i(txn->parent)->child == txn);
100         db_txn_struct_i(txn->parent)->child=NULL;
101     }
102     if (flags & DB_TXN_SYNC) {
103         toku_txn_force_fsync_on_commit(db_txn_struct_i(txn)->tokutxn);
104         flags &= ~DB_TXN_SYNC;
105     }
106     int nosync = (flags & DB_TXN_NOSYNC)!=0 || (db_txn_struct_i(txn)->flags&DB_TXN_NOSYNC);
107     flags &= ~DB_TXN_NOSYNC;
108 
109     int r;
110     if (flags!=0) {
111         // frees the tokutxn
112         r = toku_txn_abort_txn(db_txn_struct_i(txn)->tokutxn, poll, poll_extra);
113     } else {
114         // frees the tokutxn
115         r = toku_txn_commit_txn(db_txn_struct_i(txn)->tokutxn, nosync,
116                                 poll, poll_extra);
117     }
118     if (r!=0 && !toku_env_is_panicked(txn->mgrp)) {
119         env_panic(txn->mgrp, r, "Error during commit.\n");
120     }
121     //If panicked, we're done.
122     HANDLE_PANICKED_ENV(txn->mgrp);
123     assert_zero(r);
124 
125     TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn;
126     TOKULOGGER logger = txn->mgrp->i->logger;
127     LSN do_fsync_lsn;
128     bool do_fsync;
129     toku_txn_get_fsync_info(ttxn, &do_fsync, &do_fsync_lsn);
130     // remove the txn from the list of live transactions, and then
131     // release the lock tree locks. MVCC requires that toku_txn_complete_txn
132     // get called first, otherwise we have bugs, such as #4145 and #4153
133     toku_txn_complete_txn(ttxn);
134     toku_txn_release_locks(txn);
135     // this lock must be released after toku_txn_complete_txn and toku_txn_release_locks because
136     // this lock must be held until the references to the open FTs is released
137     // begin checkpoint logs these associations, so we must be protect
138     // the changing of these associations with checkpointing
139     if (release_mo_lock) {
140         if (low_priority) {
141             toku_low_priority_multi_operation_client_unlock();
142         } else {
143             toku_multi_operation_client_unlock();
144         }
145     }
146     toku_txn_maybe_fsync_log(logger, do_fsync_lsn, do_fsync);
147     if (flags!=0) {
148         r = EINVAL;
149         goto cleanup;
150     }
151 cleanup:
152     toku_txn_destroy(txn);
153     return r;
154 }
155 
toku_txn_abort(DB_TXN * txn,TXN_PROGRESS_POLL_FUNCTION poll,void * poll_extra)156 static int toku_txn_abort(DB_TXN * txn,
157                           TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) {
158     HANDLE_PANICKED_ENV(txn->mgrp);
159     //Recursively kill off children (abort or commit are both correct, commit is cheaper)
160     if (db_txn_struct_i(txn)->child) {
161         //commit of child sets the child pointer to NULL
162         int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, DB_TXN_NOSYNC, NULL, NULL, false, false);
163         if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) {
164             env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent abort.\n");
165         }
166         //In a panicked env, the child may not be removed from the list.
167         HANDLE_PANICKED_ENV(txn->mgrp);
168     }
169     assert(!db_txn_struct_i(txn)->child);
170     //Remove from parent
171     if (txn->parent) {
172         assert(db_txn_struct_i(txn->parent)->child == txn);
173         db_txn_struct_i(txn->parent)->child=NULL;
174     }
175 
176     int r = toku_txn_abort_txn(db_txn_struct_i(txn)->tokutxn, poll, poll_extra);
177     if (r!=0 && !toku_env_is_panicked(txn->mgrp)) {
178         env_panic(txn->mgrp, r, "Error during abort.\n");
179     }
180     HANDLE_PANICKED_ENV(txn->mgrp);
181     assert_zero(r);
182     toku_txn_complete_txn(db_txn_struct_i(txn)->tokutxn);
183     toku_txn_release_locks(txn);
184     toku_txn_destroy(txn);
185     return r;
186 }
187 
toku_txn_xa_prepare(DB_TXN * txn,TOKU_XA_XID * xid,uint32_t flags)188 static int toku_txn_xa_prepare (DB_TXN *txn, TOKU_XA_XID *xid, uint32_t flags) {
189     int r = 0;
190     if (!txn) {
191         r = EINVAL;
192         goto exit;
193     }
194     if (txn->parent) {
195         r = 0; // make this a NO-OP, MySQL calls this
196         goto exit;
197     }
198     HANDLE_PANICKED_ENV(txn->mgrp);
199     // Take the mo lock as soon as a non-readonly txn is found
200     bool holds_mo_lock;
201     holds_mo_lock = false;
202     if (!toku_txn_is_read_only(db_txn_struct_i(txn)->tokutxn)) {
203         // A readonly transaction does no logging, and therefore does not
204         // need the MO lock.
205         toku_multi_operation_client_lock();
206         holds_mo_lock = true;
207     }
208     //Recursively commit any children.
209     if (db_txn_struct_i(txn)->child) {
210         //commit of child sets the child pointer to NULL
211 
212         // toku_txn_commit will take the mo_lock if not held and a non-readonly txn is found.
213         int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, 0, NULL, NULL, false, false);
214         if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) {
215             env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent commit.\n");
216         }
217         //In a panicked env, the child may not be removed from the list.
218         HANDLE_PANICKED_ENV(txn->mgrp);
219     }
220     assert(!db_txn_struct_i(txn)->child);
221     int nosync;
222     nosync = (flags & DB_TXN_NOSYNC)!=0 || (db_txn_struct_i(txn)->flags&DB_TXN_NOSYNC);
223     TOKUTXN ttxn;
224     ttxn = db_txn_struct_i(txn)->tokutxn;
225     toku_txn_prepare_txn(ttxn, xid, nosync);
226     TOKULOGGER logger;
227     logger = txn->mgrp->i->logger;
228     LSN do_fsync_lsn;
229     bool do_fsync;
230     toku_txn_get_fsync_info(ttxn, &do_fsync, &do_fsync_lsn);
231     // release the multi operation lock before fsyncing the log
232     if (holds_mo_lock) {
233         toku_multi_operation_client_unlock();
234     }
235     toku_txn_maybe_fsync_log(logger, do_fsync_lsn, do_fsync);
236 exit:
237     return r;
238 }
239 
240 // requires: must hold the multi operation lock. it is
241 //           released in toku_txn_xa_prepare before the fsync.
toku_txn_prepare(DB_TXN * txn,uint8_t gid[DB_GID_SIZE],uint32_t flags)242 static int toku_txn_prepare (DB_TXN *txn, uint8_t gid[DB_GID_SIZE], uint32_t flags) {
243     TOKU_XA_XID xid;
244     TOKU_ANNOTATE_NEW_MEMORY(&xid, sizeof(xid));
245     xid.formatID=0x756b6f54; // "Toku"
246     xid.gtrid_length=DB_GID_SIZE/2;  // The maximum allowed gtrid length is 64.  See the XA spec in source:/import/opengroup.org/C193.pdf page 20.
247     xid.bqual_length=DB_GID_SIZE/2; // The maximum allowed bqual length is 64.
248     memcpy(xid.data, gid, DB_GID_SIZE);
249     return toku_txn_xa_prepare(txn, &xid, flags);
250 }
251 
toku_txn_txn_stat(DB_TXN * txn,struct txn_stat ** txn_stat)252 static int toku_txn_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) {
253     XMALLOC(*txn_stat);
254     return toku_logger_txn_rollback_stats(db_txn_struct_i(txn)->tokutxn, *txn_stat);
255 }
256 
locked_txn_txn_stat(DB_TXN * txn,struct txn_stat ** txn_stat)257 static int locked_txn_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) {
258     int r = toku_txn_txn_stat(txn, txn_stat);
259     return r;
260 }
261 
locked_txn_commit_with_progress(DB_TXN * txn,uint32_t flags,TXN_PROGRESS_POLL_FUNCTION poll,void * poll_extra)262 static int locked_txn_commit_with_progress(DB_TXN *txn, uint32_t flags,
263                                            TXN_PROGRESS_POLL_FUNCTION poll, void* poll_extra) {
264     bool holds_mo_lock = false;
265     bool low_priority = false;
266     TOKUTXN tokutxn = db_txn_struct_i(txn)->tokutxn;
267     if (!toku_txn_is_read_only(tokutxn)) {
268         // A readonly transaction does no logging, and therefore does not need the MO lock.
269         holds_mo_lock = true;
270         if (toku_is_big_tokutxn(tokutxn)) {
271             low_priority = true;
272             toku_low_priority_multi_operation_client_lock();
273         } else {
274             toku_multi_operation_client_lock();
275         }
276     }
277     // cannot begin a checkpoint.
278     // the multi operation lock is taken the first time we
279     // see a non-readonly txn in the recursive commit.
280     // But released in the first-level toku_txn_commit (if taken),
281     // this way, we don't hold it while we fsync the log.
282     int r = toku_txn_commit(txn, flags, poll, poll_extra, holds_mo_lock, low_priority);
283     return r;
284 }
285 
locked_txn_abort_with_progress(DB_TXN * txn,TXN_PROGRESS_POLL_FUNCTION poll,void * poll_extra)286 static int locked_txn_abort_with_progress(DB_TXN *txn,
287                                           TXN_PROGRESS_POLL_FUNCTION poll, void* poll_extra) {
288     // cannot begin a checkpoint
289     // the multi operation lock is taken the first time we
290     // see a non-readonly txn in the abort (or recursive commit).
291     // But released here so we don't have to hold additional state.
292     bool holds_mo_lock = false;
293     bool low_priority = false;
294     TOKUTXN tokutxn = db_txn_struct_i(txn)->tokutxn;
295     if (!toku_txn_is_read_only(tokutxn)) {
296         // A readonly transaction does no logging, and therefore does not need the MO lock.
297         holds_mo_lock = true;
298         if (toku_is_big_tokutxn(tokutxn)) {
299             low_priority = true;
300             toku_low_priority_multi_operation_client_lock();
301         } else {
302             toku_multi_operation_client_lock();
303         }
304     }
305     int r = toku_txn_abort(txn, poll, poll_extra);
306     if (holds_mo_lock) {
307         if (low_priority) {
308             toku_low_priority_multi_operation_client_unlock();
309         } else {
310             toku_multi_operation_client_unlock();
311         }
312     }
313     return r;
314 }
315 
locked_txn_commit(DB_TXN * txn,uint32_t flags)316 int locked_txn_commit(DB_TXN *txn, uint32_t flags) {
317     int r = locked_txn_commit_with_progress(txn, flags, NULL, NULL);
318     return r;
319 }
320 
locked_txn_abort(DB_TXN * txn)321 int locked_txn_abort(DB_TXN *txn) {
322     int r = locked_txn_abort_with_progress(txn, NULL, NULL);
323     return r;
324 }
325 
locked_txn_set_client_id(DB_TXN * txn,uint64_t client_id,void * client_extra)326 static void locked_txn_set_client_id(DB_TXN *txn, uint64_t client_id, void *client_extra) {
327     toku_txn_set_client_id(db_txn_struct_i(txn)->tokutxn, client_id, client_extra);
328 }
329 
locked_txn_get_client_id(DB_TXN * txn,uint64_t * client_id,void ** client_extra)330 static void locked_txn_get_client_id(DB_TXN *txn, uint64_t *client_id, void **client_extra) {
331     toku_txn_get_client_id(db_txn_struct_i(txn)->tokutxn, client_id, client_extra);
332 }
333 
toku_txn_discard(DB_TXN * txn,uint32_t flags)334 static int toku_txn_discard(DB_TXN *txn, uint32_t flags) {
335     // check parameters
336     if (flags != 0)
337         return EINVAL;
338     TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn;
339     if (toku_txn_get_state(ttxn) != TOKUTXN_PREPARING)
340         return EINVAL;
341 
342     bool low_priority;
343     if (toku_is_big_tokutxn(ttxn)) {
344         low_priority = true;
345         toku_low_priority_multi_operation_client_lock();
346     } else {
347         low_priority = false;
348         toku_multi_operation_client_lock();
349     }
350 
351     // discard
352     toku_txn_discard_txn(ttxn);
353 
354     // complete
355     toku_txn_complete_txn(ttxn);
356 
357     // release locks
358     toku_txn_release_locks(txn);
359 
360     if (low_priority) {
361         toku_low_priority_multi_operation_client_unlock();
362     } else {
363         toku_multi_operation_client_unlock();
364     }
365 
366     // destroy
367     toku_txn_destroy(txn);
368 
369     return 0;
370 }
371 
toku_txn_is_prepared(DB_TXN * txn)372 static bool toku_txn_is_prepared(DB_TXN *txn) {
373     TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn;
374     return toku_txn_get_state(ttxn) == TOKUTXN_PREPARING;
375 }
376 
toku_txn_get_child(DB_TXN * txn)377 static DB_TXN *toku_txn_get_child(DB_TXN *txn) {
378     return db_txn_struct_i(txn)->child;
379 }
380 
toku_txn_get_start_time(DB_TXN * txn)381 static uint64_t toku_txn_get_start_time(DB_TXN *txn) {
382     TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn;
383     return toku_txn_get_start_time(ttxn);
384 }
385 
txn_func_init(DB_TXN * txn)386 static inline void txn_func_init(DB_TXN *txn) {
387 #define STXN(name) txn->name = locked_txn_ ## name
388     STXN(abort);
389     STXN(commit);
390     STXN(abort_with_progress);
391     STXN(commit_with_progress);
392     STXN(txn_stat);
393     STXN(set_client_id);
394     STXN(get_client_id);
395 #undef STXN
396 #define SUTXN(name) txn->name = toku_txn_ ## name
397     SUTXN(prepare);
398     SUTXN(xa_prepare);
399     SUTXN(discard);
400 #undef SUTXN
401     txn->id64 = toku_txn_id64;
402     txn->is_prepared = toku_txn_is_prepared;
403     txn->get_child = toku_txn_get_child;
404     txn->get_start_time = toku_txn_get_start_time;
405 }
406 
407 //
408 // Creates a transaction for the user
409 // In our system, as far as the user is concerned, the rules are as follows:
410 //  - one cannot operate on a transaction if a child exists, with the exception of commit/abort
411 //  - one cannot operate on a transaction simultaneously in two separate threads
412 //     (the reason for this is that some operations may create a child transaction
413 //     as part of the function, such as env->dbremove and env->dbrename, and if
414 //     transactions could be operated on simulatenously in different threads, the first
415 //     rule above is violated)
416 //  - if a parent transaction is committed/aborted, the child transactions are recursively
417 //     committed
418 //
toku_txn_begin(DB_ENV * env,DB_TXN * stxn,DB_TXN ** txn,uint32_t flags)419 int toku_txn_begin(DB_ENV *env, DB_TXN * stxn, DB_TXN ** txn, uint32_t flags) {
420     HANDLE_PANICKED_ENV(env);
421     HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, stxn); //Cannot create child while child already exists.
422     if (!toku_logger_is_open(env->i->logger))
423         return toku_ydb_do_error(env, EINVAL, "Environment does not have logging enabled\n");
424     if (!(env->i->open_flags & DB_INIT_TXN))
425         return toku_ydb_do_error(env, EINVAL, "Environment does not have transactions enabled\n");
426 
427     uint32_t txn_flags = 0;
428     txn_flags |= DB_TXN_NOWAIT; //We do not support blocking locks. RFP remove this?
429 
430     // handle whether txn is declared as read only
431     bool parent_txn_declared_read_only =
432         stxn &&
433         (db_txn_struct_i(stxn)->flags & DB_TXN_READ_ONLY);
434     bool txn_declared_read_only = false;
435     if (flags & DB_TXN_READ_ONLY) {
436         txn_declared_read_only = true;
437         txn_flags |=  DB_TXN_READ_ONLY;
438         flags &= ~(DB_TXN_READ_ONLY);
439     }
440     if (txn_declared_read_only && stxn &&
441         !parent_txn_declared_read_only
442         )
443     {
444         return toku_ydb_do_error(
445             env,
446             EINVAL,
447             "Current transaction set as read only, but parent transaction is not\n"
448             );
449     }
450     if (parent_txn_declared_read_only)
451     {
452         // don't require child transaction to also set transaction as read only
453         // if parent has already done so
454         txn_flags |=  DB_TXN_READ_ONLY;
455         txn_declared_read_only = true;
456     }
457 
458 
459     TOKU_ISOLATION child_isolation = TOKU_ISO_SERIALIZABLE;
460     uint32_t iso_flags = flags & DB_ISOLATION_FLAGS;
461     if (!(iso_flags == 0 ||
462           iso_flags == DB_TXN_SNAPSHOT ||
463           iso_flags == DB_READ_COMMITTED ||
464           iso_flags == DB_READ_COMMITTED_ALWAYS ||
465           iso_flags == DB_READ_UNCOMMITTED ||
466           iso_flags == DB_SERIALIZABLE ||
467           iso_flags == DB_INHERIT_ISOLATION)
468        )
469     {
470         return toku_ydb_do_error(
471             env,
472             EINVAL,
473             "Invalid isolation flags set\n"
474             );
475     }
476     flags &= ~iso_flags;
477 
478     switch (iso_flags) {
479         case (DB_INHERIT_ISOLATION):
480             if (stxn) {
481                 child_isolation = db_txn_struct_i(stxn)->iso;
482             }
483             else {
484                 return toku_ydb_do_error(
485                     env,
486                     EINVAL,
487                     "Cannot set DB_INHERIT_ISOLATION when no parent exists\n"
488                     );
489             }
490             break;
491         case (DB_READ_COMMITTED):
492             child_isolation = TOKU_ISO_READ_COMMITTED;
493             break;
494         case (DB_READ_COMMITTED_ALWAYS):
495             child_isolation = TOKU_ISO_READ_COMMITTED_ALWAYS;
496             break;
497         case (DB_READ_UNCOMMITTED):
498             child_isolation = TOKU_ISO_READ_UNCOMMITTED;
499             break;
500         case (DB_TXN_SNAPSHOT):
501             child_isolation = TOKU_ISO_SNAPSHOT;
502             break;
503         case (DB_SERIALIZABLE):
504             child_isolation = TOKU_ISO_SERIALIZABLE;
505             break;
506         case (0):
507             child_isolation = stxn ? db_txn_struct_i(stxn)->iso : TOKU_ISO_SERIALIZABLE;
508             break;
509         default:
510             assert(false); // error path is above, so this should not happen
511             break;
512     }
513     if (stxn && child_isolation != db_txn_struct_i(stxn)->iso) {
514         return toku_ydb_do_error(
515             env,
516             EINVAL,
517             "Cannot set isolation level of transaction to something different \
518                 isolation level\n"
519             );
520     }
521 
522     if (flags&DB_TXN_NOWAIT) {
523         txn_flags |=  DB_TXN_NOWAIT;
524         flags     &= ~DB_TXN_NOWAIT;
525     }
526     if (flags&DB_TXN_NOSYNC) {
527         txn_flags |=  DB_TXN_NOSYNC;
528         flags     &= ~DB_TXN_NOSYNC;
529     }
530     if (flags!=0) return toku_ydb_do_error(env, EINVAL, "Invalid flags passed to DB_ENV->txn_begin\n");
531 
532     struct __toku_db_txn_external *XCALLOC(eresult); // so the internal stuff is stuck on the end.
533     DB_TXN *result = &eresult->external_part;
534 
535     result->mgrp = env;
536     txn_func_init(result);
537 
538     result->parent = stxn;
539     db_txn_struct_i(result)->flags = txn_flags;
540     db_txn_struct_i(result)->iso = child_isolation;
541     db_txn_struct_i(result)->lt_map.create_no_array();
542 
543     toku_mutex_init(*db_txn_struct_i_txn_mutex_key,
544                     &db_txn_struct_i(result)->txn_mutex,
545                     nullptr);
546 
547     TXN_SNAPSHOT_TYPE snapshot_type;
548     switch (db_txn_struct_i(result)->iso) {
549         case(TOKU_ISO_SNAPSHOT):
550         {
551             snapshot_type = TXN_SNAPSHOT_ROOT;
552             break;
553         }
554         case(TOKU_ISO_READ_COMMITTED):
555         {
556             snapshot_type = TXN_SNAPSHOT_CHILD;
557             break;
558         }
559         case(TOKU_ISO_READ_COMMITTED_ALWAYS) :
560         {
561             snapshot_type = TXN_COPIES_SNAPSHOT;
562             break;
563         }
564         default:
565         {
566             snapshot_type = TXN_SNAPSHOT_NONE;
567             break;
568         }
569     }
570     int r = toku_txn_begin_with_xid(
571         stxn ? db_txn_struct_i(stxn)->tokutxn : 0,
572         &db_txn_struct_i(result)->tokutxn,
573         env->i->logger,
574         TXNID_PAIR_NONE,
575         snapshot_type,
576         result,
577         false, // for_recovery
578         txn_declared_read_only // read_only
579         );
580     if (r != 0) {
581         toku_free(result);
582         return r;
583     }
584 
585     //Add to the list of children for the parent.
586     if (result->parent) {
587         assert(!db_txn_struct_i(result->parent)->child);
588         db_txn_struct_i(result->parent)->child = result;
589     }
590 
591     *txn = result;
592     return 0;
593 }
594 
toku_keep_prepared_txn_callback(DB_ENV * env,TOKUTXN tokutxn)595 void toku_keep_prepared_txn_callback (DB_ENV *env, TOKUTXN tokutxn) {
596     struct __toku_db_txn_external *XCALLOC(eresult);
597     DB_TXN *result = &eresult->external_part;
598     result->mgrp = env;
599     txn_func_init(result);
600 
601     result->parent = NULL;
602 
603     db_txn_struct_i(result)->tokutxn = tokutxn;
604     db_txn_struct_i(result)->lt_map.create();
605 
606     toku_txn_set_container_db_txn(tokutxn, result);
607 
608     toku_mutex_init(*db_txn_struct_i_txn_mutex_key,
609                     &db_txn_struct_i(result)->txn_mutex,
610                     nullptr);
611 }
612 
613 // Test-only function
toku_increase_last_xid(DB_ENV * env,uint64_t increment)614 void toku_increase_last_xid(DB_ENV *env, uint64_t increment) {
615     toku_txn_manager_increase_last_xid(toku_logger_get_txn_manager(env->i->logger), increment);
616 }
617 
toku_is_big_txn(DB_TXN * txn)618 bool toku_is_big_txn(DB_TXN *txn) {
619     return toku_is_big_tokutxn(db_txn_struct_i(txn)->tokutxn);
620 }
621 
toku_is_big_tokutxn(TOKUTXN tokutxn)622 bool toku_is_big_tokutxn(TOKUTXN tokutxn) {
623     return toku_txn_has_spilled_rollback(tokutxn);
624 }
625