1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6
7
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 PerconaFT is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 PerconaFT is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22 ----------------------------------------
23
24 PerconaFT is free software: you can redistribute it and/or modify
25 it under the terms of the GNU Affero General Public License, version 3,
26 as published by the Free Software Foundation.
27
28 PerconaFT is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU Affero General Public License for more details.
32
33 You should have received a copy of the GNU Affero General Public License
34 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39 #pragma once
40
41 #include <db.h>
42 #include <limits.h>
43
44 #include <ft/cachetable/cachetable.h>
45 #include <ft/cursor.h>
46 #include <ft/comparator.h>
47 #include <ft/logger/logger.h>
48 #include <ft/txn/txn.h>
49
50 #include <util/growable_array.h>
51 #include <util/minicron.h>
52 #include <util/omt.h>
53
54 #include <locktree/locktree.h>
55 #include <locktree/range_buffer.h>
56
57 #include <toku_list.h>
58
59 struct __toku_db_internal {
60 int opened;
61 uint32_t open_flags;
62 int open_mode;
63 FT_HANDLE ft_handle;
64 DICTIONARY_ID dict_id; // unique identifier used by locktree logic
65 toku::locktree *lt;
66 struct simple_dbt skey, sval; // static key and value
67 bool key_compare_was_set; // true if a comparison function was provided before call to db->open() (if false, use environment's comparison function).
68 char *dname; // dname is constant for this handle (handle must be closed before file is renamed)
69 DB_INDEXER *indexer;
70 };
71
72 int toku_db_set_indexer(DB *db, DB_INDEXER *indexer);
73 DB_INDEXER *toku_db_get_indexer(DB *db);
74
75 #if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 1
76 typedef void (*toku_env_errcall_t)(const char *, char *);
77 #elif DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3
78 typedef void (*toku_env_errcall_t)(const DB_ENV *, const char *, const char *);
79 #else
80 #error
81 #endif
82
83 struct __toku_db_env_internal {
84 int is_panicked; // if nonzero, then its an error number
85 char *panic_string;
86 uint32_t open_flags;
87 int open_mode;
88 toku_env_errcall_t errcall;
89 void *errfile;
90 const char *errpfx;
91 char *dir; /* A malloc'd copy of the directory. */
92 char *tmp_dir;
93 char *lg_dir;
94 char *data_dir;
95 int (*bt_compare) (DB *, const DBT *, const DBT *);
96 int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra);
97 generate_row_for_put_func generate_row_for_put;
98 generate_row_for_del_func generate_row_for_del;
99
100 unsigned long cachetable_size;
101 unsigned long client_pool_threads;
102 unsigned long cachetable_pool_threads;
103 unsigned long checkpoint_pool_threads;
104 CACHETABLE cachetable;
105 TOKULOGGER logger;
106 toku::locktree_manager ltm;
107 lock_timeout_callback lock_wait_timeout_callback; // Called when a lock request times out waiting for a lock.
108
109 DB *directory; // Maps dnames to inames
110 DB *persistent_environment; // Stores environment settings, can be used for upgrade
111 toku::omt<DB *> *open_dbs_by_dname; // Stores open db handles, sorted first by dname and then by numerical value of pointer to the db (arbitrarily assigned memory location)
112 toku::omt<DB *> *open_dbs_by_dict_id; // Stores open db handles, sorted by dictionary id and then by numerical value of pointer to the db (arbitrarily assigned memory location)
113 toku_pthread_rwlock_t open_dbs_rwlock; // rwlock that protects the OMT of open dbs.
114
115 char *real_data_dir; // data dir used when the env is opened (relative to cwd, or absolute with leading /)
116 char *real_log_dir; // log dir used when the env is opened (relative to cwd, or absolute with leading /)
117 char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absolute with leading /)
118
119 fs_redzone_state fs_state;
120 uint64_t fs_seq; // how many times has fs_poller run?
121 uint64_t last_seq_entered_red;
122 uint64_t last_seq_entered_yellow;
123 int redzone; // percent of total fs space that marks boundary between yellow and red zones
124 int enospc_redzone_ctr; // number of operations rejected by enospc prevention (red zone)
125 int fs_poll_time; // Time in seconds between statfs calls
126 struct minicron fs_poller; // Poll the file systems
127 bool fs_poller_is_init;
128 uint32_t fsync_log_period_ms;
129 bool fsync_log_cron_is_init;
130 struct minicron fsync_log_cron; // fsync recovery log
131 int envdir_lockfd;
132 int datadir_lockfd;
133 int logdir_lockfd;
134 int tmpdir_lockfd;
135 bool check_thp; // if set check if transparent huge pages are disabled
136 bool dir_per_db;
137 uint64_t (*get_loader_memory_size_callback)(void);
138 uint64_t default_lock_timeout_msec;
139 uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec);
140 uint64_t default_killed_time_msec;
141 uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec);
142 int (*killed_callback)(void);
143 };
144
145 // test-only environment function for running lock escalation
toku_env_run_lock_escalation_for_test(DB_ENV * env)146 static inline void toku_env_run_lock_escalation_for_test(DB_ENV *env) {
147 toku::locktree_manager *mgr = &env->i->ltm;
148 mgr->run_escalation_for_test();
149 }
150
// Common error handling macros and panic detection.
//
// Each macro expands to a complete `if` statement that returns from the
// *calling* function, so they may only be used inside functions whose return
// type can hold the returned status.
//
// CAUTION: MAYBE_RETURN_ERROR already ends in ';' and none of these macros is
// wrapped in do { } while (0); the expansions are kept in that form so every
// existing call site keeps compiling. Do not use them as the unbraced body of
// an if/else.

// Returns `status` from the enclosing function when `cond` is true.
#define MAYBE_RETURN_ERROR(cond, status) if (cond) return (status);

// If the environment is panicked: sleep for one second, then return EINVAL
// from the enclosing function.
#define HANDLE_PANICKED_ENV(env) if (toku_env_is_panicked(env)) { sleep(1); return EINVAL; }

// Same check, applied to the environment that owns `db`. The argument is
// parenthesized so that expressions such as `*dbp` expand correctly.
#define HANDLE_PANICKED_DB(db) HANDLE_PANICKED_ENV((db)->dbenv)
155
// Only commit/abort/prelock (which are used by handlerton) are allowed when a child exists.
//
// When `txn` is non-null and its internal `child` pointer is set, report the
// problem through toku_ydb_do_error() and return that call's result from the
// enclosing function.
#define HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn)                               \
    MAYBE_RETURN_ERROR(                                                           \
        ((txn) && db_txn_struct_i(txn)->child),                                   \
        toku_ydb_do_error((env), EINVAL,                                          \
                          "%s: Transaction cannot do work when child exists\n",   \
                          __FUNCTION__))

// Same check, taking the environment from the DB handle.
#define HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn)                             \
    HANDLE_ILLEGAL_WORKING_PARENT_TXN((db)->dbenv, txn)

// Same check, taking both the DB handle and the transaction from the cursor.
#define HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c)                               \
    HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN((c)->dbp, dbc_struct_i(c)->txn)
168
// Bail out if we get unknown flags.
//
// If `env` is non-null and `flags_to_function` has any bit set outside
// `allowed_flags`, report the offending bits together with the file, function
// and line through toku_ydb_do_error() and return that call's result from the
// enclosing function.
#define HANDLE_EXTRA_FLAGS(env, flags_to_function, allowed_flags)                 \
    MAYBE_RETURN_ERROR(                                                           \
        (env) && ((flags_to_function) & ~(allowed_flags)),                        \
        toku_ydb_do_error((env), EINVAL,                                          \
                          "Unknown flags (%" PRIu32 ") in " __FILE__ ":%s(): %d\n", \
                          (flags_to_function) & ~(allowed_flags),                 \
                          __FUNCTION__,                                           \
                          __LINE__))
175
176 int toku_ydb_check_avail_fs_space(DB_ENV *env);
177
178 void toku_ydb_error_all_cases(const DB_ENV * env,
179 int error,
180 bool include_stderrstring,
181 bool use_stderr_if_nothing_else,
182 const char *fmt, va_list ap)
183 __attribute__((format (printf, 5, 0)))
184 __attribute__((__visibility__("default"))); // this is needed by the C++ interface.
185
186 int toku_ydb_do_error (const DB_ENV *dbenv, int error, const char *string, ...)
187 __attribute__((__format__(__printf__, 3, 4)));
188
189 /* Environment related errors */
190 int toku_env_is_panicked(DB_ENV *dbenv);
191 void toku_env_err(const DB_ENV * env, int error, const char *fmt, ...)
192 __attribute__((__format__(__printf__, 3, 4)));
193
// Isolation level stored in the `iso` field of the internal transaction and
// cursor structures. The numeric values are assigned explicitly.
typedef enum __toku_isolation_level {
    TOKU_ISO_SERIALIZABLE          = 0,
    TOKU_ISO_SNAPSHOT              = 1,
    TOKU_ISO_READ_COMMITTED        = 2,
    TOKU_ISO_READ_UNCOMMITTED      = 3,
    TOKU_ISO_READ_COMMITTED_ALWAYS = 4
} TOKU_ISOLATION;
201
// needed in ydb_db.c
// Bitwise OR of all the isolation-related flag constants listed below.
#define DB_ISOLATION_FLAGS (DB_READ_COMMITTED        | \
                            DB_READ_COMMITTED_ALWAYS | \
                            DB_READ_UNCOMMITTED      | \
                            DB_TXN_SNAPSHOT          | \
                            DB_SERIALIZABLE          | \
                            DB_INHERIT_ISOLATION)
204
205 struct txn_lock_range {
206 DBT left;
207 DBT right;
208 };
209
210 struct txn_lt_key_ranges {
211 toku::locktree *lt;
212 toku::range_buffer *buffer;
213 };
214
215 struct __toku_db_txn_internal {
216 struct tokutxn *tokutxn;
217 uint32_t flags;
218 TOKU_ISOLATION iso;
219 DB_TXN *child;
220 toku_mutex_t txn_mutex;
221
222 // maps a locktree to a buffer of key ranges that are locked.
223 // it is protected by the txn_mutex, so hot indexing and a client
224 // thread can concurrently operate on this txn.
225 toku::omt<txn_lt_key_ranges> lt_map;
226 };
227
228 struct __toku_db_txn_external {
229 struct __toku_db_txn external_part;
230 struct __toku_db_txn_internal internal_part;
231 };
232 #define db_txn_struct_i(x) (&((struct __toku_db_txn_external *)x)->internal_part)
233
234 struct __toku_dbc_internal {
235 struct ft_cursor ftcursor;
236 DB_TXN *txn;
237 TOKU_ISOLATION iso;
238 struct simple_dbt skey_s,sval_s;
239 struct simple_dbt *skey,*sval;
240
241 // if the rmw flag is asserted, cursor operations (like set) grab write
242 // locks instead of read locks
243 // the rmw flag is set when the cursor is created with the DB_RMW flag set
244 bool rmw;
245 bool locking_read;
246 };
247
248 static_assert(
249 sizeof(__toku_dbc_internal) <= sizeof(((DBC *)nullptr)->_internal),
250 "__toku_dbc_internal doesn't fit in the internal portion of a DBC");
251
dbc_struct_i(DBC * c)252 static inline __toku_dbc_internal *dbc_struct_i(DBC *c) {
253 union dbc_union {
254 __toku_dbc_internal *dbc_internal;
255 char *buf;
256 } u;
257 u.buf = c->_internal;
258 return u.dbc_internal;
259 }
260
dbc_ftcursor(DBC * c)261 static inline struct ft_cursor *dbc_ftcursor(DBC *c) {
262 return &dbc_struct_i(c)->ftcursor;
263 }
264
265 static inline int
env_opened(DB_ENV * env)266 env_opened(DB_ENV *env) {
267 return env->i->cachetable != 0;
268 }
269
270 static inline bool
txn_is_read_only(DB_TXN * txn)271 txn_is_read_only(DB_TXN* txn) {
272 if (txn && (db_txn_struct_i(txn)->flags & DB_TXN_READ_ONLY)) {
273 return true;
274 }
275 return false;
276 }
277
278 #define HANDLE_READ_ONLY_TXN(txn) if(txn_is_read_only(txn)) return EINVAL;
279
280 void env_panic(DB_ENV * env, int cause, const char * msg);
281 void env_note_db_opened(DB_ENV *env, DB *db);
282 void env_note_db_closed(DB_ENV *env, DB *db);
283