1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     PerconaFT is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     PerconaFT is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ----------------------------------------
23 
24     PerconaFT is free software: you can redistribute it and/or modify
25     it under the terms of the GNU Affero General Public License, version 3,
26     as published by the Free Software Foundation.
27 
28     PerconaFT is distributed in the hope that it will be useful,
29     but WITHOUT ANY WARRANTY; without even the implied warranty of
30     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31     GNU Affero General Public License for more details.
32 
33     You should have received a copy of the GNU Affero General Public License
34     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36 
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38 
39 #pragma once
40 
41 #include <db.h>
42 #include <limits.h>
43 
44 #include <ft/cachetable/cachetable.h>
45 #include <ft/cursor.h>
46 #include <ft/comparator.h>
47 #include <ft/logger/logger.h>
48 #include <ft/txn/txn.h>
49 
50 #include <util/growable_array.h>
51 #include <util/minicron.h>
52 #include <util/omt.h>
53 
54 #include <locktree/locktree.h>
55 #include <locktree/range_buffer.h>
56 
57 #include <toku_list.h>
58 
59 struct __toku_db_internal {
60     int opened;
61     uint32_t open_flags;
62     int open_mode;
63     FT_HANDLE ft_handle;
64     DICTIONARY_ID dict_id;        // unique identifier used by locktree logic
65     toku::locktree *lt;
66     struct simple_dbt skey, sval; // static key and value
67     bool key_compare_was_set;     // true if a comparison function was provided before call to db->open()  (if false, use environment's comparison function).
68     char *dname;                  // dname is constant for this handle (handle must be closed before file is renamed)
69     DB_INDEXER *indexer;
70 };
71 
72 int toku_db_set_indexer(DB *db, DB_INDEXER *indexer);
73 DB_INDEXER *toku_db_get_indexer(DB *db);
74 
75 #if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 1
76 typedef void (*toku_env_errcall_t)(const char *, char *);
77 #elif DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3
78 typedef void (*toku_env_errcall_t)(const DB_ENV *, const char *, const char *);
79 #else
80 #error
81 #endif
82 
83 struct __toku_db_env_internal {
84     int is_panicked; // if nonzero, then its an error number
85     char *panic_string;
86     uint32_t open_flags;
87     int open_mode;
88     toku_env_errcall_t errcall;
89     void *errfile;
90     const char *errpfx;
91     char *dir;                  /* A malloc'd copy of the directory. */
92     char *tmp_dir;
93     char *lg_dir;
94     char *data_dir;
95     int (*bt_compare)  (DB *, const DBT *, const DBT *);
96     int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra);
97     generate_row_for_put_func generate_row_for_put;
98     generate_row_for_del_func generate_row_for_del;
99 
100     unsigned long cachetable_size;
101     unsigned long client_pool_threads;
102     unsigned long cachetable_pool_threads;
103     unsigned long checkpoint_pool_threads;
104     CACHETABLE cachetable;
105     TOKULOGGER logger;
106     toku::locktree_manager ltm;
107     lock_timeout_callback lock_wait_timeout_callback;   // Called when a lock request times out waiting for a lock.
108 
109     DB *directory;                                      // Maps dnames to inames
110     DB *persistent_environment;                         // Stores environment settings, can be used for upgrade
111     toku::omt<DB *> *open_dbs_by_dname;                              // Stores open db handles, sorted first by dname and then by numerical value of pointer to the db (arbitrarily assigned memory location)
112     toku::omt<DB *> *open_dbs_by_dict_id;                            // Stores open db handles, sorted by dictionary id and then by numerical value of pointer to the db (arbitrarily assigned memory location)
113     toku_pthread_rwlock_t open_dbs_rwlock;              // rwlock that protects the OMT of open dbs.
114 
115     char *real_data_dir;                                // data dir used when the env is opened (relative to cwd, or absolute with leading /)
116     char *real_log_dir;                                 // log dir used when the env is opened  (relative to cwd, or absolute with leading /)
117     char *real_tmp_dir;                                 // tmp dir used for temporary files (relative to cwd, or absolute with leading /)
118 
119     fs_redzone_state fs_state;
120     uint64_t fs_seq;                                    // how many times has fs_poller run?
121     uint64_t last_seq_entered_red;
122     uint64_t last_seq_entered_yellow;
123     int redzone;                                        // percent of total fs space that marks boundary between yellow and red zones
124     int enospc_redzone_ctr;                             // number of operations rejected by enospc prevention  (red zone)
125     int fs_poll_time;                                   // Time in seconds between statfs calls
126     struct minicron fs_poller;                          // Poll the file systems
127     bool fs_poller_is_init;
128     uint32_t fsync_log_period_ms;
129     bool fsync_log_cron_is_init;
130     struct minicron fsync_log_cron;                     // fsync recovery log
131     int envdir_lockfd;
132     int datadir_lockfd;
133     int logdir_lockfd;
134     int tmpdir_lockfd;
135     bool check_thp;  // if set check if transparent huge pages are disabled
136     bool dir_per_db;
137     uint64_t (*get_loader_memory_size_callback)(void);
138     uint64_t default_lock_timeout_msec;
139     uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec);
140     uint64_t default_killed_time_msec;
141     uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec);
142     int (*killed_callback)(void);
143 };
144 
145 // test-only environment function for running lock escalation
toku_env_run_lock_escalation_for_test(DB_ENV * env)146 static inline void toku_env_run_lock_escalation_for_test(DB_ENV *env) {
147     toku::locktree_manager *mgr = &env->i->ltm;
148     mgr->run_escalation_for_test();
149 }
150 
// Common error handling macros and panic detection.
//
// Each macro expands to an unbraced `if (...) return ...;` statement, so it
// returns from the *enclosing function* and may only be used where that
// function's return type accepts the returned value.
// NOTE(review): because the expansion is a bare `if`, do not use these as the
// unbraced body of an if/else (dangling-else hazard). They are kept in this
// form because call sites are not visible from this header.

// Return `status` from the enclosing function when `cond` is true.
#define MAYBE_RETURN_ERROR(cond, status) if (cond) return status;
// If toku_env_is_panicked(env) is true: sleep for one second, then return
// EINVAL from the enclosing function.
#define HANDLE_PANICKED_ENV(env) if (toku_env_is_panicked(env)) { sleep(1); return EINVAL; }
// Same check, applied to db's environment (db->dbenv). The argument is
// parenthesized so that compound expressions expand correctly.
#define HANDLE_PANICKED_DB(db) HANDLE_PANICKED_ENV((db)->dbenv)
155 
// Only commit/abort/prelock (which are used by handlerton) are allowed when a child exists.
//
// If `txn` is non-null and its internal `child` pointer is set, report the
// problem through toku_ydb_do_error(env, EINVAL, ...) and return that call's
// result from the enclosing function. A null `txn` passes the check.
#define HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn) \
        MAYBE_RETURN_ERROR(((txn) && db_txn_struct_i(txn)->child), \
                             toku_ydb_do_error((env),                \
                                               EINVAL,               \
                                               "%s: Transaction cannot do work when child exists\n", __FUNCTION__))

// Same check, using the environment of `db` (db->dbenv).
#define HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn) \
        HANDLE_ILLEGAL_WORKING_PARENT_TXN((db)->dbenv, txn)

// Same check, using the cursor's database (c->dbp) and the transaction stored
// in the cursor's internal state (dbc_struct_i(c)->txn).
#define HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c)   \
        HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN((c)->dbp, dbc_struct_i(c)->txn)
168 
// Bail out if we get unknown flags.
//
// When `env` is non-null and `flags_to_function` has any bit set outside
// `allowed_flags`, report the offending bits through toku_ydb_do_error(env,
// EINVAL, ...) and return that call's result from the enclosing function.
// When `env` is null the check is skipped entirely.
// `flags_to_function` and `allowed_flags` are each evaluated more than once.
// NOTE(review): the PRIu32 conversion assumes the flag expression has type
// uint32_t -- confirm at call sites.
#define HANDLE_EXTRA_FLAGS(env, flags_to_function, allowed_flags) \
        MAYBE_RETURN_ERROR((env) && ((flags_to_function) & ~(allowed_flags)), \
                           toku_ydb_do_error((env),                           \
                                             EINVAL,                          \
                                             "Unknown flags (%" PRIu32 ") in " __FILE__ ":%s(): %d\n", (flags_to_function) & ~(allowed_flags), __FUNCTION__, __LINE__))
175 
176 int toku_ydb_check_avail_fs_space(DB_ENV *env);
177 
178 void toku_ydb_error_all_cases(const DB_ENV * env,
179                               int error,
180                               bool include_stderrstring,
181                               bool use_stderr_if_nothing_else,
182                               const char *fmt, va_list ap)
183     __attribute__((format (printf, 5, 0)))
184     __attribute__((__visibility__("default"))); // this is needed by the C++ interface.
185 
186 int toku_ydb_do_error (const DB_ENV *dbenv, int error, const char *string, ...)
187                        __attribute__((__format__(__printf__, 3, 4)));
188 
189 /* Environment related errors */
190 int toku_env_is_panicked(DB_ENV *dbenv);
191 void toku_env_err(const DB_ENV * env, int error, const char *fmt, ...)
192                          __attribute__((__format__(__printf__, 3, 4)));
193 
// Isolation level stored per transaction (__toku_db_txn_internal::iso) and
// per cursor (__toku_dbc_internal::iso). The numeric values are fixed
// explicitly.
// NOTE(review): presumably derived from the DB_* isolation flags listed in
// DB_ISOLATION_FLAGS -- confirm where the mapping is done.
typedef enum __toku_isolation_level {
    TOKU_ISO_SERIALIZABLE=0,
    TOKU_ISO_SNAPSHOT=1,
    TOKU_ISO_READ_COMMITTED=2,
    TOKU_ISO_READ_UNCOMMITTED=3,
    TOKU_ISO_READ_COMMITTED_ALWAYS=4
} TOKU_ISOLATION;
201 
// Bitwise OR of every isolation-related DB_* flag; needed in ydb_db.c
#define DB_ISOLATION_FLAGS (DB_READ_COMMITTED | DB_READ_COMMITTED_ALWAYS | DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT | DB_SERIALIZABLE | DB_INHERIT_ISOLATION)
204 
205 struct txn_lock_range {
206     DBT left;
207     DBT right;
208 };
209 
210 struct txn_lt_key_ranges {
211     toku::locktree *lt;
212     toku::range_buffer *buffer;
213 };
214 
215 struct __toku_db_txn_internal {
216     struct tokutxn *tokutxn;
217     uint32_t flags;
218     TOKU_ISOLATION iso;
219     DB_TXN *child;
220     toku_mutex_t txn_mutex;
221 
222     // maps a locktree to a buffer of key ranges that are locked.
223     // it is protected by the txn_mutex, so hot indexing and a client
224     // thread can concurrently operate on this txn.
225     toku::omt<txn_lt_key_ranges> lt_map;
226 };
227 
228 struct __toku_db_txn_external {
229     struct __toku_db_txn           external_part;
230     struct __toku_db_txn_internal  internal_part;
231 };
// Yields a pointer to the private (internal_part) state of the transaction
// that `x` points to, by viewing `x` as a struct __toku_db_txn_external *.
// `x` is parenthesized so that the cast applies to the whole argument even
// when it is a compound expression (e.g. a conditional expression).
#define db_txn_struct_i(x) (&((struct __toku_db_txn_external *)(x))->internal_part)
233 
234 struct __toku_dbc_internal {
235     struct ft_cursor ftcursor;
236     DB_TXN *txn;
237     TOKU_ISOLATION iso;
238     struct simple_dbt skey_s,sval_s;
239     struct simple_dbt *skey,*sval;
240 
241     // if the rmw flag is asserted, cursor operations (like set) grab write
242     // locks instead of read locks
243     // the rmw flag is set when the cursor is created with the DB_RMW flag set
244     bool rmw;
245     bool locking_read;
246 };
247 
248 static_assert(
249     sizeof(__toku_dbc_internal) <= sizeof(((DBC *)nullptr)->_internal),
250     "__toku_dbc_internal doesn't fit in the internal portion of a DBC");
251 
dbc_struct_i(DBC * c)252 static inline __toku_dbc_internal *dbc_struct_i(DBC *c) {
253     union dbc_union {
254         __toku_dbc_internal *dbc_internal;
255         char *buf;
256     } u;
257     u.buf = c->_internal;
258     return u.dbc_internal;
259 }
260 
dbc_ftcursor(DBC * c)261 static inline struct ft_cursor *dbc_ftcursor(DBC *c) {
262     return &dbc_struct_i(c)->ftcursor;
263 }
264 
265 static inline int
env_opened(DB_ENV * env)266 env_opened(DB_ENV *env) {
267     return env->i->cachetable != 0;
268 }
269 
270 static inline bool
txn_is_read_only(DB_TXN * txn)271 txn_is_read_only(DB_TXN* txn) {
272     if (txn && (db_txn_struct_i(txn)->flags & DB_TXN_READ_ONLY)) {
273         return true;
274     }
275     return false;
276 }
277 
// Return EINVAL from the enclosing function when `txn` is a read-only
// transaction (see txn_is_read_only(); a null txn passes the check).
// Expands to an unbraced `if` statement, so do not use it as the unbraced
// body of an if/else.
#define HANDLE_READ_ONLY_TXN(txn) if(txn_is_read_only(txn)) return EINVAL;
279 
280 void env_panic(DB_ENV * env, int cause, const char * msg);
281 void env_note_db_opened(DB_ENV *env, DB *db);
282 void env_note_db_closed(DB_ENV *env, DB *db);
283