1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     PerconaFT is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     PerconaFT is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ----------------------------------------
23 
24     PerconaFT is free software: you can redistribute it and/or modify
25     it under the terms of the GNU Affero General Public License, version 3,
26     as published by the Free Software Foundation.
27 
28     PerconaFT is distributed in the hope that it will be useful,
29     but WITHOUT ANY WARRANTY; without even the implied warranty of
30     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31     GNU Affero General Public License for more details.
32 
33     You should have received a copy of the GNU Affero General Public License
34     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36 
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38 
39 #pragma once
40 
41 #include <db.h>
42 #include <limits.h>
43 
44 #include <ft/cachetable/cachetable.h>
45 #include <ft/cursor.h>
46 #include <ft/comparator.h>
47 #include <ft/logger/logger.h>
48 #include <ft/txn/txn.h>
49 
50 #include <util/growable_array.h>
51 #include <util/minicron.h>
52 #include <util/omt.h>
53 
54 #include <locktree/locktree.h>
55 #include <locktree/range_buffer.h>
56 
57 #include <toku_list.h>
58 
// Private, implementation-side state for a database handle.
// NOTE(review): presumably reached through the DB handle's internal pointer — confirm against db.h.
struct __toku_db_internal {
    int opened;                   // NOTE(review): presumably nonzero once the handle has been opened — confirm
    uint32_t open_flags;
    int open_mode;
    FT_HANDLE ft_handle;
    DICTIONARY_ID dict_id;        // unique identifier used by locktree logic
    toku::locktree *lt;
    struct simple_dbt skey, sval; // static key and value
    bool key_compare_was_set;     // true if a comparison function was provided before call to db->open()  (if false, use environment's comparison function).
    char *dname;                  // dname is constant for this handle (handle must be closed before file is renamed)
    DB_INDEXER *indexer;          // NOTE(review): presumably managed via toku_db_set_indexer()/toku_db_get_indexer() — confirm
};
71 
// Setter/getter pair for the DB_INDEXER associated with a DB handle.
// Only declared here; the definitions are not part of this header.
// NOTE(review): presumably these write/read __toku_db_internal::indexer — confirm in the implementation.
int toku_db_set_indexer(DB *db, DB_INDEXER *indexer);
DB_INDEXER *toku_db_get_indexer(DB *db);
74 
// Type of the environment error callback stored in __toku_db_env_internal::errcall.
// The signature depends on the API version given by DB_VERSION_MAJOR / DB_VERSION_MINOR:
//   4.1  -> (const char *, char *)
//   4.3+ -> (const DB_ENV *, const char *, const char *)
// Any other version combination is rejected at compile time.
#if DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR == 1
typedef void (*toku_env_errcall_t)(const char *, char *);
#elif DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 3
typedef void (*toku_env_errcall_t)(const DB_ENV *, const char *, const char *);
#else
#error
#endif
82 
// Private, implementation-side state for an environment handle.
// Helpers in this header reach it as env->i (see env_opened() and
// toku_env_run_lock_escalation_for_test()).
struct __toku_db_env_internal {
    int is_panicked; // if nonzero, then it's an error number
    char *panic_string;
    uint32_t open_flags;
    int open_mode;
    toku_env_errcall_t errcall;
    void *errfile;
    const char *errpfx;
    char *dir;                  /* A malloc'd copy of the directory. */
    char *tmp_dir;
    char *lg_dir;
    char *data_dir;
    // User-supplied callbacks.
    int (*bt_compare)  (DB *, const DBT *, const DBT *);
    int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra);
    generate_row_for_put_func generate_row_for_put;
    generate_row_for_del_func generate_row_for_del;

    unsigned long cachetable_size;
    unsigned long client_pool_threads;
    unsigned long cachetable_pool_threads;
    unsigned long checkpoint_pool_threads;
    CACHETABLE cachetable;                              // env_opened() treats a nonzero value here as "environment is open"
    TOKULOGGER logger;
    toku::locktree_manager ltm;
    lock_timeout_callback lock_wait_timeout_callback;   // Called when a lock request times out waiting for a lock.
    lock_wait_callback lock_wait_needed_callback;       // Called when a lock request requires a wait.

    DB *directory;                                      // Maps dnames to inames
    DB *persistent_environment;                         // Stores environment settings, can be used for upgrade
    toku::omt<DB *> *open_dbs_by_dname;                              // Stores open db handles, sorted first by dname and then by numerical value of pointer to the db (arbitrarily assigned memory location)
    toku::omt<DB *> *open_dbs_by_dict_id;                            // Stores open db handles, sorted by dictionary id and then by numerical value of pointer to the db (arbitrarily assigned memory location)
    toku_pthread_rwlock_t open_dbs_rwlock;              // rwlock that protects the OMT of open dbs.

    char *real_data_dir;                                // data dir used when the env is opened (relative to cwd, or absolute with leading /)
    char *real_log_dir;                                 // log dir used when the env is opened  (relative to cwd, or absolute with leading /)
    char *real_tmp_dir;                                 // tmp dir used for temporary files (relative to cwd, or absolute with leading /)

    // File-system free-space monitoring state.
    fs_redzone_state fs_state;
    uint64_t fs_seq;                                    // how many times has fs_poller run?
    uint64_t last_seq_entered_red;
    uint64_t last_seq_entered_yellow;
    int redzone;                                        // percent of total fs space that marks boundary between yellow and red zones
    int enospc_redzone_ctr;                             // number of operations rejected by enospc prevention  (red zone)
    int fs_poll_time;                                   // Time in seconds between statfs calls
    struct minicron fs_poller;                          // Poll the file systems
    bool fs_poller_is_init;
    uint32_t fsync_log_period_ms;
    bool fsync_log_cron_is_init;
    struct minicron fsync_log_cron;                     // fsync recovery log
    // NOTE(review): presumably file descriptors of per-directory lock files — confirm
    int envdir_lockfd;
    int datadir_lockfd;
    int logdir_lockfd;
    int tmpdir_lockfd;
    bool check_thp;  // if set check if transparent huge pages are disabled
    bool dir_per_db;
    uint64_t (*get_loader_memory_size_callback)(void);
    // Each default_* value below is paired with a callback that takes that default as its argument.
    uint64_t default_lock_timeout_msec;
    uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec);
    uint64_t default_killed_time_msec;
    uint64_t (*get_killed_time_callback)(uint64_t default_killed_time_msec);
    int (*killed_callback)(void);
};
145 
146 // test-only environment function for running lock escalation
toku_env_run_lock_escalation_for_test(DB_ENV * env)147 static inline void toku_env_run_lock_escalation_for_test(DB_ENV *env) {
148     toku::locktree_manager *mgr = &env->i->ltm;
149     mgr->run_escalation_for_test();
150 }
151 
// Common error handling macros and panic detection.
//
// All three expand to a bare `if` statement that may return from the
// enclosing function. They are deliberately kept in that form (not wrapped
// in do { } while (0)) so existing call sites, with or without a trailing
// semicolon, keep compiling. Do not use them as the unbraced body of an
// if/else.
//
// MAYBE_RETURN_ERROR(cond, status): return `status` when `cond` is true.
#define MAYBE_RETURN_ERROR(cond, status) if (cond) return status;
// HANDLE_PANICKED_ENV(env): if toku_env_is_panicked(env) is nonzero,
// sleep for one second and return EINVAL.
#define HANDLE_PANICKED_ENV(env) if (toku_env_is_panicked(env)) { sleep(1); return EINVAL; }
// HANDLE_PANICKED_DB(db): same check, applied to the db's environment.
// `db` is parenthesized so that an expression argument (e.g. `dbs + i`)
// expands to `(dbs + i)->dbenv` rather than `dbs + i->dbenv`.
#define HANDLE_PANICKED_DB(db) HANDLE_PANICKED_ENV((db)->dbenv)
156 
// Only commit/abort/prelock (which are used by handlerton) are allowed when a child exists.
//
// HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn): when `txn` is non-null and
// db_txn_struct_i(txn)->child is set, returns from the enclosing function
// with the value of toku_ydb_do_error(env, EINVAL, ...); the message names
// the enclosing function via __FUNCTION__. Otherwise falls through.
#define HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn) \
        MAYBE_RETURN_ERROR(((txn) && db_txn_struct_i(txn)->child), \
                             toku_ydb_do_error((env),                \
                                               EINVAL,               \
                                               "%s: Transaction cannot do work when child exists\n", __FUNCTION__))

// Same check, taking the environment from (db)->dbenv.
#define HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn) \
        HANDLE_ILLEGAL_WORKING_PARENT_TXN((db)->dbenv, txn)

// Same check for a cursor: uses the cursor's db ((c)->dbp) and the
// transaction recorded in its internal state (dbc_struct_i(c)->txn).
#define HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c)   \
        HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN((c)->dbp, dbc_struct_i(c)->txn)
169 
// Bail out if we get unknown flags
//
// HANDLE_EXTRA_FLAGS(env, flags_to_function, allowed_flags): when `env` is
// non-null and `flags_to_function` has any bit outside `allowed_flags`,
// returns from the enclosing function with the value of
// toku_ydb_do_error(env, EINVAL, ...). The message reports the offending
// bits together with __FILE__, __FUNCTION__ and __LINE__.
// Nothing is checked when `env` is null.
// The offending bits are printed with PRIu32.
// NOTE(review): that assumes the flags argument is a uint32_t — confirm at call sites.
#define HANDLE_EXTRA_FLAGS(env, flags_to_function, allowed_flags) \
        MAYBE_RETURN_ERROR((env) && ((flags_to_function) & ~(allowed_flags)), \
			 toku_ydb_do_error((env),			\
					   EINVAL,			\
					   "Unknown flags (%" PRIu32 ") in " __FILE__ ":%s(): %d\n", (flags_to_function) & ~(allowed_flags), __FUNCTION__, __LINE__))
176 
// NOTE(review): presumably checks free file-system space against the redzone settings
// kept in __toku_db_env_internal — confirm in the implementation.
int toku_ydb_check_avail_fs_space(DB_ENV *env);

// va_list-based error reporter. `fmt` (argument 5) is a printf-style format
// string whose arguments are supplied through `ap`; the format attribute lets
// the compiler check the format string itself.
void toku_ydb_error_all_cases(const DB_ENV * env,
                              int error,
                              bool include_stderrstring,
                              bool use_stderr_if_nothing_else,
                              const char *fmt, va_list ap)
    __attribute__((format (printf, 5, 0)))
    __attribute__((__visibility__("default"))); // this is needed by the C++ interface.

// Variadic error reporter: `string` (argument 3) is a printf-style format
// checked against the trailing arguments. The macros in this header return
// its int result directly from the calling function.
int toku_ydb_do_error (const DB_ENV *dbenv, int error, const char *string, ...)
                       __attribute__((__format__(__printf__, 3, 4)));

/* Environment related errors */
// Used by HANDLE_PANICKED_ENV: a nonzero result is treated as "panicked".
int toku_env_is_panicked(DB_ENV *dbenv);
// printf-style error reporting for an environment; `fmt` is argument 3.
void toku_env_err(const DB_ENV * env, int error, const char *fmt, ...)
                         __attribute__((__format__(__printf__, 3, 4)));
194 
// Isolation level recorded per transaction (__toku_db_txn_internal::iso)
// and per cursor (__toku_dbc_internal::iso). Values are assigned explicitly.
// NOTE(review): confirm whether anything depends on the numeric values before renumbering.
typedef enum __toku_isolation_level {
    TOKU_ISO_SERIALIZABLE=0,
    TOKU_ISO_SNAPSHOT=1,
    TOKU_ISO_READ_COMMITTED=2,
    TOKU_ISO_READ_UNCOMMITTED=3,
    TOKU_ISO_READ_COMMITTED_ALWAYS=4
} TOKU_ISOLATION;
202 
// needed in ydb_db.c
// Bitwise OR of every isolation-related DB_* flag, including DB_INHERIT_ISOLATION.
#define DB_ISOLATION_FLAGS (DB_READ_COMMITTED | DB_READ_COMMITTED_ALWAYS | DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT | DB_SERIALIZABLE | DB_INHERIT_ISOLATION)
205 
// A key range described by its two endpoint DBTs.
// NOTE(review): endpoint inclusivity and DBT ownership are not visible here — confirm at the users.
struct txn_lock_range {
    DBT left;
    DBT right;
};
210 
// Element type of __toku_db_txn_internal::lt_map: pairs a locktree with the
// range buffer holding the key ranges recorded for it.
struct txn_lt_key_ranges {
    toku::locktree *lt;
    toku::range_buffer *buffer;
};
215 
// Private, implementation-side state of a transaction handle; obtained from a
// transaction pointer with db_txn_struct_i().
struct __toku_db_txn_internal {
    struct tokutxn *tokutxn;
    uint32_t flags;          // txn_is_read_only() tests DB_TXN_READ_ONLY in this field
    TOKU_ISOLATION iso;
    DB_TXN *child;           // non-null while a child exists; checked by HANDLE_ILLEGAL_WORKING_PARENT_TXN
    toku_mutex_t txn_mutex;

    // maps a locktree to a buffer of key ranges that are locked.
    // it is protected by the txn_mutex, so hot indexing and a client
    // thread can concurrently operate on this txn.
    toku::omt<txn_lt_key_ranges> lt_map;
};
228 
// Combined layout of a transaction: the public part followed by the private
// part. db_txn_struct_i() casts a transaction pointer to this type to reach
// internal_part, so external_part must remain the first member.
// NOTE(review): assumes DB_TXN is struct __toku_db_txn and that every DB_TXN
// passed to db_txn_struct_i() was allocated as this struct — confirm at the allocation site.
struct __toku_db_txn_external {
    struct __toku_db_txn           external_part;
    struct __toku_db_txn_internal  internal_part;
};
// db_txn_struct_i(x): given a pointer to the start of a
// struct __toku_db_txn_external, yields a pointer to its internal_part.
// The argument is parenthesized so the cast applies to the whole expression
// (e.g. `p + 1` or `cond ? a : b`), not just its first operand.
#define db_txn_struct_i(x) (&((struct __toku_db_txn_external *)(x))->internal_part)
234 
// Private, implementation-side state of a cursor. It lives inside the
// DBC's _internal storage and is reached with dbc_struct_i().
struct __toku_dbc_internal {
    struct ft_cursor ftcursor;       // returned by dbc_ftcursor()
    DB_TXN *txn;                     // transaction used by HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN
    TOKU_ISOLATION iso;
    struct simple_dbt skey_s,sval_s;
    struct simple_dbt *skey,*sval;   // NOTE(review): presumably point at skey_s/sval_s or at the db's skey/sval — confirm

    // if the rmw flag is asserted, cursor operations (like set) grab write
    // locks instead of read locks
    // the rmw flag is set when the cursor is created with the DB_RMW flag set
    bool rmw;
    bool locking_read;
};
248 
// Compile-time guard: dbc_struct_i() views DBC::_internal as a
// __toku_dbc_internal, so the struct must not be larger than that storage.
static_assert(
    sizeof(__toku_dbc_internal) <= sizeof(((DBC *)nullptr)->_internal),
    "__toku_dbc_internal doesn't fit in the internal portion of a DBC");
252 
dbc_struct_i(DBC * c)253 static inline __toku_dbc_internal *dbc_struct_i(DBC *c) {
254     union dbc_union {
255         __toku_dbc_internal *dbc_internal;
256         char *buf;
257     } u;
258     u.buf = c->_internal;
259     return u.dbc_internal;
260 }
261 
dbc_ftcursor(DBC * c)262 static inline struct ft_cursor *dbc_ftcursor(DBC *c) {
263     return &dbc_struct_i(c)->ftcursor;
264 }
265 
266 static inline int
env_opened(DB_ENV * env)267 env_opened(DB_ENV *env) {
268     return env->i->cachetable != 0;
269 }
270 
271 static inline bool
txn_is_read_only(DB_TXN * txn)272 txn_is_read_only(DB_TXN* txn) {
273     if (txn && (db_txn_struct_i(txn)->flags & DB_TXN_READ_ONLY)) {
274         return true;
275     }
276     return false;
277 }
278 
// HANDLE_READ_ONLY_TXN(txn): return EINVAL from the enclosing function when
// txn_is_read_only(txn) is true. Expands to a bare `if` statement with its own
// terminator, so do not use it as the unbraced body of an if/else.
#define HANDLE_READ_ONLY_TXN(txn) if(txn_is_read_only(txn)) return EINVAL;
280 
// Environment bookkeeping entry points; only declared in this header.
// NOTE(review): presumably env_panic records `cause`/`msg` in is_panicked/panic_string — confirm.
void env_panic(DB_ENV * env, int cause, const char * msg);
// NOTE(review): presumably these add/remove `db` in the env's open_dbs_by_dname /
// open_dbs_by_dict_id sets under open_dbs_rwlock — confirm in the implementation.
void env_note_db_opened(DB_ENV *env, DB *db);
void env_note_db_closed(DB_ENV *env, DB *db);
284