1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     PerconaFT is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     PerconaFT is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ----------------------------------------
23 
24     PerconaFT is free software: you can redistribute it and/or modify
25     it under the terms of the GNU Affero General Public License, version 3,
26     as published by the Free Software Foundation.
27 
28     PerconaFT is distributed in the hope that it will be useful,
29     but WITHOUT ANY WARRANTY; without even the implied warranty of
30     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31     GNU Affero General Public License for more details.
32 
33     You should have received a copy of the GNU Affero General Public License
new(stream: St, n: usize) -> Self34     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36 
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38 
39 #pragma once
40 
41 // This must be first to make the 64-bit file mode work right in Linux
42 #define _FILE_OFFSET_BITS 64
43 
44 #include <db.h>
45 
46 #include "ft/cachetable/cachetable.h"
47 #include "ft/comparator.h"
48 #include "ft/msg.h"
49 #include "util/dbt.h"
50 
51 #define OS_PATH_SEPARATOR '/'
52 
53 typedef struct ft_handle *FT_HANDLE;
54 
55 int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result));
56 
57 // effect: changes the descriptor for the ft of the given handle.
58 // requires:
59 // - cannot change descriptor for same ft in two threads in parallel.
60 // - can only update cmp descriptor immidiately after opening the FIRST ft handle for this ft and before
61 //   ANY operations. to update the cmp descriptor after any operations have already happened, all handles
get_pin_mut(self: Pin<&mut Self>) -> Pin<&mut St>62 //   and transactions must close and reopen before the change, then you can update the cmp descriptor
63 void toku_ft_change_descriptor(FT_HANDLE t, const DBT* old_descriptor, const DBT* new_descriptor, bool do_log, TOKUTXN txn, bool update_cmp_descriptor);
64 uint32_t toku_serialize_descriptor_size(DESCRIPTOR desc);
65 
66 void toku_ft_handle_create(FT_HANDLE *ft);
67 void toku_ft_set_flags(FT_HANDLE, unsigned int flags);
68 void toku_ft_get_flags(FT_HANDLE, unsigned int *flags);
69 void toku_ft_handle_set_nodesize(FT_HANDLE, unsigned int nodesize);
into_inner(self) -> St70 void toku_ft_handle_get_nodesize(FT_HANDLE, unsigned int *nodesize);
71 void toku_ft_get_maximum_advised_key_value_lengths(unsigned int *klimit, unsigned int *vlimit);
72 void toku_ft_handle_set_basementnodesize(FT_HANDLE, unsigned int basementnodesize);
73 void toku_ft_handle_get_basementnodesize(FT_HANDLE, unsigned int *basementnodesize);
74 void toku_ft_handle_set_compression_method(FT_HANDLE, enum toku_compression_method);
75 void toku_ft_handle_get_compression_method(FT_HANDLE, enum toku_compression_method *);
76 void toku_ft_handle_set_fanout(FT_HANDLE, unsigned int fanout);
77 void toku_ft_handle_get_fanout(FT_HANDLE, unsigned int *fanout);
78 int toku_ft_handle_set_memcmp_magic(FT_HANDLE, uint8_t magic);
79 
80 void toku_ft_set_bt_compare(FT_HANDLE ft_handle, ft_compare_func cmp_func);
poll_next( mut self: Pin<&mut Self>, cx: &mut Context<'_>, ) -> Poll<Option<Self::Item>>81 const toku::comparator &toku_ft_get_comparator(FT_HANDLE ft_handle);
82 
83 typedef void (*on_redirect_callback)(FT_HANDLE ft_handle, void *extra);
84 void toku_ft_set_redirect_callback(FT_HANDLE ft_handle, on_redirect_callback cb, void *extra);
85 
86 // How updates (update/insert/deletes) work:
87 // There are two flavers of upsertdels:  Singleton and broadcast.
88 // When a singleton upsertdel message arrives it contains a key and an extra DBT.
89 //
90 // At the YDB layer, the function looks like
91 //
92 // int (*update_function)(DB*, DB_TXN*, const DBT *key, const DBT *old_val, const DBT *extra,
93 //                        void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra);
94 //
95 // And there are two DB functions
96 //
97 // int DB->update(DB *, DB_TXN *, const DBT *key, const DBT *extra);
98 // Effect:
99 //    If there is a key-value pair visible to the txn with value old_val then the system calls
100 //      update_function(DB, key, old_val, extra, set_val, set_extra)
101 //    where set_val and set_extra are a function and a void* provided by the system.
102 //    The update_function can do one of two things:
103 //      a) call set_val(new_val, set_extra)
104 //         which has the effect of doing DB->put(db, txn, key, new_val, 0)
105 //         overwriting the old value.
106 //      b) Return DB_DELETE (a new return code)
107 //      c) Return 0 (success) without calling set_val, which leaves the old value unchanged.
108 //    If there is no such key-value pair visible to the txn, then the system calls
109 //       update_function(DB, key, NULL, extra, set_val, set_extra)
110 //    and the update_function can do one of the same three things.
111 // Implementation notes: Update acquires a write lock (just as DB->put
112 //    does).   This function works by sending a UPDATE message containing
113 //    the key and extra.
114 //
115 // int DB->update_broadcast(DB *, DB_TXN*, const DBT *extra);
116 // Effect: This has the same effect as building a cursor that walks
117 //  through the DB, calling DB->update() on every key that the cursor
118 //  finds.
119 // Implementation note: Acquires a write lock on the entire database.
120 //  This function works by sending an BROADCAST-UPDATE message containing
121 //   the key and the extra.
122 typedef int (*ft_update_func)(DB *db, const DBT *key, const DBT *old_val, const DBT *extra,
123                               void (*set_val)(const DBT *new_val, void *set_extra),
124                               void *set_extra);
125 void toku_ft_set_update(FT_HANDLE ft_h, ft_update_func update_fun);
126 
127 int toku_ft_handle_open(FT_HANDLE, const char *fname_in_env,
128 		  int is_create, int only_create, CACHETABLE ct, TOKUTXN txn, bool open_rw=true)  __attribute__ ((warn_unused_result));
129 int toku_ft_handle_open_recovery(FT_HANDLE, const char *fname_in_env, int is_create, int only_create, CACHETABLE ct, TOKUTXN txn,
130 			   FILENUM use_filenum, LSN max_acceptable_lsn)  __attribute__ ((warn_unused_result));
131 
132 // clone an ft handle. the cloned handle has a new dict_id but refers to the same fractal tree
133 int toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn, bool open_rw=true);
134 
135 // close an ft handle during normal operation. the underlying ft may or may not close,
136 // depending if there are still references. an lsn for this close will come from the logger.
137 void toku_ft_handle_close(FT_HANDLE ft_handle);
138 // close an ft handle during recovery. the underlying ft must close, and will use the given lsn.
139 void toku_ft_handle_close_recovery(FT_HANDLE ft_handle, LSN oplsn);
140 
141 // At the ydb layer, a DICTIONARY_ID uniquely identifies an open dictionary.
142 // With the introduction of the loader (ticket 2216), it is possible for the file that holds
143 // an open dictionary to change, so these are now separate and independent unique identifiers (see FILENUM)
144 struct DICTIONARY_ID {
145     uint64_t dictid;
146 };
147 static const DICTIONARY_ID DICTIONARY_ID_NONE = { .dictid = 0 };
148 
149 int
150 toku_ft_handle_open_with_dict_id(
151     FT_HANDLE ft_h,
152     const char *fname_in_env,
153     int is_create,
154     int only_create,
155     CACHETABLE cachetable,
156     TOKUTXN txn,
157     DICTIONARY_ID use_dictionary_id
158     )  __attribute__ ((warn_unused_result));
159 
160 // Effect: Insert a key and data pair into an ft
161 void toku_ft_insert (FT_HANDLE ft_h, DBT *k, DBT *v, TOKUTXN txn);
162 
163 // Returns: 0 if the key was inserted, DB_KEYEXIST if the key already exists
164 int toku_ft_insert_unique(FT_HANDLE ft, DBT *k, DBT *v, TOKUTXN txn, bool do_logging);
165 
166 // Effect: Optimize the ft
167 void toku_ft_optimize (FT_HANDLE ft_h);
168 
169 // Effect: Insert a key and data pair into an ft if the oplsn is newer than the ft's lsn.  This function is called during recovery.
170 void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *k, DBT *v, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type);
171 
172 // Effect: Send an update message into an ft.  This function is called
173 // during recovery.
174 void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging);
175 
176 // Effect: Send a broadcasting update message into an ft.  This function
177 // is called during recovery.
178 void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, bool is_resetting_op);
179 
180 void toku_ft_load_recovery(TOKUTXN txn, FILENUM old_filenum, char const * new_iname, int do_fsync, int do_log, LSN *load_lsn);
181 void toku_ft_load(FT_HANDLE ft_h, TOKUTXN txn, char const * new_iname, int do_fsync, LSN *get_lsn);
182 void toku_ft_hot_index_recovery(TOKUTXN txn, FILENUMS filenums, int do_fsync, int do_log, LSN *hot_index_lsn);
183 void toku_ft_hot_index(FT_HANDLE ft_h, TOKUTXN txn, FILENUMS filenums, int do_fsync, LSN *lsn);
184 
185 void toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val);
186 void toku_ft_log_put (TOKUTXN txn, FT_HANDLE ft_h, const DBT *key, const DBT *val);
187 void toku_ft_log_del_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *fts, uint32_t num_fts, const DBT *key, const DBT *val);
188 void toku_ft_log_del (TOKUTXN txn, FT_HANDLE ft_h, const DBT *key);
189 
190 // Effect: Delete a key from an ft
191 void toku_ft_delete (FT_HANDLE ft_h, DBT *k, TOKUTXN txn);
192 
193 // Effect: Delete a key from an ft if the oplsn is newer than the ft lsn.  This function is called during recovery.
194 void toku_ft_maybe_delete (FT_HANDLE ft_h, DBT *k, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging);
195 
196 TXNID toku_ft_get_oldest_referenced_xid_estimate(FT_HANDLE ft_h);
197 struct txn_manager *toku_ft_get_txn_manager(FT_HANDLE ft_h);
198 
199 struct txn_gc_info;
200 void toku_ft_send_insert(FT_HANDLE ft_h, DBT *key, DBT *val, XIDS xids, enum ft_msg_type type, txn_gc_info *gc_info);
201 void toku_ft_send_delete(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info);
202 void toku_ft_send_commit_any(FT_HANDLE ft_h, DBT *key, XIDS xids, txn_gc_info *gc_info);
203 
204 int toku_close_ft_handle_nolsn (FT_HANDLE, char **error_string)  __attribute__ ((warn_unused_result));
205 
206 int toku_dump_ft (FILE *,FT_HANDLE ft_h)  __attribute__ ((warn_unused_result));
207 
208 extern int toku_ft_debug_mode;
209 int toku_verify_ft (FT_HANDLE ft_h)  __attribute__ ((warn_unused_result));
210 int toku_verify_ft_with_progress (FT_HANDLE ft_h, int (*progress_callback)(void *extra, float progress), void *extra, int verbose, int keep_going)  __attribute__ ((warn_unused_result));
211 
212 int toku_ft_recount_rows(
213     FT_HANDLE ft,
214     int (*progress_callback)(
215         uint64_t count,
216         uint64_t deleted,
217         void* progress_extra),
218     void* progress_extra);
219 
220 
221 DICTIONARY_ID toku_ft_get_dictionary_id(FT_HANDLE);
222 
223 enum ft_flags {
224     //TOKU_DB_DUP             = (1<<0),  //Obsolete #2862
225     //TOKU_DB_DUPSORT         = (1<<1),  //Obsolete #2862
226     TOKU_DB_KEYCMP_BUILTIN  = (1<<2),
227     TOKU_DB_VALCMP_BUILTIN_13  = (1<<3),
228 };
229 
230 void toku_ft_keyrange(FT_HANDLE ft_h, DBT *key, uint64_t *less,  uint64_t *equal,  uint64_t *greater);
231 void toku_ft_keysrange(FT_HANDLE ft_h, DBT* key_left, DBT* key_right, uint64_t *less_p, uint64_t* equal_left_p, uint64_t* middle_p, uint64_t* equal_right_p, uint64_t* greater_p, bool* middle_3_exact_p);
232 
233 int toku_ft_get_key_after_bytes(FT_HANDLE ft_h, const DBT *start_key, uint64_t skip_len, void (*callback)(const DBT *end_key, uint64_t actually_skipped, void *extra), void *cb_extra);
234 
235 struct ftstat64_s {
236     uint64_t nkeys; /* estimate how many unique keys (even when flattened this may be an estimate)     */
237     uint64_t ndata; /* estimate the number of pairs (exact when flattened and committed)               */
238     uint64_t dsize; /* estimate the sum of the sizes of the pairs (exact when flattened and committed) */
239     uint64_t fsize;  /* the size of the underlying file                                                */
240     uint64_t ffree; /* Number of free bytes in the underlying file                                    */
241     uint64_t create_time_sec; /* creation time in seconds. */
242     uint64_t modify_time_sec; /* time of last serialization, in seconds. */
243     uint64_t verify_time_sec; /* time of last verification, in seconds */
244 };
245 
246 void toku_ft_handle_stat64 (FT_HANDLE, TOKUTXN, struct ftstat64_s *stat);
247 
248 struct ftinfo64 {
249     uint64_t num_blocks_allocated;  // number of blocks in the blocktable
250     uint64_t num_blocks_in_use;     // number of blocks in use by most recent checkpoint
251     uint64_t size_allocated;        // sum of sizes of blocks in blocktable
252     uint64_t size_in_use;           // sum of sizes of blocks in use by most recent checkpoint
253 };
254 
255 void toku_ft_handle_get_fractal_tree_info64(FT_HANDLE, struct ftinfo64 *);
256 
257 int toku_ft_handle_iterate_fractal_tree_block_map(FT_HANDLE, int (*)(uint64_t,int64_t,int64_t,int64_t,int64_t,void*), void *);
258 
259 int toku_ft_layer_init(void) __attribute__ ((warn_unused_result));
260 void toku_ft_open_close_lock(void);
261 void toku_ft_open_close_unlock(void);
262 void toku_ft_layer_destroy(void);
263 void toku_ft_serialize_layer_init(void);
264 void toku_ft_serialize_layer_destroy(void);
265 
266 void toku_maybe_truncate_file (int fd, uint64_t size_used, uint64_t expected_size, uint64_t *new_size);
267 // Effect: truncate file if overallocated by at least 32MiB
268 
269 void toku_maybe_preallocate_in_file (int fd, int64_t size, int64_t expected_size, int64_t *new_size);
270 // Effect: make the file bigger by either doubling it or growing by 16MiB whichever is less, until it is at least size
271 // Return 0 on success, otherwise an error number.
272 
273 int toku_ft_get_fragmentation(FT_HANDLE ft_h, TOKU_DB_FRAGMENTATION report) __attribute__ ((warn_unused_result));
274 
275 bool toku_ft_is_empty_fast (FT_HANDLE ft_h) __attribute__ ((warn_unused_result));
276 // Effect: Return true if there are no messages or leaf entries in the tree.  If so, it's empty.  If there are messages  or leaf entries, we say it's not empty
277 // even though if we were to optimize the tree it might turn out that they are empty.
278 
279 int toku_ft_strerror_r(int error, char *buf, size_t buflen);
280 // Effect: LIke the XSI-compliant strerorr_r, extended to db_strerror().
281 // If error>=0 then the result is to do strerror_r(error, buf, buflen), that is fill buf with a descriptive error message.
282 // If error<0 then return a PerconaFT-specific error code.  For unknown cases, we return -1 and set errno=EINVAL, even for cases that *should* be known.  (Not all DB errors are known by this function which is a bug.)
283 
284 extern bool garbage_collection_debug;
285 
286 // This is a poor place to put global options like these.
287 void toku_ft_set_direct_io(bool direct_io_on);
288 void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers);
289 
290 void toku_note_deserialized_basement_node(bool fixed_key_size);
291 
292 // Creates all directories for the path if necessary,
293 // returns true if all dirs are created successfully or
294 // all dirs exist, false otherwise.
295 bool toku_create_subdirs_if_needed(const char* path);
296