1 /* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 /*======
4 This file is part of PerconaFT.
5 
6 
7 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
8 
9     PerconaFT is free software: you can redistribute it and/or modify
10     it under the terms of the GNU General Public License, version 2,
11     as published by the Free Software Foundation.
12 
13     PerconaFT is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17 
18     You should have received a copy of the GNU General Public License
19     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
20 
21 ----------------------------------------
22 
23     PerconaFT is free software: you can redistribute it and/or modify
24     it under the terms of the GNU Affero General Public License, version 3,
25     as published by the Free Software Foundation.
26 
27     PerconaFT is distributed in the hope that it will be useful,
28     but WITHOUT ANY WARRANTY; without even the implied warranty of
29     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
30     GNU Affero General Public License for more details.
31 
32     You should have received a copy of the GNU Affero General Public License
33     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
34 ======= */
35 
36 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
37 
38 #pragma once
39 
40 #include "util/dmt.h"
41 #include "util/mempool.h"
42 
43 #include "ft/leafentry.h"
44 #include "ft/serialize/wbuf.h"
45 
46 // Key/leafentry pair stored in a dmt.  The key is inlined, the offset (in leafentry mempool) is stored for the leafentry.
47 struct klpair_struct {
48     uint32_t le_offset;  //Offset of leafentry (in leafentry mempool)
49     uint8_t key[0]; // key, followed by le
50 };
51 
keylen_from_klpair_len(const uint32_t klpair_len)52 static constexpr uint32_t keylen_from_klpair_len(const uint32_t klpair_len) {
53     return klpair_len - __builtin_offsetof(klpair_struct, key);
54 }
55 
56 
57 static_assert(__builtin_offsetof(klpair_struct, key) == 1*sizeof(uint32_t), "klpair alignment issues");
58 static_assert(__builtin_offsetof(klpair_struct, key) == sizeof(klpair_struct), "klpair size issues");
59 
60 // A wrapper for the heaviside function provided to dmt->find*.
61 // Needed because the heaviside functions provided to bndata do not know about the internal types.
62 // Alternative to this wrapper is to expose accessor functions and rewrite all the external heaviside functions.
63 template<typename dmtcmp_t,
64          int (*h)(const DBT &, const dmtcmp_t &)>
klpair_find_wrapper(const uint32_t klpair_len,const klpair_struct & klpair,const dmtcmp_t & extra)65 static int klpair_find_wrapper(const uint32_t klpair_len, const klpair_struct &klpair, const dmtcmp_t &extra) {
66     DBT kdbt;
67     kdbt.data = const_cast<void*>(reinterpret_cast<const void*>(klpair.key));
68     kdbt.size = keylen_from_klpair_len(klpair_len);
69     return h(kdbt, extra);
70 }
71 
72 template<typename inner_iterate_extra_t>
73 struct klpair_iterate_extra {
74     public:
75     inner_iterate_extra_t *inner;
76     const class bn_data * bd;
77 };
78 
79 // A wrapper for the high-order function provided to dmt->iterate*
80 // Needed because the heaviside functions provided to bndata do not know about the internal types.
81 // Alternative to this wrapper is to expose accessor functions and rewrite all the external heaviside functions.
82 template<typename iterate_extra_t,
83          int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t idx, iterate_extra_t *const)>
klpair_iterate_wrapper(const uint32_t klpair_len,const klpair_struct & klpair,const uint32_t idx,klpair_iterate_extra<iterate_extra_t> * const extra)84 static int klpair_iterate_wrapper(const uint32_t klpair_len, const klpair_struct &klpair, const uint32_t idx, klpair_iterate_extra<iterate_extra_t> *const extra) {
85     const void* key = &klpair.key;
86     LEAFENTRY le = extra->bd->get_le_from_klpair(&klpair);
87     return f(key, keylen_from_klpair_len(klpair_len), le, idx, extra->inner);
88 }
89 
90 
91 namespace toku {
92 // dmt writer for klpair_struct
93 class klpair_dmtwriter {
94     public:
95         // Return the size needed for the klpair_struct that this dmtwriter represents
get_size(void)96         size_t get_size(void) const {
97             return sizeof(klpair_struct) + this->keylen;
98         }
99         // Write the klpair_struct this dmtwriter represents to a destination
write_to(klpair_struct * const dest)100         void write_to(klpair_struct *const dest) const {
101             dest->le_offset = this->le_offset;
102             memcpy(dest->key, this->keyp, this->keylen);
103         }
104 
klpair_dmtwriter(uint32_t _keylen,uint32_t _le_offset,const void * _keyp)105         klpair_dmtwriter(uint32_t _keylen, uint32_t _le_offset, const void* _keyp)
106             : keylen(_keylen), le_offset(_le_offset), keyp(_keyp) {}
klpair_dmtwriter(const uint32_t klpair_len,klpair_struct * const src)107         klpair_dmtwriter(const uint32_t klpair_len, klpair_struct *const src)
108             : keylen(keylen_from_klpair_len(klpair_len)), le_offset(src->le_offset), keyp(src->key) {}
109     private:
110         const uint32_t keylen;
111         const uint32_t le_offset;
112         const void* keyp;
113 };
114 }
115 
116 typedef toku::dmt<klpair_struct, klpair_struct*, toku::klpair_dmtwriter> klpair_dmt_t;
117 // This class stores the data associated with a basement node
118 class bn_data {
119 public:
120     // Initialize an empty bn_data _without_ a dmt backing.
121     // Externally only used for deserialization.
122     void init_zero(void);
123 
124     // Initialize an empty bn_data _with_ a dmt
125     void initialize_empty(void);
126 
127     // Deserialize a bn_data from rbuf.
128     // This is the entry point for deserialization.
129     void deserialize_from_rbuf(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version);
130 
131     // Retrieve the memory footprint of this basement node.
132     // May over or under count: see Percona/PerconaFT#136
133     // Also see dmt's implementation.
134     uint64_t get_memory_size(void);
135 
136     // Get the serialized size of this basement node.
137     uint64_t get_disk_size(void);
138 
139     // Perform (paranoid) verification that all leafentries are fully contained within the mempool
140     void verify_mempool(void);
141 
142     // size() of key dmt
143     uint32_t num_klpairs(void) const;
144 
145     // iterate() on key dmt (and associated leafentries)
146     template<typename iterate_extra_t,
147              int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t, iterate_extra_t *const)>
iterate(iterate_extra_t * const iterate_extra)148     int iterate(iterate_extra_t *const iterate_extra) const {
149         return iterate_on_range<iterate_extra_t, f>(0, num_klpairs(), iterate_extra);
150     }
151 
152     // iterate_on_range() on key dmt (and associated leafentries)
153     template<typename iterate_extra_t,
154              int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t, iterate_extra_t *const)>
iterate_on_range(const uint32_t left,const uint32_t right,iterate_extra_t * const iterate_extra)155     int iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const {
156         klpair_iterate_extra<iterate_extra_t> klpair_extra = { iterate_extra, this };
157         return m_buffer.iterate_on_range< klpair_iterate_extra<iterate_extra_t>, klpair_iterate_wrapper<iterate_extra_t, f> >(left, right, &klpair_extra);
158     }
159 
160     // find_zero() on key dmt
161     template<typename dmtcmp_t,
162              int (*h)(const DBT &, const dmtcmp_t &)>
find_zero(const dmtcmp_t & extra,LEAFENTRY * const value,void ** key,uint32_t * keylen,uint32_t * const idxp)163     int find_zero(const dmtcmp_t &extra, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
164         klpair_struct* klpair = nullptr;
165         uint32_t klpair_len;
166         int r = m_buffer.find_zero< dmtcmp_t, klpair_find_wrapper<dmtcmp_t, h> >(extra, &klpair_len, &klpair, idxp);
167         if (r == 0) {
168             if (value) {
169                 *value = get_le_from_klpair(klpair);
170             }
171             if (key) {
172                 paranoid_invariant_notnull(keylen);
173                 *key = klpair->key;
174                 *keylen = keylen_from_klpair_len(klpair_len);
175             }
176             else {
177                 paranoid_invariant_null(keylen);
178             }
179         }
180         return r;
181     }
182 
183     // find() on key dmt (and associated leafentries)
184     template<typename dmtcmp_t,
185              int (*h)(const DBT &, const dmtcmp_t &)>
find(const dmtcmp_t & extra,int direction,LEAFENTRY * const value,void ** key,uint32_t * keylen,uint32_t * const idxp)186     int find(const dmtcmp_t &extra, int direction, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
187         klpair_struct* klpair = nullptr;
188         uint32_t klpair_len;
189         int r = m_buffer.find< dmtcmp_t, klpair_find_wrapper<dmtcmp_t, h> >(extra, direction, &klpair_len, &klpair, idxp);
190         if (r == 0) {
191             if (value) {
192                 *value = get_le_from_klpair(klpair);
193             }
194             if (key) {
195                 paranoid_invariant_notnull(keylen);
196                 *key = klpair->key;
197                 *keylen = keylen_from_klpair_len(klpair_len);
198             }
199             else {
200                 paranoid_invariant_null(keylen);
201             }
202         }
203         return r;
204     }
205 
206     // Fetch leafentry by index
207     __attribute__((__nonnull__))
208     int fetch_le(uint32_t idx, LEAFENTRY *le);
209     // Fetch (leafentry, key, keylen) by index
210     __attribute__((__nonnull__))
211     int fetch_klpair(uint32_t idx, LEAFENTRY *le, uint32_t *len, void** key);
212     // Fetch (serialized size of leafentry, key, and keylen) by index
213     __attribute__((__nonnull__))
214     int fetch_klpair_disksize(uint32_t idx, size_t *size);
215     // Fetch (key, keylen) by index
216     __attribute__((__nonnull__))
217     int fetch_key_and_len(uint32_t idx, uint32_t *len, void** key);
218 
219     // Move leafentries (and associated key/keylens) from this basement node to dest_bd
220     // Moves indexes [lbi-ube)
221     __attribute__((__nonnull__))
222     void split_klpairs(bn_data* dest_bd, uint32_t first_index_for_dest);
223 
224     // Destroy this basement node and free memory.
225     void destroy(void);
226 
227     // Uses sorted array as input for this basement node.
228     // Expects this to be a basement node just initialized with initialize_empty()
229     void set_contents_as_clone_of_sorted_array(
230         uint32_t num_les,
231         const void** old_key_ptrs,
232         uint32_t* old_keylens,
233         LEAFENTRY* old_les,
234         size_t *le_sizes,
235         size_t total_key_size,
236         size_t total_le_size
237         );
238 
239     // Make this basement node a clone of orig_bn_data.
240     // orig_bn_data still owns all its memory (dmt, mempool)
241     // this basement node will have a new dmt, mempool containing same data.
242     void clone(bn_data* orig_bn_data);
243 
244     // Delete klpair index idx with provided keylen and old leafentry with size old_le_size
245     void delete_leafentry (
246         uint32_t idx,
247         uint32_t keylen,
248         uint32_t old_le_size
249         );
250 
251     // Allocates space in the mempool to store a new leafentry.
252     // This may require reorganizing the mempool and updating the dmt.
253     __attribute__((__nonnull__))
254     void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_keylen, uint32_t old_size,
255                                  uint32_t new_size, LEAFENTRY* new_le_space, void **const maybe_free);
256 
257     // Allocates space in the mempool to store a new leafentry
258     // and inserts a new key into the dmt
259     // This may require reorganizing the mempool and updating the dmt.
260     __attribute__((__nonnull__))
261     void get_space_for_insert(uint32_t idx, const void* keyp, uint32_t keylen, size_t size, LEAFENTRY* new_le_space, void **const maybe_free);
262 
263     // Gets a leafentry given a klpair from this basement node.
264     LEAFENTRY get_le_from_klpair(const klpair_struct *klpair) const;
265 
266     void serialize_to_wbuf(struct wbuf *const wb);
267 
268     // Prepares this basement node for serialization.
269     // Must be called before serializing this basement node.
270     // Between calling prepare_to_serialize and actually serializing, the basement node may not be modified
271     void prepare_to_serialize(void);
272 
273     // Serialize the basement node header to a wbuf
274     // Requires prepare_to_serialize() to have been called first.
275     void serialize_header(struct wbuf *wb) const;
276 
277     // Serialize all keys and leafentries to a wbuf
278     // Requires prepare_to_serialize() (and serialize_header()) has been called first.
279     // Currently only supported when all keys are fixed-length.
280     void serialize_rest(struct wbuf *wb) const;
281 
282     static const uint32_t HEADER_LENGTH = 0
283         + sizeof(uint32_t) // key_data_size
284         + sizeof(uint32_t) // val_data_size
285         + sizeof(uint32_t) // fixed_key_length
286         + sizeof(uint8_t) // all_keys_same_length
287         + sizeof(uint8_t) // keys_vals_separate
288         + 0;
289 private:
290 
291     // split_klpairs_extra should be a local class in split_klpairs, but
292     // the dmt template parameter for iterate needs linkage, so it has to be a
293     // separate class, but we want it to be able to call e.g. add_key
294     friend class split_klpairs_extra;
295 
296     // Allocates space in the mempool.
297     // If there is insufficient space, the mempool is enlarged and leafentries may be shuffled to reduce fragmentation.
298     // If shuffling happens, the offsets stored in the dmt are updated.
299     LEAFENTRY mempool_malloc_and_update_dmt(size_t size, void **maybe_free);
300 
301     // Change the size of the mempool to support what is already in it, plus added_size.
302     // possibly "compress" by shuffling leafentries around to reduce fragmentation to 0.
303     // If fragmentation is already 0 and force_compress is not true, shuffling may be skipped.
304     // If shuffling happens, leafentries will be stored in the mempool in sorted order.
305     void dmt_compress_kvspace(size_t added_size, void **maybe_free, bool force_compress);
306 
307     // Note that a key was added (for maintaining disk-size of this basement node)
308     void add_key(uint32_t keylen);
309 
310     // Note that multiple keys were added (for maintaining disk-size of this basement node)
311     void add_keys(uint32_t n_keys, uint32_t combined_klpair_len);
312 
313     // Note that a key was removed (for maintaining disk-size of this basement node)
314     void remove_key(uint32_t keylen);
315 
316     klpair_dmt_t m_buffer;                     // pointers to individual leaf entries
317     struct mempool m_buffer_mempool;  // storage for all leaf entries
318 
319     friend class bndata_bugfix_test;
320 
321     // Get the serialized size of a klpair.
322     // As of Jan 14, 2014, serialized size of a klpair is independent of whether this basement node has fixed-length keys.
323     uint32_t klpair_disksize(const uint32_t klpair_len, const klpair_struct *klpair) const;
324 
325     // The disk/memory size of all keys.  (Note that the size of memory for the leafentries is maintained by m_buffer_mempool)
326     size_t m_disksize_of_keys;
327 
328     // Deserialize this basement node from rbuf
329     // all keys will be first followed by all leafentries (both in sorted order)
330     void initialize_from_separate_keys_and_vals(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version,
331                                                 uint32_t key_data_size, uint32_t val_data_size, bool all_keys_same_length,
332                                                 uint32_t fixed_klpair_length);
333 };
334