1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     PerconaFT is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     PerconaFT is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ----------------------------------------
23 
24     PerconaFT is free software: you can redistribute it and/or modify
25     it under the terms of the GNU Affero General Public License, version 3,
26     as published by the Free Software Foundation.
27 
28     PerconaFT is distributed in the hope that it will be useful,
29     but WITHOUT ANY WARRANTY; without even the implied warranty of
30     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31     GNU Affero General Public License for more details.
32 
33     You should have received a copy of the GNU Affero General Public License
34     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36 
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38 
39 #pragma once
40 
41 #include <vector>
42 
43 #include "portability/memory.h"
44 #include "portability/toku_portability.h"
45 #include "portability/toku_race_tools.h"
46 #include "portability/toku_stdint.h"
47 
48 #include "ft/serialize/wbuf.h"
49 #include "util/growable_array.h"
50 #include "util/mempool.h"
51 
52 namespace toku {
53 typedef uint32_t node_offset;
54 
55 
56 /**
57  * Dynamic Order Maintenance Tree (DMT)
58  *
59  * Maintains a collection of totally ordered values, where each value has weight 1.
60  * A DMT supports variable sized values.
61  * The DMT is a mutable datatype.
62  *
63  * The Abstraction:
64  *
65  * An DMT is a vector of values, $V$, where $|V|$ is the length of the vector.
66  * The vector is numbered from $0$ to $|V|-1$.
67  *
68  * We can create a new DMT, which is the empty vector.
69  *
70  * We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where
71  *  $|V'|=1+|V|$       and
72  *
73  *   V'_j = V_j       if $j<i$
74  *          x         if $j=i$
75  *          V_{j-1}   if $j>i$.
76  *
77  * We can specify $i$ using a kind of function instead of as an integer.
78  * Let $b$ be a function mapping from values to nonzero integers, such that
79  * the signum of $b$ is monotically increasing.
80  * We can specify $i$ as the minimum integer such that $b(V_i)>0$.
81  *
82  * We look up a value using its index, or using a Heaviside function.
83  * For lookups, we allow $b$ to be zero for some values, and again the signum of $b$ must be monotonically increasing.
84  * When lookup up values, we can look up
85  *  $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$.   (With a special return code if no such value exists.)
86  *      (Rationale:  Ordinarily we want $i$ to be unique.  But for various reasons we want to allow multiple zeros, and we want the smallest $i$ in that case.)
87  *  $V_i$ where $i$ is the minimum integer such that $b(V_i)>0$.   (Or an indication that no such value exists.)
88  *  $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$.   (Or an indication that no such value exists.)
89  *
90  * When looking up a value using a Heaviside function, we get the value and its index.
91  *
92  * Performance:
93  *  Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function.
94  *  The memory required is O(|V|).
95  *
96  * Usage:
97  *  The dmt is templated by three parameters:
98  *   - dmtdata_t is what will be stored within the dmt.  These could be pointers or real data types (ints, structs).
99  *   - dmtdataout_t is what will be returned by find and related functions.  By default, it is the same as dmtdata_t, but you can set it to (dmtdata_t *).
100  *   - dmtwriter_t is a class that effectively handles (de)serialization between the value stored in the dmt and outside the dmt.
101  *  To create an dmt which will store "TXNID"s, for example, it is a good idea to typedef the template:
102  *   typedef dmt<TXNID, TXNID, txnid_writer_t> txnid_dmt_t;
103  *  If you are storing structs (or you want to edit what is stored), you may want to be able to get a pointer to the data actually stored in the dmt (see find_zero).  To do this, use the second template parameter:
104  *   typedef dmt<struct foo, struct foo *, foo_writer_t> foo_dmt_t;
105  */
106 
107 namespace dmt_internal {
108 
109 class subtree {
110 private:
111     uint32_t m_index;
112 public:
113     // The maximum mempool size for a dmt is 2**32-2
114     static const uint32_t NODE_NULL = UINT32_MAX;
set_to_null(void)115     inline void set_to_null(void) {
116         m_index = NODE_NULL;
117     }
118 
is_null(void)119     inline bool is_null(void) const {
120         return NODE_NULL == this->get_offset();
121     }
122 
get_offset(void)123     inline node_offset get_offset(void) const {
124         return m_index;
125     }
126 
set_offset(node_offset index)127     inline void set_offset(node_offset index) {
128         paranoid_invariant(index != NODE_NULL);
129         m_index = index;
130     }
131 } __attribute__((__packed__,__aligned__(4)));
132 
133 template<typename dmtdata_t>
134 class dmt_node_templated {
135 public:
136     uint32_t weight;
137     subtree left;
138     subtree right;
139     uint32_t value_length;
140     dmtdata_t value;
141 } __attribute__((__aligned__(4)));  //NOTE: we cannot use attribute packed or dmtdata_t will call copy constructors (dmtdata_t might not be packed by default)
142 
143 }
144 
145 using namespace toku::dmt_internal;
146 
147 // Each data type used in a dmt requires a dmt_writer class (allows you to insert/etc with dynamic sized types).
148 // A dmt_writer can be thought of a (de)serializer
149 // There is no default implementation.
150 // A dmtwriter instance handles reading/writing 'dmtdata_t's to/from the dmt.
151 // The class must implement the following functions:
152 //      The size required in a dmt for the dmtdata_t represented:
153 //          size_t get_size(void) const;
154 //      Write the dmtdata_t to memory owned by a dmt:
155 //          void write_to(dmtdata_t *const dest) const;
156 //      Constructor (others are allowed, but this one is required)
157 //          dmtwriter(const uint32_t dmtdata_t_len, dmtdata_t *const src)
158 
159 template<typename dmtdata_t,
160          typename dmtdataout_t,
161          typename dmtwriter_t
162         >
163 class dmt {
164 private:
165     typedef dmt_node_templated<dmtdata_t> dmt_node;
166 
167 public:
168     static const uint8_t ALIGNMENT = 4;
169 
170     class builder {
171     public:
172         void append(const dmtwriter_t &value);
173 
174         // Create a dmt builder to build a dmt that will have at most n_values values and use
175         // at most n_value_bytes bytes in the mempool to store values (not counting node or alignment overhead).
176         void create(uint32_t n_values, uint32_t n_value_bytes);
177 
178         bool value_length_is_fixed(void);
179 
180         // Constructs a dmt that contains everything that was append()ed to this builder.
181         // Destroys this builder and frees associated memory.
182         void build(dmt<dmtdata_t, dmtdataout_t, dmtwriter_t> *dest);
183     private:
184         uint32_t max_values;
185         uint32_t max_value_bytes;
186         node_offset *sorted_node_offsets;
187         bool temp_valid;
188         dmt<dmtdata_t, dmtdataout_t, dmtwriter_t> temp;
189     };
190 
191     /**
192      * Effect: Create an empty DMT.
193      * Performance: constant time.
194      */
195     void create(void);
196 
197     /**
198      * Effect: Create a DMT containing values.  The number of values is in numvalues.
199      *         Each value is of a fixed (at runtime) length.
200      *         mem contains the values in packed form (no alignment padding)
201      *         Caller retains ownership of mem.
202      * Requires: this has not been created yet
203      * Rationale:    Normally to insert N values takes O(N lg N) amortized time.
204      *               If the N values are known in advance, are sorted, and
205      *               the structure is empty, we can batch insert them much faster.
206      */
207     __attribute__((nonnull))
208     void create_from_sorted_memory_of_fixed_size_elements(
209             const void *mem,
210             const uint32_t numvalues,
211             const uint32_t mem_length,
212             const uint32_t fixed_value_length);
213 
214     /**
215      * Effect: Creates a copy of an dmt.
216      *  Creates this as the clone.
217      *  Each element is copied directly.  If they are pointers, the underlying data is not duplicated.
218      * Performance: O(memory) (essentially a memdup)
219      *  The underlying structures are memcpy'd.  Only the values themselves are copied (shallow copy)
220      */
221     void clone(const dmt &src);
222 
223     /**
224      * Effect: Set the tree to be empty.
225      *  Note: Will not reallocate or resize any memory.
226      *  Note: If this dmt had variable sized elements, it will start tracking again (until it gets values of two different sizes)
227      * Performance: time=O(1)
228      */
229     void clear(void);
230 
231     /**
232      * Effect:  Destroy an DMT, freeing all its memory.
233      *   If the values being stored are pointers, their underlying data is not freed.
234      *   Those values may be freed before or after calling ::destroy()
235      * Rationale: Returns no values since free() cannot fail.
236      * Rationale: Does not free the underlying pointers to reduce complexity/maintain abstraction layer
237      * Performance:  time=O(1)
238      */
239     void destroy(void);
240 
241     /**
242      * Effect: return |this| (number of values stored in this dmt).
243      * Performance:  time=O(1)
244      */
245     uint32_t size(void) const;
246 
247     /**
248      * Effect: Serialize all values contained in this dmt into a packed form (no alignment padding).
249      *  We serialized to wb.  expected_unpadded_memory is the size of memory reserved in the wbuf
250      *  for serialization.  (We assert that serialization requires exactly the expected amount)
251      * Requires:
252      *  ::prepare_for_serialize() has been called and no non-const functions have been called since.
253      *  This dmt has fixed-length values and is in array form.
254      * Performance:
255      *  O(memory)
256      */
257     void serialize_values(uint32_t expected_unpadded_memory, struct wbuf *wb) const;
258 
259     /**
260      * Effect:  Insert value into the DMT.
261      *   If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST.
262      *   Otherwise, let i be the minimum value such that $h(V_i, v)>0$.
263      *      If no such i exists, then let i be |V|
264      *   Then this has the same effect as
265      *    insert_at(tree, value, i);
266      *   If idx!=NULL then i is stored in *idx
267      * Requires:  The signum of h must be monotonically increasing.
268      * Returns:
269      *    0            success
270      *    DB_KEYEXIST  the key is present (h was equal to zero for some value)
271      * On nonzero return, dmt is unchanged.
272      * Performance: time=O(\log N) amortized.
273      * Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now.
274      */
275     template<typename dmtcmp_t, int (*h)(const uint32_t size, const dmtdata_t &, const dmtcmp_t &)>
276     int insert(const dmtwriter_t &value, const dmtcmp_t &v, uint32_t *const idx);
277 
278     /**
279      * Effect: Increases indexes of all items at slot >= idx by 1.
280      *         Insert value into the position at idx.
281      * Returns:
282      *   0         success
283      *   EINVAL    if idx > this->size()
284      * On error, dmt is unchanged.
285      * Performance: time=O(\log N) amortized time.
286      * Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now.
287      */
288     int insert_at(const dmtwriter_t &value, const uint32_t idx);
289 
290     /**
291      * Effect: Delete the item in slot idx.
292      *         Decreases indexes of all items at slot > idx by 1.
293      * Returns
294      *     0            success
295      *     EINVAL       if idx>=this->size()
296      * On error, dmt is unchanged.
297      * Rationale: To delete an item, first find its index using find or find_zero, then delete it.
298      * Performance: time=O(\log N) amortized.
299      */
300     int delete_at(const uint32_t idx);
301 
302     /**
303      * Effect:  Iterate over the values of the dmt, from left to right, calling f on each value.
304      *  The first argument passed to f is a ref-to-const of the value stored in the dmt.
305      *  The second argument passed to f is the index of the value.
306      *  The third argument passed to f is iterate_extra.
307      *  The indices run from 0 (inclusive) to this->size() (exclusive).
308      * Requires: f != NULL
309      * Returns:
310      *  If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by iterate.
311      *  If f always returns zero, then iterate returns 0.
312      * Requires:  Don't modify the dmt while running.  (E.g., f may not insert or delete values from the dmt.)
313      * Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in the dmt.
314      * Rationale: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read.
315      * Rationale: We may at some point use functors, but for now this is a smaller change from the old DMT.
316      */
317     template<typename iterate_extra_t,
318              int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)>
319     int iterate(iterate_extra_t *const iterate_extra) const;
320 
321     /**
322      * Effect:  Iterate over the values of the dmt, from left to right, calling f on each value.
323      *  The first argument passed to f is a ref-to-const of the value stored in the dmt.
324      *  The second argument passed to f is the index of the value.
325      *  The third argument passed to f is iterate_extra.
326      *  The indices run from 0 (inclusive) to this->size() (exclusive).
327      *  We will iterate only over [left,right)
328      *
329      * Requires: left <= right
330      * Requires: f != NULL
331      * Returns:
332      *  EINVAL  if right > this->size()
333      *  If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by iterate_on_range.
334      *  If f always returns zero, then iterate_on_range returns 0.
335      * Requires:  Don't modify the dmt while running.  (E.g., f may not insert or delete values from the dmt.)
336      * Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in the dmt.
337      * Rational: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read.
338      */
339     template<typename iterate_extra_t,
340              int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)>
341     int iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const;
342 
343     // Attempt to verify this dmt is well formed.  (Crashes/asserts/aborts if not well formed)
344     void verify(void) const;
345 
346     /**
347      * Effect:  Iterate over the values of the dmt, from left to right, calling f on each value.
348      *  The first argument passed to f is a pointer to the value stored in the dmt.
349      *  The second argument passed to f is the index of the value.
350      *  The third argument passed to f is iterate_extra.
351      *  The indices run from 0 (inclusive) to this->size() (exclusive).
352      * Requires: same as for iterate()
353      * Returns: same as for iterate()
354      * Performance: same as for iterate()
355      * Rationale: In general, most iterators should use iterate() since they should not modify the data stored in the dmt.  This function is for iterators which need to modify values (for example, free_items).
356      * Rationale: We assume if you are transforming the data in place, you want to do it to everything at once, so there is not yet an iterate_on_range_ptr (but there could be).
357      */
358     template<typename iterate_extra_t,
359              int (*f)(const uint32_t, dmtdata_t *, const uint32_t, iterate_extra_t *const)>
360     void iterate_ptr(iterate_extra_t *const iterate_extra);
361 
362     /**
363      * Effect: Set *value=V_idx
364      * Returns
365      *    0             success
366      *    EINVAL        if index>=toku_dmt_size(dmt)
367      * On nonzero return, *value is unchanged
368      * Performance: time=O(\log N)
369      */
370     int fetch(const uint32_t idx, uint32_t *const value_size, dmtdataout_t *const value) const;
371 
372     /**
373      * Effect:  Find the smallest i such that h(V_i, extra)>=0
374      *  If there is such an i and h(V_i,extra)==0 then set *idxp=i, set *value = V_i, and return 0.
375      *  If there is such an i and h(V_i,extra)>0  then set *idxp=i and return DB_NOTFOUND.
376      *  If there is no such i then set *idx=this->size() and return DB_NOTFOUND.
377      * Note: value is of type dmtdataout_t, which may be of type (dmtdata_t) or (dmtdata_t *) but is fixed by the instantiation.
378      *  If it is the value type, then the value is copied out (even if the value type is a pointer to something else)
379      *  If it is the pointer type, then *value is set to a pointer to the data within the dmt.
380      *  This is determined by the type of the dmt as initially declared.
381      *   If the dmt is declared as dmt<foo_t>, then foo_t's will be stored and foo_t's will be returned by find and related functions.
382      *   If the dmt is declared as dmt<foo_t, foo_t *>, then foo_t's will be stored, and pointers to the stored items will be returned by find and related functions.
383      * Rationale:
384      *  Structs too small for malloc should be stored directly in the dmt.
385      *  These structs may need to be edited as they exist inside the dmt, so we need a way to get a pointer within the dmt.
386      *  Using separate functions for returning pointers and values increases code duplication and reduces type-checking.
387      *  That also reduces the ability of the creator of a data structure to give advice to its future users.
388      *  Slight overloading in this case seemed to provide a better API and better type checking.
389      */
390     template<typename dmtcmp_t,
391              int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
392     int find_zero(const dmtcmp_t &extra, uint32_t *const value_size, dmtdataout_t *const value, uint32_t *const idxp) const;
393 
394     /**
395      *   Effect:
396      *    If direction >0 then find the smallest i such that h(V_i,extra)>0.
397      *    If direction <0 then find the largest  i such that h(V_i,extra)<0.
398      *    (Direction may not be equal to zero.)
399      *    If value!=NULL then store V_i in *value
400      *    If idxp!=NULL then store i in *idxp.
401      *   Requires: The signum of h is monotically increasing.
402      *   Returns
403      *      0             success
404      *      DB_NOTFOUND   no such value is found.
405      *   On nonzero return, *value and *idxp are unchanged
406      *   Performance: time=O(\log N)
407      *   Rationale:
408      *     Here's how to use the find function to find various things
409      *       Cases for find:
410      *        find first value:         ( h(v)=+1, direction=+1 )
411      *        find last value           ( h(v)=-1, direction=-1 )
412      *        find first X              ( h(v)=(v< x) ? -1 : 1    direction=+1 )
413      *        find last X               ( h(v)=(v<=x) ? -1 : 1    direction=-1 )
414      *        find X or successor to X  ( same as find first X. )
415      *
416      *   Rationale: To help understand heaviside functions and behavor of find:
417      *    There are 7 kinds of heaviside functions.
418      *    The signus of the h must be monotonically increasing.
419      *    Given a function of the following form, A is the element
420      *    returned for direction>0, B is the element returned
421      *    for direction<0, C is the element returned for
422      *    direction==0 (see find_zero) (with a return of 0), and D is the element
423      *    returned for direction==0 (see find_zero) with a return of DB_NOTFOUND.
424      *    If any of A, B, or C are not found, then asking for the
425      *    associated direction will return DB_NOTFOUND.
426      *    See find_zero for more information.
427      *
428      *    Let the following represent the signus of the heaviside function.
429      *
430      *    -...-
431      *        A
432      *         D
433      *
434      *    +...+
435      *    B
436      *    D
437      *
438      *    0...0
439      *    C
440      *
441      *    -...-0...0
442      *        AC
443      *
444      *    0...0+...+
445      *    C    B
446      *
447      *    -...-+...+
448      *        AB
449      *         D
450      *
451      *    -...-0...0+...+
452      *        AC    B
453      */
454     template<typename dmtcmp_t,
455              int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
456     int find(const dmtcmp_t &extra, int direction, uint32_t *const value_size, dmtdataout_t *const value, uint32_t *const idxp) const;
457 
458     /**
459      * Effect: Return the size (in bytes) of the dmt, as it resides in main memory.
460      * If the data stored are pointers, don't include the size of what they all point to.
461      * //TODO(leif or yoni): (maybe rename and) return memory footprint instead of allocated size
462      */
463     size_t memory_size(void);
464 
465     // Returns whether all values in the dmt are known to be the same size.
466     // Note:
467     //  There are no false positives, but false negatives are allowed.
468     //  A false negative can happen if this dmt had 2 (or more) different size values,
469     //  and then enough were deleted so that all the remaining ones are the same size.
470     //  Once that happens, this dmt will never again return true for this function unless/until
471     //  ::clear() is called
472     bool value_length_is_fixed(void) const;
473 
474 
475     // If this dmt is empty, return value is undefined.
476     // else if value_length_is_fixed() then it returns the fixed length.
477     // else returns 0
478     uint32_t get_fixed_length(void) const;
479 
480     // Preprocesses the dmt so that serialization can happen quickly.
481     // After this call, serialize_values() can be called but no other mutator function can be called in between.
482     void prepare_for_serialize(void);
483 
484 private:
485     // Do a bit of verification that subtree and nodes act like packed c structs and do not introduce unnecessary padding for alignment.
486     ENSURE_POD(subtree);
487     static_assert(ALIGNMENT > 0, "ALIGNMENT <= 0");
488     static_assert((ALIGNMENT & (ALIGNMENT - 1)) == 0, "ALIGNMENT not a power of 2");
489     static_assert(sizeof(dmt_node) - sizeof(dmtdata_t) == __builtin_offsetof(dmt_node, value), "value is not last field in node");
490     static_assert(4 * sizeof(uint32_t) == __builtin_offsetof(dmt_node, value), "dmt_node is padded");
491     static_assert(__builtin_offsetof(dmt_node, value) % ALIGNMENT == 0, "dmt_node requires padding for alignment");
492     ENSURE_POD(dmt_node);
493 
494     struct dmt_array {
495         uint32_t num_values;
496     };
497 
498     struct dmt_tree {
499         subtree root;
500     };
501 
502     /*
503     Relationship between values_same_size, d.a.num_values, value_length, is_array:
504     In an empty dmt:
505         is_array is true
506         value_same_size is true
507         value_length is undefined
508         d.a.num_values is 0
509     In a non-empty array dmt:
510         is_array is true
511         values_same_size is true
512         value_length is defined
513         d.a.num_values > 0
514     In a non-empty tree dmt:
515         is_array = false
516         value_same_size is true iff all values have been the same size since the last time the dmt turned into a tree.
517         value_length is defined iff values_same_size is true
518         d.a.num_values is undefined (the memory is used for the tree)
519     Note that in tree form, the dmt keeps track of if all values are the same size until the first time they are not.
520     'values_same_size' will not become true again (even if we change all values to be the same size)
521         until/unless the dmt becomes empty, at which point it becomes an array again.
522      */
523     bool values_same_size;
524     uint32_t value_length;  // valid iff values_same_size is true.
525     struct mempool mp;
526     bool is_array;
527     union {
528         struct dmt_array a;
529         struct dmt_tree t;
530     } d;
531 
532     // Returns pad bytes per element (for alignment) or 0 if not fixed length.
533     uint32_t get_fixed_length_alignment_overhead(void) const;
534 
535     void verify_internal(const subtree &subtree, std::vector<bool> *touched) const;
536 
537     // Retrieves the node for a given subtree.
538     // Requires: !subtree.is_null()
539     dmt_node & get_node(const subtree &subtree) const;
540 
541     // Retrieves the node at a given offset in the mempool.
542     dmt_node & get_node(const node_offset offset) const;
543 
544     // Returns the weight of a subtree rooted at st.
545     // if st.is_null(), returns 0
546     // Perf: O(1)
547     uint32_t nweight(const subtree &st) const;
548 
549     // Allocates space for a node (in the mempool) and uses the dmtwriter to write the value into the node
550     node_offset node_malloc_and_set_value(const dmtwriter_t &value);
551 
552     // Uses the dmtwriter to write a value into node n
553     void node_set_value(dmt_node *n, const dmtwriter_t &value);
554 
555     // (mempool-)free the memory for a node
556     void node_free(const subtree &st);
557 
558     // Effect: Resizes the mempool (holding the array) if necessary to hold one more item of length: this->value_length
559     // Requires:
560     //  This dmt is in array form (and thus this->values_same_length)
561     void maybe_resize_array_for_insert(void);
562 
563     // Effect: Converts a dmt from array form to tree form.
564     // Perf: O(n)
565     // Note: This does not clear the 'this->values_same_size' bit
566     void convert_to_tree(void);
567 
568     // Effect: Resizes the mempool holding a tree if necessary.  If value==nullptr then it may shrink if overallocated,
569     //         otherwise resize only happens if there is not enough free space for an insert of value
570     void maybe_resize_tree(const dmtwriter_t * value);
571 
572     // Returns true if the tree rooted at st would need rebalance after adding
573     // leftmod to the left subtree and rightmod to the right subtree
574     bool will_need_rebalance(const subtree &st, const int leftmod, const int rightmod) const;
575 
576     __attribute__((nonnull))
577     void insert_internal(subtree *const subtreep, const dmtwriter_t &value, const uint32_t idx, subtree **const rebalance_subtree);
578 
579     template<bool with_resize>
580     int insert_at_array_end(const dmtwriter_t& value_in);
581 
582     dmtdata_t * alloc_array_value_end(void);
583 
584     dmtdata_t * get_array_value(const uint32_t idx) const;
585 
586     dmtdata_t * get_array_value_internal(const struct mempool *mempool, const uint32_t idx) const;
587 
588     void convert_from_array_to_tree(void);
589 
590     void convert_from_tree_to_array(void);
591 
592     void delete_internal(subtree *const subtreep, const uint32_t idx, subtree *const subtree_replace, subtree **const rebalance_subtree);
593 
594     template<typename iterate_extra_t,
595              int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)>
596     int iterate_internal_array(const uint32_t left, const uint32_t right,
597                                       iterate_extra_t *const iterate_extra) const;
598 
599     template<typename iterate_extra_t,
600              int (*f)(const uint32_t, dmtdata_t *, const uint32_t, iterate_extra_t *const)>
601     void iterate_ptr_internal(const uint32_t left, const uint32_t right,
602                                      const subtree &subtree, const uint32_t idx,
603                                      iterate_extra_t *const iterate_extra);
604 
605     template<typename iterate_extra_t,
606              int (*f)(const uint32_t, dmtdata_t *, const uint32_t, iterate_extra_t *const)>
607     void iterate_ptr_internal_array(const uint32_t left, const uint32_t right,
608                                            iterate_extra_t *const iterate_extra);
609 
610     template<typename iterate_extra_t,
611              int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)>
612     int iterate_internal(const uint32_t left, const uint32_t right,
613                                 const subtree &subtree, const uint32_t idx,
614                                 iterate_extra_t *const iterate_extra) const;
615 
616     void fetch_internal_array(const uint32_t i, uint32_t *const value_len, dmtdataout_t *const value) const;
617 
618     void fetch_internal(const subtree &subtree, const uint32_t i, uint32_t *const value_len, dmtdataout_t *const value) const;
619 
620     __attribute__((nonnull))
621     void fill_array_with_subtree_offsets(node_offset *const array, const subtree &subtree) const;
622 
623     __attribute__((nonnull))
624     void rebuild_subtree_from_offsets(subtree *const subtree, const node_offset *const offsets, const uint32_t numvalues);
625 
626     __attribute__((nonnull))
627     void rebalance(subtree *const subtree);
628 
629     static void copyout(uint32_t *const outlen, dmtdata_t *const out, const dmt_node *const n);
630 
631     static void copyout(uint32_t *const outlen, dmtdata_t **const out, dmt_node *const n);
632 
633     static void copyout(uint32_t *const outlen, dmtdata_t *const out, const uint32_t len, const dmtdata_t *const stored_value_ptr);
634 
635     static void copyout(uint32_t *const outlen, dmtdata_t **const out, const uint32_t len, dmtdata_t *const stored_value_ptr);
636 
637     template<typename dmtcmp_t,
638              int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
639     int find_internal_zero_array(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
640 
641     template<typename dmtcmp_t,
642              int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
643     int find_internal_zero(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
644 
645     template<typename dmtcmp_t,
646              int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
647     int find_internal_plus_array(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
648 
649     template<typename dmtcmp_t,
650              int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
651     int find_internal_plus(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
652 
653     template<typename dmtcmp_t,
654              int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
655     int find_internal_minus_array(const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
656 
657     template<typename dmtcmp_t,
658              int (*h)(const uint32_t, const dmtdata_t &, const dmtcmp_t &)>
659     int find_internal_minus(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
660 
661     // Allocate memory for an array:  node_offset[num_idx] from pre-allocated contiguous free space in the mempool.
662     // If there is not enough space, returns nullptr.
663     node_offset* alloc_temp_node_offsets(uint32_t num_idxs);
664 
665     // Returns the aligned size of x.
666     // If x % ALIGNMENT == 0, returns x
667     // o.w. returns x + (ALIGNMENT - (x % ALIGNMENT))
668     uint32_t align(const uint32_t x) const;
669 };
670 
671 } // namespace toku
672 
673 // include the implementation here
674 #include "dmt.cc"
675 
676