1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     PerconaFT is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     PerconaFT is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ----------------------------------------
23 
24     PerconaFT is free software: you can redistribute it and/or modify
25     it under the terms of the GNU Affero General Public License, version 3,
26     as published by the Free Software Foundation.
27 
28     PerconaFT is distributed in the hope that it will be useful,
29     but WITHOUT ANY WARRANTY; without even the implied warranty of
30     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31     GNU Affero General Public License for more details.
32 
33     You should have received a copy of the GNU Affero General Public License
34     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
35 
36 ----------------------------------------
37 
38    Licensed under the Apache License, Version 2.0 (the "License");
39    you may not use this file except in compliance with the License.
40    You may obtain a copy of the License at
41 
42        http://www.apache.org/licenses/LICENSE-2.0
43 
44    Unless required by applicable law or agreed to in writing, software
45    distributed under the License is distributed on an "AS IS" BASIS,
46    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
47    See the License for the specific language governing permissions and
48    limitations under the License.
49 ======= */
50 
51 #ident \
52     "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
53 
54 #pragma once
55 
56 #include <memory.h>
57 #include <stdint.h>
58 
59 #include "../portability/toku_portability.h"
60 #include "../portability/toku_race_tools.h"
61 #include "growable_array.h"
62 
63 namespace toku {
64 
65 /**
66  * Order Maintenance Tree (OMT)
67  *
68  * Maintains a collection of totally ordered values, where each value has an
69  * integer weight. The OMT is a mutable datatype.
70  *
71  * The Abstraction:
72  *
73  * An OMT is a vector of values, $V$, where $|V|$ is the length of the vector.
74  * The vector is numbered from $0$ to $|V|-1$.
75  * Each value has a weight.  The weight of the $i$th element is denoted
76  * $w(V_i)$.
77  *
78  * We can create a new OMT, which is the empty vector.
79  *
80  * We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where
81  *  $|V'|=1+|V|$       and
82  *
83  *   V'_j = V_j       if $j<i$
84  *          x         if $j=i$
85  *          V_{j-1}   if $j>i$.
86  *
87  * We can specify $i$ using a kind of function instead of as an integer.
88  * Let $b$ be a function mapping from values to nonzero integers, such that
89  * the signum of $b$ is monotically increasing.
90  * We can specify $i$ as the minimum integer such that $b(V_i)>0$.
91  *
92  * We look up a value using its index, or using a Heaviside function.
93  * For lookups, we allow $b$ to be zero for some values, and again the signum of
94  * $b$ must be monotonically increasing. When lookup up values, we can look up
95  *  $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$.   (With a
96  * special return code if no such value exists.) (Rationale:  Ordinarily we want
97  * $i$ to be unique.  But for various reasons we want to allow multiple zeros,
98  * and we want the smallest $i$ in that case.) $V_i$ where $i$ is the minimum
99  * integer such that $b(V_i)>0$.   (Or an indication that no such value exists.)
100  *  $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$.   (Or an
101  * indication that no such value exists.)
102  *
103  * When looking up a value using a Heaviside function, we get the value and its
104  * index.
105  *
106  * We can also split an OMT into two OMTs, splitting the weight of the values
107  * evenly. Find a value $j$ such that the values to the left of $j$ have about
108  * the same total weight as the values to the right of $j$. The resulting two
109  * OMTs contain the values to the left of $j$ and the values to the right of $j$
110  * respectively. All of the values from the original OMT go into one of the new
111  * OMTs. If the weights of the values don't split exactly evenly, then the
112  * implementation has the freedom to choose whether the new left OMT or the new
113  * right OMT is larger.
114  *
115  * Performance:
116  *  Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$
117  * calls to the Heaviside function. The memory required is O(|V|).
118  *
119  * Usage:
120  *  The omt is templated by two parameters:
121  *   - omtdata_t is what will be stored within the omt.  These could be pointers
122  * or real data types (ints, structs).
123  *   - omtdataout_t is what will be returned by find and related functions.  By
124  * default, it is the same as omtdata_t, but you can set it to (omtdata_t *). To
125  * create an omt which will store "TXNID"s, for example, it is a good idea to
126  * typedef the template: typedef omt<TXNID> txnid_omt_t; If you are storing
127  * structs, you may want to be able to get a pointer to the data actually stored
128  * in the omt (see find_zero).  To do this, use the second template parameter:
129  *   typedef omt<struct foo, struct foo *> foo_omt_t;
130  */
131 
132 namespace omt_internal {
133 
134 template <bool subtree_supports_marks>
135 class subtree_templated {
136  private:
137   uint32_t m_index;
138 
139  public:
140   static const uint32_t NODE_NULL = UINT32_MAX;
set_to_null(void)141   inline void set_to_null(void) { m_index = NODE_NULL; }
142 
is_null(void)143   inline bool is_null(void) const { return NODE_NULL == this->get_index(); }
144 
get_index(void)145   inline uint32_t get_index(void) const { return m_index; }
146 
set_index(uint32_t index)147   inline void set_index(uint32_t index) {
148     paranoid_invariant(index != NODE_NULL);
149     m_index = index;
150   }
151 } __attribute__((__packed__, aligned(4)));
152 
153 template <>
154 class subtree_templated<true> {
155  private:
156   uint32_t m_bitfield;
157   static const uint32_t MASK_INDEX = ~(((uint32_t)1) << 31);
158   static const uint32_t MASK_BIT = ((uint32_t)1) << 31;
159 
set_index_internal(uint32_t new_index)160   inline void set_index_internal(uint32_t new_index) {
161     m_bitfield = (m_bitfield & MASK_BIT) | new_index;
162   }
163 
164  public:
165   static const uint32_t NODE_NULL = INT32_MAX;
set_to_null(void)166   inline void set_to_null(void) { this->set_index_internal(NODE_NULL); }
167 
is_null(void)168   inline bool is_null(void) const { return NODE_NULL == this->get_index(); }
169 
get_index(void)170   inline uint32_t get_index(void) const {
171     TOKU_DRD_IGNORE_VAR(m_bitfield);
172     const uint32_t bits = m_bitfield;
173     TOKU_DRD_STOP_IGNORING_VAR(m_bitfield);
174     return bits & MASK_INDEX;
175   }
176 
set_index(uint32_t index)177   inline void set_index(uint32_t index) {
178     paranoid_invariant(index < NODE_NULL);
179     this->set_index_internal(index);
180   }
181 
get_bit(void)182   inline bool get_bit(void) const {
183     TOKU_DRD_IGNORE_VAR(m_bitfield);
184     const uint32_t bits = m_bitfield;
185     TOKU_DRD_STOP_IGNORING_VAR(m_bitfield);
186     return (bits & MASK_BIT) != 0;
187   }
188 
enable_bit(void)189   inline void enable_bit(void) {
190     // These bits may be set by a thread with a write lock on some
191     // leaf, and the index can be read by another thread with a (read
192     // or write) lock on another thread.  Also, the has_marks_below
193     // bit can be set by two threads simultaneously.  Neither of these
194     // are real races, so if we are using DRD we should tell it to
195     // ignore these bits just while we set this bit.  If there were a
196     // race in setting the index, that would be a real race.
197     TOKU_DRD_IGNORE_VAR(m_bitfield);
198     m_bitfield |= MASK_BIT;
199     TOKU_DRD_STOP_IGNORING_VAR(m_bitfield);
200   }
201 
disable_bit(void)202   inline void disable_bit(void) { m_bitfield &= MASK_INDEX; }
203 } __attribute__((__packed__));
204 
205 template <typename omtdata_t, bool subtree_supports_marks>
206 class omt_node_templated {
207  public:
208   omtdata_t value;
209   uint32_t weight;
210   subtree_templated<subtree_supports_marks> left;
211   subtree_templated<subtree_supports_marks> right;
212 
213   // this needs to be in both implementations because we don't have
214   // a "static if" the caller can use
clear_stolen_bits(void)215   inline void clear_stolen_bits(void) {}
216 };  // note: originally this class had __attribute__((__packed__, aligned(4)))
217 
218 template <typename omtdata_t>
219 class omt_node_templated<omtdata_t, true> {
220  public:
221   omtdata_t value;
222   uint32_t weight;
223   subtree_templated<true> left;
224   subtree_templated<true> right;
get_marked(void)225   inline bool get_marked(void) const { return left.get_bit(); }
set_marked_bit(void)226   inline void set_marked_bit(void) { return left.enable_bit(); }
unset_marked_bit(void)227   inline void unset_marked_bit(void) { return left.disable_bit(); }
228 
get_marks_below(void)229   inline bool get_marks_below(void) const { return right.get_bit(); }
set_marks_below_bit(void)230   inline void set_marks_below_bit(void) {
231     // This function can be called by multiple threads.
232     // Checking first reduces cache invalidation.
233     if (!this->get_marks_below()) {
234       right.enable_bit();
235     }
236   }
unset_marks_below_bit(void)237   inline void unset_marks_below_bit(void) { right.disable_bit(); }
238 
clear_stolen_bits(void)239   inline void clear_stolen_bits(void) {
240     this->unset_marked_bit();
241     this->unset_marks_below_bit();
242   }
243 };  // note: originally this class had __attribute__((__packed__, aligned(4)))
244 
245 }  // namespace omt_internal
246 
247 template <typename omtdata_t, typename omtdataout_t = omtdata_t,
248           bool supports_marks = false>
249 class omt {
250  public:
251   /**
252    * Effect: Create an empty OMT.
253    * Performance: constant time.
254    */
255   void create(void);
256 
257   /**
258    * Effect: Create an empty OMT with no internal allocated space.
259    * Performance: constant time.
260    * Rationale: In some cases we need a valid omt but don't want to malloc.
261    */
262   void create_no_array(void);
263 
264   /**
265    * Effect: Create a OMT containing values.  The number of values is in
266    * numvalues. Stores the new OMT in *omtp. Requires: this has not been created
267    * yet Requires: values != NULL Requires: values is sorted Performance:
268    * time=O(numvalues) Rationale:    Normally to insert N values takes O(N lg N)
269    * amortized time. If the N values are known in advance, are sorted, and the
270    * structure is empty, we can batch insert them much faster.
271    */
272   __attribute__((nonnull)) void create_from_sorted_array(
273       const omtdata_t *const values, const uint32_t numvalues);
274 
275   /**
276    * Effect: Create an OMT containing values.  The number of values is in
277    * numvalues. On success the OMT takes ownership of *values array, and sets
278    * values=NULL. Requires: this has not been created yet Requires: values !=
279    * NULL Requires: *values is sorted Requires: *values was allocated with
280    * toku_malloc Requires: Capacity of the *values array is <= new_capacity
281    * Requires: On success, *values may not be accessed again by the caller.
282    * Performance:  time=O(1)
283    * Rational:     create_from_sorted_array takes O(numvalues) time.
284    *               By taking ownership of the array, we save a malloc and
285    * memcpy, and possibly a free (if the caller is done with the array).
286    */
287   void create_steal_sorted_array(omtdata_t **const values,
288                                  const uint32_t numvalues,
289                                  const uint32_t new_capacity);
290 
291   /**
292    * Effect: Create a new OMT, storing it in *newomt.
293    *  The values to the right of index (starting at index) are moved to *newomt.
294    * Requires: newomt != NULL
295    * Returns
296    *    0             success,
297    *    EINVAL        if index > toku_omt_size(omt)
298    * On nonzero return, omt and *newomt are unmodified.
299    * Performance: time=O(n)
300    * Rationale:  We don't need a split-evenly operation.  We need to split items
301    * so that their total sizes are even, and other similar splitting criteria.
302    * It's easy to split evenly by calling size(), and dividing by two.
303    */
304   __attribute__((nonnull)) int split_at(omt *const newomt, const uint32_t idx);
305 
306   /**
307    * Effect: Appends leftomt and rightomt to produce a new omt.
308    *  Creates this as the new omt.
309    *  leftomt and rightomt are destroyed.
310    * Performance: time=O(n) is acceptable, but one can imagine implementations
311    * that are O(\log n) worst-case.
312    */
313   __attribute__((nonnull)) void merge(omt *const leftomt, omt *const rightomt);
314 
315   /**
316    * Effect: Creates a copy of an omt.
317    *  Creates this as the clone.
318    *  Each element is copied directly.  If they are pointers, the underlying
319    * data is not duplicated. Performance: O(n) or the running time of
320    * fill_array_with_subtree_values()
321    */
322   void clone(const omt &src);
323 
324   /**
325    * Effect: Set the tree to be empty.
326    *  Note: Will not reallocate or resize any memory.
327    * Performance: time=O(1)
328    */
329   void clear(void);
330 
331   /**
332    * Effect:  Destroy an OMT, freeing all its memory.
333    *   If the values being stored are pointers, their underlying data is not
334    * freed.  See free_items() Those values may be freed before or after calling
335    * toku_omt_destroy. Rationale: Returns no values since free() cannot fail.
336    * Rationale: Does not free the underlying pointers to reduce complexity.
337    * Performance:  time=O(1)
338    */
339   void destroy(void);
340 
341   /**
342    * Effect: return |this|.
343    * Performance:  time=O(1)
344    */
345   uint32_t size(void) const;
346 
347   /**
348    * Effect:  Insert value into the OMT.
349    *   If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST.
350    *   Otherwise, let i be the minimum value such that $h(V_i, v)>0$.
351    *      If no such i exists, then let i be |V|
352    *   Then this has the same effect as
353    *    insert_at(tree, value, i);
354    *   If idx!=NULL then i is stored in *idx
355    * Requires:  The signum of h must be monotonically increasing.
356    * Returns:
357    *    0            success
358    *    DB_KEYEXIST  the key is present (h was equal to zero for some value)
359    * On nonzero return, omt is unchanged.
360    * Performance: time=O(\log N) amortized.
361    * Rationale: Some future implementation may be O(\log N) worst-case time, but
362    * O(\log N) amortized is good enough for now.
363    */
364   template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)>
365   int insert(const omtdata_t &value, const omtcmp_t &v, uint32_t *const idx);
366 
367   /**
368    * Effect: Increases indexes of all items at slot >= idx by 1.
369    *         Insert value into the position at idx.
370    * Returns:
371    *   0         success
372    *   EINVAL    if idx > this->size()
373    * On error, omt is unchanged.
374    * Performance: time=O(\log N) amortized time.
375    * Rationale: Some future implementation may be O(\log N) worst-case time, but
376    * O(\log N) amortized is good enough for now.
377    */
378   int insert_at(const omtdata_t &value, const uint32_t idx);
379 
380   /**
381    * Effect:  Replaces the item at idx with value.
382    * Returns:
383    *   0       success
384    *   EINVAL  if idx>=this->size()
385    * On error, omt is unchanged.
386    * Performance: time=O(\log N)
387    * Rationale: The FT needs to be able to replace a value with another copy of
388    * the same value (allocated in a different location)
389    *
390    */
391   int set_at(const omtdata_t &value, const uint32_t idx);
392 
393   /**
394    * Effect: Delete the item in slot idx.
395    *         Decreases indexes of all items at slot > idx by 1.
396    * Returns
397    *     0            success
398    *     EINVAL       if idx>=this->size()
399    * On error, omt is unchanged.
400    * Rationale: To delete an item, first find its index using find or find_zero,
401    * then delete it. Performance: time=O(\log N) amortized.
402    */
403   int delete_at(const uint32_t idx);
404 
405   /**
406    * Effect:  Iterate over the values of the omt, from left to right, calling f
407    * on each value. The first argument passed to f is a ref-to-const of the
408    * value stored in the omt. The second argument passed to f is the index of
409    * the value. The third argument passed to f is iterate_extra. The indices run
410    * from 0 (inclusive) to this->size() (exclusive). Requires: f != NULL
411    * Returns:
412    *  If f ever returns nonzero, then the iteration stops, and the value
413    * returned by f is returned by iterate. If f always returns zero, then
414    * iterate returns 0. Requires:  Don't modify the omt while running.  (E.g., f
415    * may not insert or delete values from the omt.) Performance: time=O(i+\log
416    * N) where i is the number of times f is called, and N is the number of
417    * elements in the omt. Rationale: Although the functional iterator requires
418    * defining another function (as opposed to C++ style iterator), it is much
419    * easier to read. Rationale: We may at some point use functors, but for now
420    * this is a smaller change from the old OMT.
421    */
422   template <typename iterate_extra_t,
423             int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)>
424   int iterate(iterate_extra_t *const iterate_extra) const;
425 
426   /**
427    * Effect:  Iterate over the values of the omt, from left to right, calling f
428    * on each value. The first argument passed to f is a ref-to-const of the
429    * value stored in the omt. The second argument passed to f is the index of
430    * the value. The third argument passed to f is iterate_extra. The indices run
431    * from 0 (inclusive) to this->size() (exclusive). We will iterate only over
432    * [left,right)
433    *
434    * Requires: left <= right
435    * Requires: f != NULL
436    * Returns:
437    *  EINVAL  if right > this->size()
438    *  If f ever returns nonzero, then the iteration stops, and the value
439    * returned by f is returned by iterate_on_range. If f always returns zero,
440    * then iterate_on_range returns 0. Requires:  Don't modify the omt while
441    * running.  (E.g., f may not insert or delete values from the omt.)
442    * Performance: time=O(i+\log N) where i is the number of times f is called,
443    * and N is the number of elements in the omt. Rational: Although the
444    * functional iterator requires defining another function (as opposed to C++
445    * style iterator), it is much easier to read.
446    */
447   template <typename iterate_extra_t,
448             int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)>
449   int iterate_on_range(const uint32_t left, const uint32_t right,
450                        iterate_extra_t *const iterate_extra) const;
451 
452   /**
453    * Effect: Iterate over the values of the omt, and mark the nodes that are
454    * visited. Other than the marks, this behaves the same as iterate_on_range.
455    * Requires: supports_marks == true
456    * Performance: time=O(i+\log N) where i is the number of times f is called,
457    * and N is the number of elements in the omt. Notes: This function MAY be
458    * called concurrently by multiple threads, but not concurrently with any
459    * other non-const function.
460    */
461   template <typename iterate_extra_t,
462             int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)>
463   int iterate_and_mark_range(const uint32_t left, const uint32_t right,
464                              iterate_extra_t *const iterate_extra);
465 
466   /**
467    * Effect: Iterate over the values of the omt, from left to right, calling f
468    * on each value whose node has been marked. Other than the marks, this
469    * behaves the same as iterate. Requires: supports_marks == true Performance:
470    * time=O(i+\log N) where i is the number of times f is called, and N is the
471    * number of elements in the omt.
472    */
473   template <typename iterate_extra_t,
474             int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)>
475   int iterate_over_marked(iterate_extra_t *const iterate_extra) const;
476 
477   /**
478    * Effect: Delete all elements from the omt, whose nodes have been marked.
479    * Requires: supports_marks == true
480    * Performance: time=O(N + i\log N) where i is the number of marked elements,
481    * {c,sh}ould be faster
482    */
483   void delete_all_marked(void);
484 
485   /**
486    * Effect: Verify that the internal state of the marks in the tree are
487    * self-consistent. Crashes the system if the marks are in a bad state.
488    * Requires: supports_marks == true
489    * Performance: time=O(N)
490    * Notes:
491    *  Even though this is a const function, it requires exclusive access.
492    * Rationale:
493    *  The current implementation of the marks relies on a sort of
494    *  "cache" bit representing the state of bits below it in the tree.
495    *  This allows glass-box testing that these bits are correct.
496    */
497   void verify_marks_consistent(void) const;
498 
499   /**
500    * Effect: None
501    * Returns whether there are any marks in the tree.
502    */
503   bool has_marks(void) const;
504 
505   /**
506    * Effect:  Iterate over the values of the omt, from left to right, calling f
507    * on each value. The first argument passed to f is a pointer to the value
508    * stored in the omt. The second argument passed to f is the index of the
509    * value. The third argument passed to f is iterate_extra. The indices run
510    * from 0 (inclusive) to this->size() (exclusive). Requires: same as for
511    * iterate() Returns: same as for iterate() Performance: same as for iterate()
512    * Rationale: In general, most iterators should use iterate() since they
513    * should not modify the data stored in the omt.  This function is for
514    * iterators which need to modify values (for example, free_items). Rationale:
515    * We assume if you are transforming the data in place, you want to do it to
516    * everything at once, so there is not yet an iterate_on_range_ptr (but there
517    * could be).
518    */
519   template <typename iterate_extra_t,
520             int (*f)(omtdata_t *, const uint32_t, iterate_extra_t *const)>
521   void iterate_ptr(iterate_extra_t *const iterate_extra);
522 
523   /**
524    * Effect: Set *value=V_idx
525    * Returns
526    *    0             success
527    *    EINVAL        if index>=toku_omt_size(omt)
528    * On nonzero return, *value is unchanged
529    * Performance: time=O(\log N)
530    */
531   int fetch(const uint32_t idx, omtdataout_t *const value) const;
532 
533   /**
534    * Effect:  Find the smallest i such that h(V_i, extra)>=0
535    *  If there is such an i and h(V_i,extra)==0 then set *idxp=i, set *value =
536    * V_i, and return 0. If there is such an i and h(V_i,extra)>0  then set
537    * *idxp=i and return DB_NOTFOUND. If there is no such i then set
538    * *idx=this->size() and return DB_NOTFOUND. Note: value is of type
539    * omtdataout_t, which may be of type (omtdata_t) or (omtdata_t *) but is
540    * fixed by the instantiation. If it is the value type, then the value is
541    * copied out (even if the value type is a pointer to something else) If it is
542    * the pointer type, then *value is set to a pointer to the data within the
543    * omt. This is determined by the type of the omt as initially declared. If
544    * the omt is declared as omt<foo_t>, then foo_t's will be stored and foo_t's
545    * will be returned by find and related functions. If the omt is declared as
546    * omt<foo_t, foo_t *>, then foo_t's will be stored, and pointers to the
547    * stored items will be returned by find and related functions. Rationale:
548    *  Structs too small for malloc should be stored directly in the omt.
549    *  These structs may need to be edited as they exist inside the omt, so we
550    * need a way to get a pointer within the omt. Using separate functions for
551    * returning pointers and values increases code duplication and reduces
552    * type-checking. That also reduces the ability of the creator of a data
553    * structure to give advice to its future users. Slight overloading in this
554    * case seemed to provide a better API and better type checking.
555    */
556   template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)>
557   int find_zero(const omtcmp_t &extra, omtdataout_t *const value,
558                 uint32_t *const idxp) const;
559 
560   /**
561    *   Effect:
562    *    If direction >0 then find the smallest i such that h(V_i,extra)>0.
563    *    If direction <0 then find the largest  i such that h(V_i,extra)<0.
564    *    (Direction may not be equal to zero.)
565    *    If value!=NULL then store V_i in *value
566    *    If idxp!=NULL then store i in *idxp.
567    *   Requires: The signum of h is monotically increasing.
568    *   Returns
569    *      0             success
570    *      DB_NOTFOUND   no such value is found.
571    *   On nonzero return, *value and *idxp are unchanged
572    *   Performance: time=O(\log N)
573    *   Rationale:
574    *     Here's how to use the find function to find various things
575    *       Cases for find:
576    *        find first value:         ( h(v)=+1, direction=+1 )
577    *        find last value           ( h(v)=-1, direction=-1 )
578    *        find first X              ( h(v)=(v< x) ? -1 : 1    direction=+1 )
579    *        find last X               ( h(v)=(v<=x) ? -1 : 1    direction=-1 )
580    *        find X or successor to X  ( same as find first X. )
581    *
582    *   Rationale: To help understand heaviside functions and behavor of find:
583    *    There are 7 kinds of heaviside functions.
584    *    The signus of the h must be monotonically increasing.
585    *    Given a function of the following form, A is the element
586    *    returned for direction>0, B is the element returned
587    *    for direction<0, C is the element returned for
588    *    direction==0 (see find_zero) (with a return of 0), and D is the element
589    *    returned for direction==0 (see find_zero) with a return of DB_NOTFOUND.
590    *    If any of A, B, or C are not found, then asking for the
591    *    associated direction will return DB_NOTFOUND.
592    *    See find_zero for more information.
593    *
594    *    Let the following represent the signus of the heaviside function.
595    *
596    *    -...-
597    *        A
598    *         D
599    *
600    *    +...+
601    *    B
602    *    D
603    *
604    *    0...0
605    *    C
606    *
607    *    -...-0...0
608    *        AC
609    *
610    *    0...0+...+
611    *    C    B
612    *
613    *    -...-+...+
614    *        AB
615    *         D
616    *
617    *    -...-0...0+...+
618    *        AC    B
619    */
620   template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)>
621   int find(const omtcmp_t &extra, int direction, omtdataout_t *const value,
622            uint32_t *const idxp) const;
623 
624   /**
625    * Effect: Return the size (in bytes) of the omt, as it resides in main
626    * memory.  If the data stored are pointers, don't include the size of what
627    * they all point to.
628    */
629   size_t memory_size(void);
630 
631  private:
632   typedef uint32_t node_idx;
633   typedef omt_internal::subtree_templated<supports_marks> subtree;
634   typedef omt_internal::omt_node_templated<omtdata_t, supports_marks> omt_node;
635   ENSURE_POD(subtree);
636 
637   struct omt_array {
638     uint32_t start_idx;
639     uint32_t num_values;
640     omtdata_t *values;
641   };
642 
643   struct omt_tree {
644     subtree root;
645     uint32_t free_idx;
646     omt_node *nodes;
647   };
648 
649   bool is_array;
650   uint32_t capacity;
651   union {
652     struct omt_array a;
653     struct omt_tree t;
654   } d;
655 
656   __attribute__((nonnull)) void unmark(const subtree &subtree,
657                                        const uint32_t index,
658                                        GrowableArray<node_idx> *const indexes);
659 
660   void create_internal_no_array(const uint32_t new_capacity);
661 
662   void create_internal(const uint32_t new_capacity);
663 
664   uint32_t nweight(const subtree &subtree) const;
665 
666   node_idx node_malloc(void);
667 
668   void node_free(const node_idx idx);
669 
670   void maybe_resize_array(const uint32_t n);
671 
672   __attribute__((nonnull)) void fill_array_with_subtree_values(
673       omtdata_t *const array, const subtree &subtree) const;
674 
675   void convert_to_array(void);
676 
677   __attribute__((nonnull)) void rebuild_from_sorted_array(
678       subtree *const subtree, const omtdata_t *const values,
679       const uint32_t numvalues);
680 
681   void convert_to_tree(void);
682 
683   void maybe_resize_or_convert(const uint32_t n);
684 
685   bool will_need_rebalance(const subtree &subtree, const int leftmod,
686                            const int rightmod) const;
687 
688   __attribute__((nonnull)) void insert_internal(
689       subtree *const subtreep, const omtdata_t &value, const uint32_t idx,
690       subtree **const rebalance_subtree);
691 
692   void set_at_internal_array(const omtdata_t &value, const uint32_t idx);
693 
694   void set_at_internal(const subtree &subtree, const omtdata_t &value,
695                        const uint32_t idx);
696 
697   void delete_internal(subtree *const subtreep, const uint32_t idx,
698                        omt_node *const copyn,
699                        subtree **const rebalance_subtree);
700 
701   template <typename iterate_extra_t,
702             int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)>
703   int iterate_internal_array(const uint32_t left, const uint32_t right,
704                              iterate_extra_t *const iterate_extra) const;
705 
706   template <typename iterate_extra_t,
707             int (*f)(omtdata_t *, const uint32_t, iterate_extra_t *const)>
708   void iterate_ptr_internal(const uint32_t left, const uint32_t right,
709                             const subtree &subtree, const uint32_t idx,
710                             iterate_extra_t *const iterate_extra);
711 
712   template <typename iterate_extra_t,
713             int (*f)(omtdata_t *, const uint32_t, iterate_extra_t *const)>
714   void iterate_ptr_internal_array(const uint32_t left, const uint32_t right,
715                                   iterate_extra_t *const iterate_extra);
716 
717   template <typename iterate_extra_t,
718             int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)>
719   int iterate_internal(const uint32_t left, const uint32_t right,
720                        const subtree &subtree, const uint32_t idx,
721                        iterate_extra_t *const iterate_extra) const;
722 
723   template <typename iterate_extra_t,
724             int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)>
725   int iterate_and_mark_range_internal(const uint32_t left, const uint32_t right,
726                                       const subtree &subtree,
727                                       const uint32_t idx,
728                                       iterate_extra_t *const iterate_extra);
729 
730   template <typename iterate_extra_t,
731             int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)>
732   int iterate_over_marked_internal(const subtree &subtree, const uint32_t idx,
733                                    iterate_extra_t *const iterate_extra) const;
734 
735   uint32_t verify_marks_consistent_internal(const subtree &subtree,
736                                             const bool allow_marks) const;
737 
738   void fetch_internal_array(const uint32_t i, omtdataout_t *const value) const;
739 
740   void fetch_internal(const subtree &subtree, const uint32_t i,
741                       omtdataout_t *const value) const;
742 
743   __attribute__((nonnull)) void fill_array_with_subtree_idxs(
744       node_idx *const array, const subtree &subtree) const;
745 
746   __attribute__((nonnull)) void rebuild_subtree_from_idxs(
747       subtree *const subtree, const node_idx *const idxs,
748       const uint32_t numvalues);
749 
750   __attribute__((nonnull)) void rebalance(subtree *const subtree);
751 
752   __attribute__((nonnull)) static void copyout(omtdata_t *const out,
753                                                const omt_node *const n);
754 
755   __attribute__((nonnull)) static void copyout(omtdata_t **const out,
756                                                omt_node *const n);
757 
758   __attribute__((nonnull)) static void copyout(
759       omtdata_t *const out, const omtdata_t *const stored_value_ptr);
760 
761   __attribute__((nonnull)) static void copyout(
762       omtdata_t **const out, omtdata_t *const stored_value_ptr);
763 
764   template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)>
765   int find_internal_zero_array(const omtcmp_t &extra, omtdataout_t *const value,
766                                uint32_t *const idxp) const;
767 
768   template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)>
769   int find_internal_zero(const subtree &subtree, const omtcmp_t &extra,
770                          omtdataout_t *const value, uint32_t *const idxp) const;
771 
772   template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)>
773   int find_internal_plus_array(const omtcmp_t &extra, omtdataout_t *const value,
774                                uint32_t *const idxp) const;
775 
776   template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)>
777   int find_internal_plus(const subtree &subtree, const omtcmp_t &extra,
778                          omtdataout_t *const value, uint32_t *const idxp) const;
779 
780   template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)>
781   int find_internal_minus_array(const omtcmp_t &extra,
782                                 omtdataout_t *const value,
783                                 uint32_t *const idxp) const;
784 
785   template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)>
786   int find_internal_minus(const subtree &subtree, const omtcmp_t &extra,
787                           omtdataout_t *const value,
788                           uint32_t *const idxp) const;
789 };
790 
791 }  // namespace toku
792 
793 // include the implementation here
794 #include "omt_impl.h"
795