1 //! An implementation of B trees. The core operations on B trees
2 //! (lookup, iterate, put and del) are generic in the actual
3 //! implementation of nodes, via the [`BTreePage`] and
4 //! [`BTreeMutPage`]. This allows for a simpler code for the
5 //! high-level functions, as well as specialised, high-performance
6 //! implementations for the nodes.
7 //!
8 //! Two different implementations are supplied: one in [`page`] for
9 //! types with a size known at compile-time, which yields denser
10 //! leaves, and hence shallower trees (if the values are really using
11 //! the space). The other one, in [`page_unsized`], is for
12 //! dynamic-sized types, or types whose representation is dynamic, for
13 //! example enums that are `Sized` in Rust, but whose cases vary
14 //! widely in size.
15 use crate::*;
16 #[doc(hidden)]
17 pub mod cursor;
18 pub use cursor::{iter, rev_iter, Cursor, Iter, RevIter};
19 pub mod del;
20 pub use del::del;
21 pub mod put;
22 pub use put::put;
23 pub mod page;
24 pub mod page_unsized;
25 
26 pub trait BTreePage<K: ?Sized, V: ?Sized>: Sized {
27     type Cursor: Clone + Copy + core::fmt::Debug;
28 
29     /// Whether this cursor is at the end of the page.
is_empty(c: &Self::Cursor) -> bool30     fn is_empty(c: &Self::Cursor) -> bool;
31 
32     /// Whether this cursor is strictly before the first element.
is_init(c: &Self::Cursor) -> bool33     fn is_init(c: &Self::Cursor) -> bool;
34 
35     /// Returns a cursor set before the first element of the page
36     /// (i.e. set to -1).
cursor_before(p: &CowPage) -> Self::Cursor37     fn cursor_before(p: &CowPage) -> Self::Cursor;
38 
39     /// Returns a cursor set to the first element of the page
40     /// (i.e. 0). If the page is empty, the returned cursor might be
41     /// empty.
cursor_first(p: &CowPage) -> Self::Cursor42     fn cursor_first(p: &CowPage) -> Self::Cursor {
43         let mut c = Self::cursor_before(p);
44         Self::move_next(&mut c);
45         c
46     }
47 
48     /// Returns a cursor set after the last element of the page
49     /// (i.e. to element n)
cursor_after(p: &CowPage) -> Self::Cursor50     fn cursor_after(p: &CowPage) -> Self::Cursor;
51 
52     /// Returns a cursor set to the last element of the page. If the
53     /// cursor is empty, this is the same as `cursor_before`.
cursor_last(p: &CowPage) -> Self::Cursor54     fn cursor_last(p: &CowPage) -> Self::Cursor {
55         let mut c = Self::cursor_after(p);
56         Self::move_prev(&mut c);
57         c
58     }
59 
60     /// Return the element currently pointed to by the cursor (if the
61     /// cursor is not before or after the elements of the page), and
62     /// move the cursor to the next element.
next<'b, T: LoadPage>( txn: &T, p: Page<'b>, c: &mut Self::Cursor, ) -> Option<(&'b K, &'b V, u64)>63     fn next<'b, T: LoadPage>(
64         txn: &T,
65         p: Page<'b>,
66         c: &mut Self::Cursor,
67     ) -> Option<(&'b K, &'b V, u64)> {
68         if let Some((k, v, r)) = Self::current(txn, p, c) {
69             Self::move_next(c);
70             Some((k, v, r))
71         } else {
72             None
73         }
74     }
75 
76     /// Move the cursor to the previous element, and return that
77     /// element. Note that this is not the symmetric of `next`.
prev<'b, T: LoadPage>( txn: &T, p: Page<'b>, c: &mut Self::Cursor, ) -> Option<(&'b K, &'b V, u64)>78     fn prev<'b, T: LoadPage>(
79         txn: &T,
80         p: Page<'b>,
81         c: &mut Self::Cursor,
82     ) -> Option<(&'b K, &'b V, u64)> {
83         if Self::move_prev(c) {
84             Self::current(txn, p, c)
85         } else {
86             None
87         }
88     }
89 
90     /// Move the cursor to the next position. Returns whether the
91     /// cursor was actually moved (i.e. `true` if and only if the
92     /// cursor isn't already after the last element).
move_next(c: &mut Self::Cursor) -> bool93     fn move_next(c: &mut Self::Cursor) -> bool;
94 
95     /// Move the cursor to the previous position. Returns whether the
96     /// cursor was actually moved (i.e. `true` if and only if the
97     /// cursor isn't strictly before the page).
move_prev(c: &mut Self::Cursor) -> bool98     fn move_prev(c: &mut Self::Cursor) -> bool;
99 
100     /// Returns the current element, if the cursor is pointing at one.
current<'a, T: LoadPage>( txn: &T, p: Page<'a>, c: &Self::Cursor, ) -> Option<(&'a K, &'a V, u64)>101     fn current<'a, T: LoadPage>(
102         txn: &T,
103         p: Page<'a>,
104         c: &Self::Cursor,
105     ) -> Option<(&'a K, &'a V, u64)>;
106 
107     /// Returns the left child of the entry pointed to by the cursor.
left_child(p: Page, c: &Self::Cursor) -> u64108     fn left_child(p: Page, c: &Self::Cursor) -> u64;
109 
110     /// Returns the right child of the entry pointed to by the cursor.
right_child(p: Page, c: &Self::Cursor) -> u64111     fn right_child(p: Page, c: &Self::Cursor) -> u64;
112 
113     /// Sets the cursor to the last element less than or equal to `k0`
114     /// if `v0.is_none()`, and to `(k0, v0)` if `v0.is_some()`.  If a
115     /// match is found (on `k0` only if `v0.is_none()`, on `(k0, v0)`
116     /// else), return the match along with the right child.
117     ///
118     /// Else (in the `Err` case of the `Result`), return the position
119     /// at which `(k0, v0)` can be inserted.
set_cursor<'a, T: LoadPage>( txn: &'a T, page: Page, c: &mut Self::Cursor, k0: &K, v0: Option<&V>, ) -> Result<(&'a K, &'a V, u64), usize>120     fn set_cursor<'a, T: LoadPage>(
121         txn: &'a T,
122         page: Page,
123         c: &mut Self::Cursor,
124         k0: &K,
125         v0: Option<&V>,
126     ) -> Result<(&'a K, &'a V, u64), usize>;
127 
128     /// Splits the cursor into two cursors: the elements strictly
129     /// before the current position, and the elements greater than or
130     /// equal the current element.
split_at(c: &Self::Cursor) -> (Self::Cursor, Self::Cursor)131     fn split_at(c: &Self::Cursor) -> (Self::Cursor, Self::Cursor);
132 }
133 
134 pub struct PageIterator<'a, T: LoadPage, K: ?Sized, V: ?Sized, P: BTreePage<K, V>> {
135     cursor: P::Cursor,
136     txn: &'a T,
137     page: Page<'a>,
138 }
139 
140 impl<'a, T: LoadPage, K: ?Sized + 'a, V: ?Sized + 'a, P: BTreePage<K, V>> Iterator
141     for PageIterator<'a, T, K, V, P>
142 {
143     type Item = (&'a K, &'a V, u64);
next(&mut self) -> Option<Self::Item>144     fn next(&mut self) -> Option<Self::Item> {
145         P::next(self.txn, self.page, &mut self.cursor)
146     }
147 }
148 
149 pub trait BTreeMutPage<K: ?Sized, V: ?Sized>: BTreePage<K, V> + core::fmt::Debug {
150     /// Initialise a page.
init(page: &mut MutPage)151     fn init(page: &mut MutPage);
152 
153     /// Add an entry to the page, possibly splitting the page in the
154     /// process.
155     ///
156     /// Makes the assumption that `k1v1.is_some()` implies
157     /// `replace`. When `k1v1.is_some()`, we insert both `(k0, v0)`
158     /// (as a replacement), followed by `(k1, v1)`. This is only ever
159     /// used when deleting, and when the right child of a deleted
160     /// entry (in an internal node) has split while we were looking
161     /// for a replacement for the deleted entry.
162     ///
163     /// Since we only look for replacements in the right child, the
164     /// left child of the insertion isn't modified, in which case `l`
165     /// and `r` are interpreted as the left and right child of the new
166     /// entry, `k1v1`.
put<'a, T: AllocPage>( txn: &mut T, page: CowPage, mutable: bool, replace: bool, c: &Self::Cursor, k0: &'a K, v0: &'a V, k1v1: Option<(&'a K, &'a V)>, l: u64, r: u64, ) -> Result<crate::btree::put::Put<'a, K, V>, T::Error>167     fn put<'a, T: AllocPage>(
168         txn: &mut T,
169         page: CowPage,
170         mutable: bool,
171         replace: bool,
172         c: &Self::Cursor,
173         k0: &'a K,
174         v0: &'a V,
175         k1v1: Option<(&'a K, &'a V)>,
176         l: u64,
177         r: u64,
178     ) -> Result<crate::btree::put::Put<'a, K, V>, T::Error>;
179 
180     /// Add an entry to `page`, at position `c`. Does not check
181     /// whether there is enough space to do so. This method is mostly
182     /// useful for cloning pages.
183     #[allow(unused_variables)]
put_mut( page: &mut MutPage, c: &mut Self::Cursor, k0: &K, v0: &V, r: u64, )184     unsafe fn put_mut(
185         page: &mut MutPage,
186         c: &mut Self::Cursor,
187         k0: &K,
188         v0: &V,
189         r: u64,
190     );
191 
192     #[allow(unused_variables)]
set_left_child( page: &mut MutPage, c: &Self::Cursor, l: u64 )193     unsafe fn set_left_child(
194         page: &mut MutPage,
195         c: &Self::Cursor,
196         l: u64
197     );
198 
199 
200     /// Update the left child of the position pointed to by the
201     /// cursor.
update_left_child<T: AllocPage>( txn: &mut T, page: CowPage, mutable: bool, c: &Self::Cursor, r: u64, ) -> Result<crate::btree::put::Ok, T::Error>202     fn update_left_child<T: AllocPage>(
203         txn: &mut T,
204         page: CowPage,
205         mutable: bool,
206         c: &Self::Cursor,
207         r: u64,
208     ) -> Result<crate::btree::put::Ok, T::Error>;
209 
210     type Saved;
211 
212     /// Save a leaf entry as a replacement, when we delete at an
213     /// internal node. This can be a pointer to the leaf if the leaf
214     /// isn't mutated, or the actual value, or something else.
save_deleted_leaf_entry(k: &K, v: &V) -> Self::Saved215     fn save_deleted_leaf_entry(k: &K, v: &V) -> Self::Saved;
216 
217     /// Recover the saved value.
from_saved<'a>(s: &Self::Saved) -> (&'a K, &'a V)218     unsafe fn from_saved<'a>(s: &Self::Saved) -> (&'a K, &'a V);
219 
220     /// Delete an entry on the page, returning the resuting page along
221     /// with the offset of the freed page (or 0 if no page was freed,
222     /// i.e. if `page` is mutable).
del<T: AllocPage>( txn: &mut T, page: CowPage, mutable: bool, c: &Self::Cursor, l: u64, ) -> Result<(MutPage, u64), T::Error>223     fn del<T: AllocPage>(
224         txn: &mut T,
225         page: CowPage,
226         mutable: bool,
227         c: &Self::Cursor,
228         l: u64,
229     ) -> Result<(MutPage, u64), T::Error>;
230 
231     /// Merge, rebalance or update a concatenation.
merge_or_rebalance<'a, 'b, T: AllocPage>( txn: &mut T, m: del::Concat<'a, K, V, Self>, ) -> Result<del::Op<'a, T, K, V>, T::Error>232     fn merge_or_rebalance<'a, 'b, T: AllocPage>(
233         txn: &mut T,
234         m: del::Concat<'a, K, V, Self>,
235     ) -> Result<del::Op<'a, T, K, V>, T::Error>;
236 }
237 
238 /// A database, which is essentially just a page offset along with markers.
239 #[derive(Debug)]
240 pub struct Db_<K: ?Sized, V: ?Sized, P: BTreePage<K, V>> {
241     pub db: u64,
242     pub k: core::marker::PhantomData<K>,
243     pub v: core::marker::PhantomData<V>,
244     pub p: core::marker::PhantomData<P>,
245 }
246 
247 /// A database of sized values.
248 pub type Db<K, V> = Db_<K, V, page::Page<K, V>>;
249 
250 /// A database of unsized values.
251 pub type UDb<K, V> = Db_<K, V, page_unsized::Page<K, V>>;
252 
253 impl<K: ?Sized, V: ?Sized, P: BTreePage<K, V>> Db_<K, V, P> {
254     /// Load a database from a page offset.
from_page(db: u64) -> Self255     pub fn from_page(db: u64) -> Self {
256         Db_ {
257             db,
258             k: core::marker::PhantomData,
259             v: core::marker::PhantomData,
260             p: core::marker::PhantomData,
261         }
262     }
263 }
264 
265 /// Create a database with an arbitrary page implementation.
create_db_<T: AllocPage, K: ?Sized, V: ?Sized, P: BTreeMutPage<K, V>>( txn: &mut T, ) -> Result<Db_<K, V, P>, T::Error>266 pub fn create_db_<T: AllocPage, K: ?Sized, V: ?Sized, P: BTreeMutPage<K, V>>(
267     txn: &mut T,
268 ) -> Result<Db_<K, V, P>, T::Error> {
269     let mut page = txn.alloc_page()?;
270     P::init(&mut page);
271     Ok(Db_ {
272         db: page.0.offset,
273         k: core::marker::PhantomData,
274         v: core::marker::PhantomData,
275         p: core::marker::PhantomData,
276     })
277 }
278 
279 /// Create a database for sized keys and values.
create_db<T: AllocPage, K: Storable, V: Storable>( txn: &mut T, ) -> Result<Db_<K, V, page::Page<K, V>>, T::Error>280 pub fn create_db<T: AllocPage, K: Storable, V: Storable>(
281     txn: &mut T,
282 ) -> Result<Db_<K, V, page::Page<K, V>>, T::Error> {
283     create_db_(txn)
284 }
285 
286 /// Fork a database.
fork_db<T: AllocPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreeMutPage<K, V>>( txn: &mut T, db: &Db_<K, V, P>, ) -> Result<Db_<K, V, P>, T::Error>287 pub fn fork_db<T: AllocPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreeMutPage<K, V>>(
288     txn: &mut T,
289     db: &Db_<K, V, P>,
290 ) -> Result<Db_<K, V, P>, T::Error> {
291     txn.incr_rc(db.db)?;
292     Ok(Db_ {
293         db: db.db,
294         k: core::marker::PhantomData,
295         v: core::marker::PhantomData,
296         p: core::marker::PhantomData,
297     })
298 }
299 
300 /// Get the first entry of a database greater than or equal to `k` (or
301 /// to `(k, v)` if `v.is_some()`).
get<'a, T: LoadPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreePage<K, V>>( txn: &'a T, db: &Db_<K, V, P>, k: &K, v: Option<&V>, ) -> Result<Option<(&'a K, &'a V)>, T::Error>302 pub fn get<'a, T: LoadPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreePage<K, V>>(
303     txn: &'a T,
304     db: &Db_<K, V, P>,
305     k: &K,
306     v: Option<&V>,
307 ) -> Result<Option<(&'a K, &'a V)>, T::Error> {
308     // Set the "cursor stack" by setting a skip list cursor in
309     // each page from the root to the appropriate leaf.
310     let mut last_match = None;
311     let mut page = txn.load_page(db.db)?;
312     // This is a for loop, to allow the compiler to unroll (maybe).
313     for _ in 0..cursor::N_CURSORS {
314         let mut cursor = P::cursor_before(&page);
315         if let Ok((kk, vv, _)) = P::set_cursor(txn, page.as_page(), &mut cursor, k, v) {
316             if v.is_some() {
317                 return Ok(Some((kk, vv)));
318             }
319             last_match = Some((kk, vv));
320         } else if let Some((k, v, _)) = P::current(txn, page.as_page(), &cursor) {
321             // Here, Rust says that `k` and `v` have the same lifetime
322             // as `page`, but we actually know that they're alive for
323             // as long as `txn` doesn't change, so we can safely
324             // extend their lifetimes:
325             unsafe { last_match = Some((core::mem::transmute(k), core::mem::transmute(v))) }
326         }
327         // The cursor is set to the first element greater than or
328         // equal to the (k, v) we're looking for, so we need to
329         // explore the left subtree.
330         let next_page = P::left_child(page.as_page(), &cursor);
331         if next_page > 0 {
332             page = txn.load_page(next_page)?;
333         } else {
334             break;
335         }
336     }
337     Ok(last_match)
338 }
339 
340 /// Drop a database recursively, dropping all referenced keys and
341 /// values that aren't shared with other databases.
drop<T: AllocPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreePage<K, V>>( txn: &mut T, db: Db_<K, V, P>, ) -> Result<(), T::Error>342 pub fn drop<T: AllocPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreePage<K, V>>(
343     txn: &mut T,
344     db: Db_<K, V, P>,
345 ) -> Result<(), T::Error> {
346     // In order to make this function tail-recursive, we simulate a
347     // stack with the following:
348     use core::mem::MaybeUninit;
349     let mut stack: [MaybeUninit<cursor::PageCursor<K, V, P>>; cursor::N_CURSORS] = [
350         MaybeUninit::uninit(),
351         MaybeUninit::uninit(),
352         MaybeUninit::uninit(),
353         MaybeUninit::uninit(),
354         MaybeUninit::uninit(),
355         MaybeUninit::uninit(),
356         MaybeUninit::uninit(),
357         MaybeUninit::uninit(),
358         MaybeUninit::uninit(),
359         MaybeUninit::uninit(),
360         MaybeUninit::uninit(),
361         MaybeUninit::uninit(),
362         MaybeUninit::uninit(),
363         MaybeUninit::uninit(),
364         MaybeUninit::uninit(),
365         MaybeUninit::uninit(),
366         MaybeUninit::uninit(),
367         MaybeUninit::uninit(),
368         MaybeUninit::uninit(),
369         MaybeUninit::uninit(),
370         MaybeUninit::uninit(),
371         MaybeUninit::uninit(),
372         MaybeUninit::uninit(),
373         MaybeUninit::uninit(),
374         MaybeUninit::uninit(),
375         MaybeUninit::uninit(),
376         MaybeUninit::uninit(),
377         MaybeUninit::uninit(),
378         MaybeUninit::uninit(),
379         MaybeUninit::uninit(),
380         MaybeUninit::uninit(),
381         MaybeUninit::uninit(),
382         MaybeUninit::uninit(),
383     ];
384     let mut ptr = 0;
385 
386     // Push the root page of `db` onto the stack.
387     let page = txn.load_page(db.db)?;
388     stack[0] = MaybeUninit::new(cursor::PageCursor {
389         cursor: P::cursor_before(&page),
390         page,
391     });
392 
393     // Then perform a DFS:
394     loop {
395         // Look at the top element of the stack.
396         let cur = unsafe { &mut *stack[ptr].as_mut_ptr() };
397         // If it needs to be dropped (i.e. if its RC is <= 1), iterate
398         // its cursor and drop all its referenced pages.
399         let rc = txn.rc(cur.page.offset)?;
400         if rc <= 1 {
401             // if there's a current element in the cursor (i.e. we
402             // aren't before or after the elements), decrease its RC.
403             if let Some((k, v, _)) = P::current(txn, cur.page.as_page(), &cur.cursor) {
404                 for o in k.page_references().chain(v.page_references()) {
405                     txn.decr_rc(o)?;
406                 }
407             }
408             // In all cases, move next and push the left child onto
409             // the stack. This works because pushed cursors are
410             // initially set to before the page's elements.
411             if P::move_next(&mut cur.cursor) {
412                 let r = P::left_child(cur.page.as_page(), &cur.cursor);
413                 if r > 0 {
414                     ptr += 1;
415                     let page = txn.load_page(r)?;
416                     stack[ptr] = MaybeUninit::new(cursor::PageCursor {
417                         cursor: P::cursor_before(&page),
418                         page,
419                     })
420                 }
421                 continue;
422             }
423         }
424         // Here, either rc > 1 (in which case the only thing we need
425         // to do in this iteration is to decrement the RC), or else
426         // `P::move_next` returned `false`, meaning that the cursor is
427         // after the last element (in which case we are done with this
428         // page, and also need to decrement its RC, in order to free
429         // it).
430         if cur.page.is_dirty() {
431             txn.decr_rc_owned(cur.page.offset)?;
432         } else {
433             txn.decr_rc(cur.page.offset)?;
434         }
435         // If this was the bottom element of the stack, stop, else, pop.
436         if ptr == 0 {
437             break;
438         } else {
439             ptr -= 1;
440         }
441     }
442     Ok(())
443 }
444