1 //! An implementation of B trees. The core operations on B trees
2 //! (lookup, iterate, put and del) are generic in the actual
3 //! implementation of nodes, via the [`BTreePage`] and
4 //! [`BTreeMutPage`]. This allows for a simpler code for the
5 //! high-level functions, as well as specialised, high-performance
6 //! implementations for the nodes.
7 //!
8 //! Two different implementations are supplied: one in [`page`] for
9 //! types with a size known at compile-time, which yields denser
10 //! leaves, and hence shallower trees (if the values are really using
11 //! the space). The other one, in [`page_unsized`], is for
12 //! dynamic-sized types, or types whose representation is dynamic, for
13 //! example enums that are `Sized` in Rust, but whose cases vary
14 //! widely in size.
15 use crate::*;
16 #[doc(hidden)]
17 pub mod cursor;
18 pub use cursor::{iter, rev_iter, Cursor, Iter, RevIter};
19 pub mod del;
20 pub use del::del;
21 pub mod put;
22 pub use put::put;
23 pub mod page;
24 pub mod page_unsized;
25
26 pub trait BTreePage<K: ?Sized, V: ?Sized>: Sized {
27 type Cursor: Clone + Copy + core::fmt::Debug;
28
29 /// Whether this cursor is at the end of the page.
is_empty(c: &Self::Cursor) -> bool30 fn is_empty(c: &Self::Cursor) -> bool;
31
32 /// Whether this cursor is strictly before the first element.
is_init(c: &Self::Cursor) -> bool33 fn is_init(c: &Self::Cursor) -> bool;
34
35 /// Returns a cursor set before the first element of the page
36 /// (i.e. set to -1).
cursor_before(p: &CowPage) -> Self::Cursor37 fn cursor_before(p: &CowPage) -> Self::Cursor;
38
39 /// Returns a cursor set to the first element of the page
40 /// (i.e. 0). If the page is empty, the returned cursor might be
41 /// empty.
cursor_first(p: &CowPage) -> Self::Cursor42 fn cursor_first(p: &CowPage) -> Self::Cursor {
43 let mut c = Self::cursor_before(p);
44 Self::move_next(&mut c);
45 c
46 }
47
48 /// Returns a cursor set after the last element of the page
49 /// (i.e. to element n)
cursor_after(p: &CowPage) -> Self::Cursor50 fn cursor_after(p: &CowPage) -> Self::Cursor;
51
52 /// Returns a cursor set to the last element of the page. If the
53 /// cursor is empty, this is the same as `cursor_before`.
cursor_last(p: &CowPage) -> Self::Cursor54 fn cursor_last(p: &CowPage) -> Self::Cursor {
55 let mut c = Self::cursor_after(p);
56 Self::move_prev(&mut c);
57 c
58 }
59
60 /// Return the element currently pointed to by the cursor (if the
61 /// cursor is not before or after the elements of the page), and
62 /// move the cursor to the next element.
next<'b, T: LoadPage>( txn: &T, p: Page<'b>, c: &mut Self::Cursor, ) -> Option<(&'b K, &'b V, u64)>63 fn next<'b, T: LoadPage>(
64 txn: &T,
65 p: Page<'b>,
66 c: &mut Self::Cursor,
67 ) -> Option<(&'b K, &'b V, u64)> {
68 if let Some((k, v, r)) = Self::current(txn, p, c) {
69 Self::move_next(c);
70 Some((k, v, r))
71 } else {
72 None
73 }
74 }
75
76 /// Move the cursor to the previous element, and return that
77 /// element. Note that this is not the symmetric of `next`.
prev<'b, T: LoadPage>( txn: &T, p: Page<'b>, c: &mut Self::Cursor, ) -> Option<(&'b K, &'b V, u64)>78 fn prev<'b, T: LoadPage>(
79 txn: &T,
80 p: Page<'b>,
81 c: &mut Self::Cursor,
82 ) -> Option<(&'b K, &'b V, u64)> {
83 if Self::move_prev(c) {
84 Self::current(txn, p, c)
85 } else {
86 None
87 }
88 }
89
90 /// Move the cursor to the next position. Returns whether the
91 /// cursor was actually moved (i.e. `true` if and only if the
92 /// cursor isn't already after the last element).
move_next(c: &mut Self::Cursor) -> bool93 fn move_next(c: &mut Self::Cursor) -> bool;
94
95 /// Move the cursor to the previous position. Returns whether the
96 /// cursor was actually moved (i.e. `true` if and only if the
97 /// cursor isn't strictly before the page).
move_prev(c: &mut Self::Cursor) -> bool98 fn move_prev(c: &mut Self::Cursor) -> bool;
99
100 /// Returns the current element, if the cursor is pointing at one.
current<'a, T: LoadPage>( txn: &T, p: Page<'a>, c: &Self::Cursor, ) -> Option<(&'a K, &'a V, u64)>101 fn current<'a, T: LoadPage>(
102 txn: &T,
103 p: Page<'a>,
104 c: &Self::Cursor,
105 ) -> Option<(&'a K, &'a V, u64)>;
106
107 /// Returns the left child of the entry pointed to by the cursor.
left_child(p: Page, c: &Self::Cursor) -> u64108 fn left_child(p: Page, c: &Self::Cursor) -> u64;
109
110 /// Returns the right child of the entry pointed to by the cursor.
right_child(p: Page, c: &Self::Cursor) -> u64111 fn right_child(p: Page, c: &Self::Cursor) -> u64;
112
113 /// Sets the cursor to the last element less than or equal to `k0`
114 /// if `v0.is_none()`, and to `(k0, v0)` if `v0.is_some()`. If a
115 /// match is found (on `k0` only if `v0.is_none()`, on `(k0, v0)`
116 /// else), return the match along with the right child.
117 ///
118 /// Else (in the `Err` case of the `Result`), return the position
119 /// at which `(k0, v0)` can be inserted.
set_cursor<'a, T: LoadPage>( txn: &'a T, page: Page, c: &mut Self::Cursor, k0: &K, v0: Option<&V>, ) -> Result<(&'a K, &'a V, u64), usize>120 fn set_cursor<'a, T: LoadPage>(
121 txn: &'a T,
122 page: Page,
123 c: &mut Self::Cursor,
124 k0: &K,
125 v0: Option<&V>,
126 ) -> Result<(&'a K, &'a V, u64), usize>;
127
128 /// Splits the cursor into two cursors: the elements strictly
129 /// before the current position, and the elements greater than or
130 /// equal the current element.
split_at(c: &Self::Cursor) -> (Self::Cursor, Self::Cursor)131 fn split_at(c: &Self::Cursor) -> (Self::Cursor, Self::Cursor);
132 }
133
134 pub struct PageIterator<'a, T: LoadPage, K: ?Sized, V: ?Sized, P: BTreePage<K, V>> {
135 cursor: P::Cursor,
136 txn: &'a T,
137 page: Page<'a>,
138 }
139
140 impl<'a, T: LoadPage, K: ?Sized + 'a, V: ?Sized + 'a, P: BTreePage<K, V>> Iterator
141 for PageIterator<'a, T, K, V, P>
142 {
143 type Item = (&'a K, &'a V, u64);
next(&mut self) -> Option<Self::Item>144 fn next(&mut self) -> Option<Self::Item> {
145 P::next(self.txn, self.page, &mut self.cursor)
146 }
147 }
148
149 pub trait BTreeMutPage<K: ?Sized, V: ?Sized>: BTreePage<K, V> + core::fmt::Debug {
150 /// Initialise a page.
init(page: &mut MutPage)151 fn init(page: &mut MutPage);
152
153 /// Add an entry to the page, possibly splitting the page in the
154 /// process.
155 ///
156 /// Makes the assumption that `k1v1.is_some()` implies
157 /// `replace`. When `k1v1.is_some()`, we insert both `(k0, v0)`
158 /// (as a replacement), followed by `(k1, v1)`. This is only ever
159 /// used when deleting, and when the right child of a deleted
160 /// entry (in an internal node) has split while we were looking
161 /// for a replacement for the deleted entry.
162 ///
163 /// Since we only look for replacements in the right child, the
164 /// left child of the insertion isn't modified, in which case `l`
165 /// and `r` are interpreted as the left and right child of the new
166 /// entry, `k1v1`.
put<'a, T: AllocPage>( txn: &mut T, page: CowPage, mutable: bool, replace: bool, c: &Self::Cursor, k0: &'a K, v0: &'a V, k1v1: Option<(&'a K, &'a V)>, l: u64, r: u64, ) -> Result<crate::btree::put::Put<'a, K, V>, T::Error>167 fn put<'a, T: AllocPage>(
168 txn: &mut T,
169 page: CowPage,
170 mutable: bool,
171 replace: bool,
172 c: &Self::Cursor,
173 k0: &'a K,
174 v0: &'a V,
175 k1v1: Option<(&'a K, &'a V)>,
176 l: u64,
177 r: u64,
178 ) -> Result<crate::btree::put::Put<'a, K, V>, T::Error>;
179
180 /// Add an entry to `page`, at position `c`. Does not check
181 /// whether there is enough space to do so. This method is mostly
182 /// useful for cloning pages.
183 #[allow(unused_variables)]
put_mut( page: &mut MutPage, c: &mut Self::Cursor, k0: &K, v0: &V, r: u64, )184 unsafe fn put_mut(
185 page: &mut MutPage,
186 c: &mut Self::Cursor,
187 k0: &K,
188 v0: &V,
189 r: u64,
190 );
191
192 #[allow(unused_variables)]
set_left_child( page: &mut MutPage, c: &Self::Cursor, l: u64 )193 unsafe fn set_left_child(
194 page: &mut MutPage,
195 c: &Self::Cursor,
196 l: u64
197 );
198
199
200 /// Update the left child of the position pointed to by the
201 /// cursor.
update_left_child<T: AllocPage>( txn: &mut T, page: CowPage, mutable: bool, c: &Self::Cursor, r: u64, ) -> Result<crate::btree::put::Ok, T::Error>202 fn update_left_child<T: AllocPage>(
203 txn: &mut T,
204 page: CowPage,
205 mutable: bool,
206 c: &Self::Cursor,
207 r: u64,
208 ) -> Result<crate::btree::put::Ok, T::Error>;
209
210 type Saved;
211
212 /// Save a leaf entry as a replacement, when we delete at an
213 /// internal node. This can be a pointer to the leaf if the leaf
214 /// isn't mutated, or the actual value, or something else.
save_deleted_leaf_entry(k: &K, v: &V) -> Self::Saved215 fn save_deleted_leaf_entry(k: &K, v: &V) -> Self::Saved;
216
217 /// Recover the saved value.
from_saved<'a>(s: &Self::Saved) -> (&'a K, &'a V)218 unsafe fn from_saved<'a>(s: &Self::Saved) -> (&'a K, &'a V);
219
220 /// Delete an entry on the page, returning the resuting page along
221 /// with the offset of the freed page (or 0 if no page was freed,
222 /// i.e. if `page` is mutable).
del<T: AllocPage>( txn: &mut T, page: CowPage, mutable: bool, c: &Self::Cursor, l: u64, ) -> Result<(MutPage, u64), T::Error>223 fn del<T: AllocPage>(
224 txn: &mut T,
225 page: CowPage,
226 mutable: bool,
227 c: &Self::Cursor,
228 l: u64,
229 ) -> Result<(MutPage, u64), T::Error>;
230
231 /// Merge, rebalance or update a concatenation.
merge_or_rebalance<'a, 'b, T: AllocPage>( txn: &mut T, m: del::Concat<'a, K, V, Self>, ) -> Result<del::Op<'a, T, K, V>, T::Error>232 fn merge_or_rebalance<'a, 'b, T: AllocPage>(
233 txn: &mut T,
234 m: del::Concat<'a, K, V, Self>,
235 ) -> Result<del::Op<'a, T, K, V>, T::Error>;
236 }
237
238 /// A database, which is essentially just a page offset along with markers.
239 #[derive(Debug)]
240 pub struct Db_<K: ?Sized, V: ?Sized, P: BTreePage<K, V>> {
241 pub db: u64,
242 pub k: core::marker::PhantomData<K>,
243 pub v: core::marker::PhantomData<V>,
244 pub p: core::marker::PhantomData<P>,
245 }
246
247 /// A database of sized values.
248 pub type Db<K, V> = Db_<K, V, page::Page<K, V>>;
249
250 /// A database of unsized values.
251 pub type UDb<K, V> = Db_<K, V, page_unsized::Page<K, V>>;
252
253 impl<K: ?Sized, V: ?Sized, P: BTreePage<K, V>> Db_<K, V, P> {
254 /// Load a database from a page offset.
from_page(db: u64) -> Self255 pub fn from_page(db: u64) -> Self {
256 Db_ {
257 db,
258 k: core::marker::PhantomData,
259 v: core::marker::PhantomData,
260 p: core::marker::PhantomData,
261 }
262 }
263 }
264
265 /// Create a database with an arbitrary page implementation.
create_db_<T: AllocPage, K: ?Sized, V: ?Sized, P: BTreeMutPage<K, V>>( txn: &mut T, ) -> Result<Db_<K, V, P>, T::Error>266 pub fn create_db_<T: AllocPage, K: ?Sized, V: ?Sized, P: BTreeMutPage<K, V>>(
267 txn: &mut T,
268 ) -> Result<Db_<K, V, P>, T::Error> {
269 let mut page = txn.alloc_page()?;
270 P::init(&mut page);
271 Ok(Db_ {
272 db: page.0.offset,
273 k: core::marker::PhantomData,
274 v: core::marker::PhantomData,
275 p: core::marker::PhantomData,
276 })
277 }
278
279 /// Create a database for sized keys and values.
create_db<T: AllocPage, K: Storable, V: Storable>( txn: &mut T, ) -> Result<Db_<K, V, page::Page<K, V>>, T::Error>280 pub fn create_db<T: AllocPage, K: Storable, V: Storable>(
281 txn: &mut T,
282 ) -> Result<Db_<K, V, page::Page<K, V>>, T::Error> {
283 create_db_(txn)
284 }
285
286 /// Fork a database.
fork_db<T: AllocPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreeMutPage<K, V>>( txn: &mut T, db: &Db_<K, V, P>, ) -> Result<Db_<K, V, P>, T::Error>287 pub fn fork_db<T: AllocPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreeMutPage<K, V>>(
288 txn: &mut T,
289 db: &Db_<K, V, P>,
290 ) -> Result<Db_<K, V, P>, T::Error> {
291 txn.incr_rc(db.db)?;
292 Ok(Db_ {
293 db: db.db,
294 k: core::marker::PhantomData,
295 v: core::marker::PhantomData,
296 p: core::marker::PhantomData,
297 })
298 }
299
300 /// Get the first entry of a database greater than or equal to `k` (or
301 /// to `(k, v)` if `v.is_some()`).
get<'a, T: LoadPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreePage<K, V>>( txn: &'a T, db: &Db_<K, V, P>, k: &K, v: Option<&V>, ) -> Result<Option<(&'a K, &'a V)>, T::Error>302 pub fn get<'a, T: LoadPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreePage<K, V>>(
303 txn: &'a T,
304 db: &Db_<K, V, P>,
305 k: &K,
306 v: Option<&V>,
307 ) -> Result<Option<(&'a K, &'a V)>, T::Error> {
308 // Set the "cursor stack" by setting a skip list cursor in
309 // each page from the root to the appropriate leaf.
310 let mut last_match = None;
311 let mut page = txn.load_page(db.db)?;
312 // This is a for loop, to allow the compiler to unroll (maybe).
313 for _ in 0..cursor::N_CURSORS {
314 let mut cursor = P::cursor_before(&page);
315 if let Ok((kk, vv, _)) = P::set_cursor(txn, page.as_page(), &mut cursor, k, v) {
316 if v.is_some() {
317 return Ok(Some((kk, vv)));
318 }
319 last_match = Some((kk, vv));
320 } else if let Some((k, v, _)) = P::current(txn, page.as_page(), &cursor) {
321 // Here, Rust says that `k` and `v` have the same lifetime
322 // as `page`, but we actually know that they're alive for
323 // as long as `txn` doesn't change, so we can safely
324 // extend their lifetimes:
325 unsafe { last_match = Some((core::mem::transmute(k), core::mem::transmute(v))) }
326 }
327 // The cursor is set to the first element greater than or
328 // equal to the (k, v) we're looking for, so we need to
329 // explore the left subtree.
330 let next_page = P::left_child(page.as_page(), &cursor);
331 if next_page > 0 {
332 page = txn.load_page(next_page)?;
333 } else {
334 break;
335 }
336 }
337 Ok(last_match)
338 }
339
340 /// Drop a database recursively, dropping all referenced keys and
341 /// values that aren't shared with other databases.
drop<T: AllocPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreePage<K, V>>( txn: &mut T, db: Db_<K, V, P>, ) -> Result<(), T::Error>342 pub fn drop<T: AllocPage, K: Storable + ?Sized, V: Storable + ?Sized, P: BTreePage<K, V>>(
343 txn: &mut T,
344 db: Db_<K, V, P>,
345 ) -> Result<(), T::Error> {
346 // In order to make this function tail-recursive, we simulate a
347 // stack with the following:
348 use core::mem::MaybeUninit;
349 let mut stack: [MaybeUninit<cursor::PageCursor<K, V, P>>; cursor::N_CURSORS] = [
350 MaybeUninit::uninit(),
351 MaybeUninit::uninit(),
352 MaybeUninit::uninit(),
353 MaybeUninit::uninit(),
354 MaybeUninit::uninit(),
355 MaybeUninit::uninit(),
356 MaybeUninit::uninit(),
357 MaybeUninit::uninit(),
358 MaybeUninit::uninit(),
359 MaybeUninit::uninit(),
360 MaybeUninit::uninit(),
361 MaybeUninit::uninit(),
362 MaybeUninit::uninit(),
363 MaybeUninit::uninit(),
364 MaybeUninit::uninit(),
365 MaybeUninit::uninit(),
366 MaybeUninit::uninit(),
367 MaybeUninit::uninit(),
368 MaybeUninit::uninit(),
369 MaybeUninit::uninit(),
370 MaybeUninit::uninit(),
371 MaybeUninit::uninit(),
372 MaybeUninit::uninit(),
373 MaybeUninit::uninit(),
374 MaybeUninit::uninit(),
375 MaybeUninit::uninit(),
376 MaybeUninit::uninit(),
377 MaybeUninit::uninit(),
378 MaybeUninit::uninit(),
379 MaybeUninit::uninit(),
380 MaybeUninit::uninit(),
381 MaybeUninit::uninit(),
382 MaybeUninit::uninit(),
383 ];
384 let mut ptr = 0;
385
386 // Push the root page of `db` onto the stack.
387 let page = txn.load_page(db.db)?;
388 stack[0] = MaybeUninit::new(cursor::PageCursor {
389 cursor: P::cursor_before(&page),
390 page,
391 });
392
393 // Then perform a DFS:
394 loop {
395 // Look at the top element of the stack.
396 let cur = unsafe { &mut *stack[ptr].as_mut_ptr() };
397 // If it needs to be dropped (i.e. if its RC is <= 1), iterate
398 // its cursor and drop all its referenced pages.
399 let rc = txn.rc(cur.page.offset)?;
400 if rc <= 1 {
401 // if there's a current element in the cursor (i.e. we
402 // aren't before or after the elements), decrease its RC.
403 if let Some((k, v, _)) = P::current(txn, cur.page.as_page(), &cur.cursor) {
404 for o in k.page_references().chain(v.page_references()) {
405 txn.decr_rc(o)?;
406 }
407 }
408 // In all cases, move next and push the left child onto
409 // the stack. This works because pushed cursors are
410 // initially set to before the page's elements.
411 if P::move_next(&mut cur.cursor) {
412 let r = P::left_child(cur.page.as_page(), &cur.cursor);
413 if r > 0 {
414 ptr += 1;
415 let page = txn.load_page(r)?;
416 stack[ptr] = MaybeUninit::new(cursor::PageCursor {
417 cursor: P::cursor_before(&page),
418 page,
419 })
420 }
421 continue;
422 }
423 }
424 // Here, either rc > 1 (in which case the only thing we need
425 // to do in this iteration is to decrement the RC), or else
426 // `P::move_next` returned `false`, meaning that the cursor is
427 // after the last element (in which case we are done with this
428 // page, and also need to decrement its RC, in order to free
429 // it).
430 if cur.page.is_dirty() {
431 txn.decr_rc_owned(cur.page.offset)?;
432 } else {
433 txn.decr_rc(cur.page.offset)?;
434 }
435 // If this was the bottom element of the stack, stop, else, pop.
436 if ptr == 0 {
437 break;
438 } else {
439 ptr -= 1;
440 }
441 }
442 Ok(())
443 }
444