1 /* $NetBSD: slmdb.c,v 1.4 2022/10/08 16:12:50 christos Exp $ */
2
3 /*++
4 /* NAME
5 /* slmdb 3
6 /* SUMMARY
7 /* Simplified LMDB API
8 /* SYNOPSIS
9 /* #include <slmdb.h>
10 /*
11 /* int slmdb_init(slmdb, curr_limit, size_incr, hard_limit)
12 /* SLMDB *slmdb;
13 /* size_t curr_limit;
14 /* int size_incr;
15 /* size_t hard_limit;
16 /*
17 /* int slmdb_open(slmdb, path, open_flags, lmdb_flags, slmdb_flags)
18 /* SLMDB *slmdb;
19 /* const char *path;
20 /* int open_flags;
21 /* int lmdb_flags;
22 /* int slmdb_flags;
23 /*
24 /* int slmdb_close(slmdb)
25 /* SLMDB *slmdb;
26 /*
27 /* int slmdb_get(slmdb, mdb_key, mdb_value)
28 /* SLMDB *slmdb;
29 /* MDB_val *mdb_key;
30 /* MDB_val *mdb_value;
31 /*
32 /* int slmdb_put(slmdb, mdb_key, mdb_value, flags)
33 /* SLMDB *slmdb;
34 /* MDB_val *mdb_key;
35 /* MDB_val *mdb_value;
36 /* int flags;
37 /*
38 /* int slmdb_del(slmdb, mdb_key)
39 /* SLMDB *slmdb;
40 /* MDB_val *mdb_key;
41 /*
42 /* int slmdb_cursor_get(slmdb, mdb_key, mdb_value, op)
43 /* SLMDB *slmdb;
44 /* MDB_val *mdb_key;
45 /* MDB_val *mdb_value;
46 /* MDB_cursor_op op;
47 /* AUXILIARY FUNCTIONS
48 /* int slmdb_fd(slmdb)
49 /* SLMDB *slmdb;
50 /*
51 /* size_t slmdb_curr_limit(slmdb)
52 /* SLMDB *slmdb;
53 /*
54 /* int slmdb_control(slmdb, request, ...)
55 /* SLMDB *slmdb;
56 /* int request;
57 /* DESCRIPTION
58 /* This module simplifies the LMDB API by hiding recoverable
59 /* errors from the application. Details are given in the
60 /* section "ERROR RECOVERY".
61 /*
62 /* slmdb_init() performs mandatory initialization before opening
63 /* an LMDB database. The result value is an LMDB status code
64 /* (zero in case of success).
65 /*
66 /* slmdb_open() opens an LMDB database. The result value is
67 /* an LMDB status code (zero in case of success).
68 /*
69 /* slmdb_close() finalizes an optional bulk-mode transaction
70 /* and closes a successfully-opened LMDB database. The result
71 /* value is an LMDB status code (zero in case of success).
72 /*
73 /* slmdb_get() is an mdb_get() wrapper with automatic error
74 /* recovery. The result value is an LMDB status code (zero
75 /* in case of success).
76 /*
77 /* slmdb_put() is an mdb_put() wrapper with automatic error
78 /* recovery. The result value is an LMDB status code (zero
79 /* in case of success).
80 /*
81 /* slmdb_del() is an mdb_del() wrapper with automatic error
82 /* recovery. The result value is an LMDB status code (zero
83 /* in case of success).
84 /*
85 /* slmdb_cursor_get() is an mdb_cursor_get() wrapper with
86 /* automatic error recovery. The result value is an LMDB
87 /* status code (zero in case of success). This wrapper supports
88 /* only one cursor per database.
89 /*
90 /* slmdb_fd() returns the file descriptor for the specified
91 /* database. This may be used for file status queries or
92 /* application-controlled locking.
93 /*
94 /* slmdb_curr_limit() returns the current database size limit
95 /* for the specified database.
96 /*
97 /* slmdb_control() specifies optional features. The result is
98 /* an LMDB status code (zero in case of success).
99 /*
100 /* Arguments:
101 /* .IP slmdb
102 /* Pointer to caller-provided storage.
103 /* .IP curr_limit
104 /* The initial memory mapping size limit. This limit is
105 /* automatically increased when the database becomes full.
106 /* .IP size_incr
107 /* An integer factor by which the memory mapping size limit
108 /* is increased when the database becomes full.
109 /* .IP hard_limit
110 /* The upper bound for the memory mapping size limit.
111 /* .IP path
112 /* LMDB database pathname.
113 /* .IP open_flags
114 /* Flags that control file open operations. Do not specify
115 /* locking flags here.
116 /* .IP lmdb_flags
117 /* Flags that control the LMDB environment. If MDB_NOLOCK is
118 /* specified, then each slmdb_get() or slmdb_cursor_get() call
119 /* must be protected with a shared (or exclusive) external lock,
120 /* and each slmdb_put() or slmdb_del() call must be protected
121 /* with an exclusive external lock. A lock may be released
122 /* after the call returns. A writer may atomically downgrade
123 /* an exclusive lock to shared, but it must obtain an exclusive
124 /* lock before making another slmdb(3) write request.
125 /* .sp
126 /* Note: when a database is opened with MDB_NOLOCK, external
127 /* locks such as fcntl() do not protect slmdb(3) requests
128 /* within the same process against each other. If a program
129 /* cannot avoid making simultaneous slmdb(3) requests, then
130 /* it must synchronize these requests with in-process locks,
131 /* in addition to the per-process fcntl(2) locks.
132 /* .IP slmdb_flags
133 /* Bit-wise OR of zero or more of the following:
134 /* .RS
135 /* .IP SLMDB_FLAG_BULK
136 /* Open the database and create a "bulk" transaction that is
137 /* committed when the database is closed. If MDB_NOLOCK is
138 /* specified, then the entire transaction must be protected
139 /* with a persistent external lock. All slmdb_get(), slmdb_put()
140 /* and slmdb_del() requests will be directed to the "bulk"
141 /* transaction.
142 /* .RE
143 /* .IP mdb_key
144 /* Pointer to caller-provided lookup key storage.
145 /* .IP mdb_value
146 /* Pointer to caller-provided value storage.
147 /* .IP op
148 /* LMDB cursor operation.
149 /* .IP request
150 /* The start of a list of (name, value) pairs, terminated with
151 /* CA_SLMDB_CTL_END. The following text enumerates the symbolic
152 /* request names and the corresponding argument types.
153 /* .RS
154 /* .IP "CA_SLMDB_CTL_LONGJMP_FN(void (*)(void *, int))"
155 /* Call-back function pointer. The function is called to repeat
156 /* a failed bulk-mode transaction from the start. The arguments
157 /* are the application context and the setjmp() or sigsetjmp()
158 /* result value.
159 /* .IP "CA_SLMDB_CTL_NOTIFY_FN(void (*)(void *, int, ...))"
160 /* Call-back function pointer. The function is called to report
161 /* successful error recovery. The arguments are the application
162 /* context, the MDB error code, and additional arguments that
163 /* depend on the error code. Details are given in the section
164 /* "ERROR RECOVERY".
165 /* .IP "CA_SLMDB_CTL_ASSERT_FN(void (*)(void *, const char *))"
166 /* Call-back function pointer. The function is called to
167 /* report an LMDB internal assertion failure. The arguments
168 /* are the application context, and text that describes the
169 /* problem.
170 /* .IP "CA_SLMDB_CTL_CB_CONTEXT(void *)"
171 /* Application context that is passed in call-back function
172 /* calls.
173 /* .IP "CA_SLMDB_CTL_API_RETRY_LIMIT(int)"
174 /* How many times to recover from LMDB errors within the
175 /* execution of a single slmdb(3) API call before giving up.
176 /* .IP "CA_SLMDB_CTL_BULK_RETRY_LIMIT(int)"
/*	How many times to recover from a failed bulk-mode transaction
/*	before giving up.
179 /* .RE
180 /* ERROR RECOVERY
181 /* .ad
182 /* .fi
183 /* This module automatically repeats failed requests after
184 /* recoverable errors, up to the limits specified with
185 /* slmdb_control().
186 /*
187 /* Recoverable errors are reported through an optional
188 /* notification function specified with slmdb_control(). With
189 /* recoverable MDB_MAP_FULL and MDB_MAP_RESIZED errors, the
190 /* additional argument is a size_t value with the updated
191 /* current database size limit; with recoverable MDB_READERS_FULL
192 /* errors there is no additional argument.
193 /* BUGS
194 /* Recovery from MDB_MAP_FULL involves resizing the database
195 /* memory mapping. According to LMDB documentation this
196 /* requires that there is no concurrent activity in the same
197 /* database by other threads in the same memory address space.
198 /* SEE ALSO
199 /* lmdb(3) API manpage (currently, non-existent).
200 /* AUTHOR(S)
201 /* Howard Chu
202 /* Symas Corporation
203 /*
204 /* Wietse Venema
205 /* IBM T.J. Watson Research
206 /* P.O. Box 704
207 /* Yorktown Heights, NY 10598, USA
208 /*
209 /* Wietse Venema
210 /* Google, Inc.
211 /* 111 8th Avenue
212 /* New York, NY 10011, USA
213 /*--*/
214
215 /*
216 * DO NOT include other Postfix-specific header files. This LMDB wrapper
217 * must be usable outside Postfix.
218 */
219
220 #ifdef HAS_LMDB
221
222 /* System library. */
223
224 #include <sys/stat.h>
225 #include <errno.h>
226 #include <fcntl.h>
227 #include <string.h>
228 #include <unistd.h>
229 #include <limits.h>
230 #include <stdarg.h>
231 #include <string.h>
232 #include <stdlib.h>
233
234 /* Application-specific. */
235
236 #include <slmdb.h>
237
238 /*
239 * Minimum LMDB patchlevel.
240 *
241 * LMDB 0.9.11 allows Postfix daemons to log an LMDB error message instead of
242 * falling out of the sky without any explanation. Without such logging,
243 * Postfix with LMDB would be too hard to support.
244 *
245 * LMDB 0.9.10 fixes an information leak where LMDB wrote chunks of up to 4096
246 * bytes of uninitialized heap memory to a database. This was a security
247 * violation because it made information persistent that was not meant to be
248 * persisted, or it was sharing information that was not meant to be shared.
249 *
250 * LMDB 0.9.9 allows Postfix to use external (fcntl()-based) locks, instead of
251 * having to use world-writable LMDB lock files.
252 *
253 * LMDB 0.9.8 allows Postfix to update the database size limit on-the-fly, so
254 * that it can recover from an MDB_MAP_FULL error without having to close
255 * the database. It also allows an application to "pick up" a new database
256 * size limit on-the-fly, so that it can recover from an MDB_MAP_RESIZED
257 * error without having to close the database.
258 *
259 * The database size limit that remains is imposed by the hardware memory
260 * address space (31 or 47 bits, typically) or file system. The LMDB
261 * implementation is supposed to handle databases larger than physical
262 * memory. However, this is not necessarily guaranteed for (bulk)
263 * transactions larger than physical memory.
264 */
265 #if MDB_VERSION_FULL < MDB_VERINT(0, 9, 11)
266 #error "This Postfix version requires LMDB version 0.9.11 or later"
267 #endif
268
269 /*
270 * Error recovery.
271 *
 * The purpose of the slmdb(3) API is to hide LMDB quirks (recoverable
 * MDB_MAP_FULL, MDB_MAP_RESIZED, or MDB_READERS_FULL errors). With these
 * out of the way, applications can pretend that those quirks don't exist,
 * and focus on their own job.
276 *
277 * - To recover from a single-transaction LMDB error, each wrapper function
278 * uses tail recursion instead of goto. Since LMDB errors are rare, code
279 * clarity is more important than speed.
280 *
281 * - To recover from a bulk-transaction LMDB error, the error-recovery code
282 * triggers a long jump back into the caller to some pre-arranged point (the
283 * closest thing that C has to exception handling). The application is then
284 * expected to repeat the bulk transaction from scratch.
285 *
286 * When any code aborts a bulk transaction, it must reset slmdb->txn to null
287 * to avoid a use-after-free problem in slmdb_close().
288 */
289
/*
 * Our default retry attempt limits. We allow a few retries per slmdb(3) API
 * call for non-bulk transactions. We allow a number of bulk-transaction
 * retries that is proportional to the memory address space: two retries for
 * every bit in a size_t.
 *
 * These are the values that slmdb_open() stores as the initial
 * api_retry_limit and bulk_retry_limit; slmdb_control() can override them.
 */
#define SLMDB_DEF_API_RETRY_LIMIT 30	/* Retries per slmdb(3) API call */
#define SLMDB_DEF_BULK_RETRY_LIMIT \
	(2 * sizeof(size_t) * CHAR_BIT)	/* Retries per bulk-mode transaction */
298
/*
 * We increment the recursion counter each time we try to recover from
 * error, and reset the recursion counter when returning to the application
 * from the slmdb(3) API.
 *
 * SLMDB_API_RETURN() expands to a single statement that first zeroes
 * (slmdb)->api_retry_count and then returns (status) from the enclosing
 * function. The do { } while (0) wrapper lets it be used like an ordinary
 * statement, including as the unbraced body of an if/else.
 */
#define SLMDB_API_RETURN(slmdb, status) do { \
	(slmdb)->api_retry_count = 0; \
	return (status); \
    } while (0)
308
309 /*
310 * With MDB_NOLOCK, the application uses an external lock for inter-process
311 * synchronization. Because the caller may release the external lock after
312 * an SLMDB API call, each SLMDB API function must use a short-lived
313 * transaction unless the transaction is a bulk-mode transaction.
314 */
315
316 /* slmdb_cursor_close - close cursor and its read transaction */
317
slmdb_cursor_close(SLMDB * slmdb)318 static void slmdb_cursor_close(SLMDB *slmdb)
319 {
320 MDB_txn *txn;
321
322 /*
323 * Close the cursor and its read transaction. We can restore it later
324 * from the saved key information.
325 */
326 txn = mdb_cursor_txn(slmdb->cursor);
327 mdb_cursor_close(slmdb->cursor);
328 slmdb->cursor = 0;
329 mdb_txn_abort(txn);
330 }
331
332 /* slmdb_saved_key_init - initialize saved key info */
333
slmdb_saved_key_init(SLMDB * slmdb)334 static void slmdb_saved_key_init(SLMDB *slmdb)
335 {
336 slmdb->saved_key.mv_data = 0;
337 slmdb->saved_key.mv_size = 0;
338 slmdb->saved_key_size = 0;
339 }
340
341 /* slmdb_saved_key_free - destroy saved key info */
342
slmdb_saved_key_free(SLMDB * slmdb)343 static void slmdb_saved_key_free(SLMDB *slmdb)
344 {
345 free(slmdb->saved_key.mv_data);
346 slmdb_saved_key_init(slmdb);
347 }
348
/* Non-zero when handle (s) owns a saved-key buffer (saved_key.mv_data set). */
#define HAVE_SLMDB_SAVED_KEY(s) ((s)->saved_key.mv_data != 0)
350
351 /* slmdb_saved_key_assign - copy the saved key */
352
slmdb_saved_key_assign(SLMDB * slmdb,MDB_val * key_val)353 static int slmdb_saved_key_assign(SLMDB *slmdb, MDB_val *key_val)
354 {
355
356 /*
357 * Extend the buffer to fit the key, so that we can avoid malloc()
358 * overhead most of the time.
359 */
360 if (slmdb->saved_key_size < key_val->mv_size) {
361 if (slmdb->saved_key.mv_data == 0)
362 slmdb->saved_key.mv_data = malloc(key_val->mv_size);
363 else
364 slmdb->saved_key.mv_data =
365 realloc(slmdb->saved_key.mv_data, key_val->mv_size);
366 if (slmdb->saved_key.mv_data == 0) {
367 slmdb_saved_key_init(slmdb);
368 return (ENOMEM);
369 } else {
370 slmdb->saved_key_size = key_val->mv_size;
371 }
372 }
373
374 /*
375 * Copy the key under the cursor.
376 */
377 memcpy(slmdb->saved_key.mv_data, key_val->mv_data, key_val->mv_size);
378 slmdb->saved_key.mv_size = key_val->mv_size;
379 return (0);
380 }
381
382 /* slmdb_prepare - LMDB-specific (re)initialization before actual access */
383
slmdb_prepare(SLMDB * slmdb)384 static int slmdb_prepare(SLMDB *slmdb)
385 {
386 int status = 0;
387
388 /*
389 * This is called before accessing the database, or after recovery from
390 * an LMDB error. Note: this code cannot recover from errors itself.
391 * slmdb->txn is either the database open() transaction or a
392 * freshly-created bulk-mode transaction. When slmdb_prepare() commits or
393 * aborts commits a transaction, it must set slmdb->txn to null to avoid
394 * a use-after-free error in slmdb_close().
395 *
396 * - With O_TRUNC we make a "drop" request before updating the database.
397 *
398 * - With a bulk-mode transaction we commit when the database is closed.
399 */
400 if (slmdb->open_flags & O_TRUNC) {
401 if ((status = mdb_drop(slmdb->txn, slmdb->dbi, 0)) != 0) {
402 mdb_txn_abort(slmdb->txn);
403 slmdb->txn = 0;
404 return (status);
405 }
406 if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
407 status = mdb_txn_commit(slmdb->txn);
408 slmdb->txn = 0;
409 if (status != 0)
410 return (status);
411 }
412 } else if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
413 mdb_txn_abort(slmdb->txn);
414 slmdb->txn = 0;
415 }
416 slmdb->api_retry_count = 0;
417 return (status);
418 }
419
420 /* slmdb_recover - recover from LMDB errors */
421
slmdb_recover(SLMDB * slmdb,int status)422 static int slmdb_recover(SLMDB *slmdb, int status)
423 {
424 MDB_envinfo info;
425 int original_status = status;
426
427 /*
428 * This may be needed in non-MDB_NOLOCK mode. Recovery is rare enough
429 * that we don't care about a few wasted cycles.
430 */
431 if (slmdb->cursor != 0)
432 slmdb_cursor_close(slmdb);
433
434 /*
435 * Limit the number of recovery attempts per slmdb(3) API request.
436 */
437 if ((slmdb->api_retry_count += 1) >= slmdb->api_retry_limit)
438 return (status);
439
440 /*
441 * Limit the number of bulk transaction recovery attempts.
442 */
443 if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0
444 && (slmdb->bulk_retry_count += 1) > slmdb->bulk_retry_limit)
445 return (status);
446
447 /*
448 * Try to clear the error condition.
449 */
450 switch (status) {
451
452 /*
453 * As of LMDB 0.9.8 when a non-bulk update runs into a "map full"
454 * error, we can resize the environment's memory map and clear the
455 * error condition. The caller should retry immediately.
456 */
457 case MDB_MAP_FULL:
458 /* Can we increase the memory map? Give up if we can't. */
459 if (slmdb->curr_limit < slmdb->hard_limit / slmdb->size_incr) {
460 slmdb->curr_limit = slmdb->curr_limit * slmdb->size_incr;
461 } else if (slmdb->curr_limit < slmdb->hard_limit) {
462 slmdb->curr_limit = slmdb->hard_limit;
463 } else {
464 /* Sorry, we are already maxed out. */
465 break;
466 }
467 if (slmdb->notify_fn)
468 slmdb->notify_fn(slmdb->cb_context, MDB_MAP_FULL,
469 slmdb->curr_limit);
470 status = mdb_env_set_mapsize(slmdb->env, slmdb->curr_limit);
471 break;
472
473 /*
474 * When a writer resizes the database, read-only applications must
475 * increase their LMDB memory map size limit, too. Otherwise, they
476 * won't be able to read a table after it grows.
477 *
478 * As of LMDB 0.9.8 we can import the new memory map size limit into the
479 * database environment by calling mdb_env_set_mapsize() with a zero
480 * size argument. Then we extract the map size limit for later use.
481 * The caller should retry immediately.
482 */
483 case MDB_MAP_RESIZED:
484 if ((status = mdb_env_set_mapsize(slmdb->env, 0)) == 0) {
485 /* Do not panic. Maps may shrink after bulk update. */
486 mdb_env_info(slmdb->env, &info);
487 slmdb->curr_limit = info.me_mapsize;
488 if (slmdb->notify_fn)
489 slmdb->notify_fn(slmdb->cb_context, MDB_MAP_RESIZED,
490 slmdb->curr_limit);
491 }
492 break;
493
494 /*
495 * What is it with these built-in hard limits that cause systems to
496 * stop when demand is at its highest? When the system is under
497 * stress it should slow down and keep making progress.
498 */
499 case MDB_READERS_FULL:
500 if (slmdb->notify_fn)
501 slmdb->notify_fn(slmdb->cb_context, MDB_READERS_FULL);
502 sleep(1);
503 status = 0;
504 break;
505
506 /*
507 * We can't solve this problem. The application should terminate with
508 * a fatal run-time error and the program should be re-run later.
509 */
510 default:
511 break;
512 }
513
514 /*
515 * If we cleared the error condition for a non-bulk transaction, return a
516 * success status. The caller should retry the failed operation
517 * immediately.
518 */
519 if (status == 0 && (slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0) {
520
521 /*
522 * We cleared the error condition for a bulk transaction. If the
523 * transaction is not restartable, return the original error. The
524 * caller should terminate with a fatal run-time error, and the
525 * program should be re-run later.
526 */
527 if (slmdb->longjmp_fn == 0)
528 return (original_status);
529
530 /*
531 * Rebuild a bulk transaction from scratch, by making a long jump
532 * back into the caller at some pre-arranged point. In MDB_NOLOCK
533 * mode, there is no need to upgrade a lock to "exclusive", because a
534 * failed write transaction has no side effects.
535 */
536 if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0,
537 slmdb->lmdb_flags & MDB_RDONLY,
538 &slmdb->txn)) == 0
539 && (status = slmdb_prepare(slmdb)) == 0)
540 slmdb->longjmp_fn(slmdb->cb_context, 1);
541 }
542 return (status);
543 }
544
545 /* slmdb_txn_begin - mdb_txn_begin() wrapper with LMDB error recovery */
546
slmdb_txn_begin(SLMDB * slmdb,int rdonly,MDB_txn ** txn)547 static int slmdb_txn_begin(SLMDB *slmdb, int rdonly, MDB_txn **txn)
548 {
549 int status;
550
551 if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0, rdonly, txn)) != 0
552 && (status = slmdb_recover(slmdb, status)) == 0)
553 status = slmdb_txn_begin(slmdb, rdonly, txn);
554
555 return (status);
556 }
557
558 /* slmdb_get - mdb_get() wrapper with LMDB error recovery */
559
slmdb_get(SLMDB * slmdb,MDB_val * mdb_key,MDB_val * mdb_value)560 int slmdb_get(SLMDB *slmdb, MDB_val *mdb_key, MDB_val *mdb_value)
561 {
562 MDB_txn *txn;
563 int status;
564
565 /*
566 * Start a read transaction if there's no bulk-mode txn.
567 */
568 if (slmdb->txn)
569 txn = slmdb->txn;
570 else if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
571 SLMDB_API_RETURN(slmdb, status);
572
573 /*
574 * Do the lookup.
575 */
576 if ((status = mdb_get(txn, slmdb->dbi, mdb_key, mdb_value)) != 0
577 && status != MDB_NOTFOUND) {
578 mdb_txn_abort(txn);
579 if (txn == slmdb->txn)
580 slmdb->txn = 0;
581 if ((status = slmdb_recover(slmdb, status)) == 0)
582 status = slmdb_get(slmdb, mdb_key, mdb_value);
583 SLMDB_API_RETURN(slmdb, status);
584 }
585
586 /*
587 * Close the read txn if it's not the bulk-mode txn.
588 */
589 if (slmdb->txn == 0)
590 mdb_txn_abort(txn);
591
592 SLMDB_API_RETURN(slmdb, status);
593 }
594
595 /* slmdb_put - mdb_put() wrapper with LMDB error recovery */
596
slmdb_put(SLMDB * slmdb,MDB_val * mdb_key,MDB_val * mdb_value,int flags)597 int slmdb_put(SLMDB *slmdb, MDB_val *mdb_key,
598 MDB_val *mdb_value, int flags)
599 {
600 MDB_txn *txn;
601 int status;
602
603 /*
604 * Start a write transaction if there's no bulk-mode txn.
605 */
606 if (slmdb->txn)
607 txn = slmdb->txn;
608 else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
609 SLMDB_API_RETURN(slmdb, status);
610
611 /*
612 * Do the update.
613 */
614 if ((status = mdb_put(txn, slmdb->dbi, mdb_key, mdb_value, flags)) != 0) {
615 if (status != MDB_KEYEXIST) {
616 mdb_txn_abort(txn);
617 if (txn == slmdb->txn)
618 slmdb->txn = 0;
619 if ((status = slmdb_recover(slmdb, status)) == 0)
620 status = slmdb_put(slmdb, mdb_key, mdb_value, flags);
621 SLMDB_API_RETURN(slmdb, status);
622 } else {
623 /* Abort non-bulk transaction only. */
624 if (slmdb->txn == 0)
625 mdb_txn_abort(txn);
626 }
627 }
628
629 /*
630 * Commit the transaction if it's not the bulk-mode txn.
631 */
632 if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
633 && (status = slmdb_recover(slmdb, status)) == 0)
634 status = slmdb_put(slmdb, mdb_key, mdb_value, flags);
635
636 SLMDB_API_RETURN(slmdb, status);
637 }
638
639 /* slmdb_del - mdb_del() wrapper with LMDB error recovery */
640
slmdb_del(SLMDB * slmdb,MDB_val * mdb_key)641 int slmdb_del(SLMDB *slmdb, MDB_val *mdb_key)
642 {
643 MDB_txn *txn;
644 int status;
645
646 /*
647 * Start a write transaction if there's no bulk-mode txn.
648 */
649 if (slmdb->txn)
650 txn = slmdb->txn;
651 else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
652 SLMDB_API_RETURN(slmdb, status);
653
654 /*
655 * Do the update.
656 */
657 if ((status = mdb_del(txn, slmdb->dbi, mdb_key, (MDB_val *) 0)) != 0) {
658 if (status != MDB_NOTFOUND) {
659 mdb_txn_abort(txn);
660 if (txn == slmdb->txn)
661 slmdb->txn = 0;
662 if ((status = slmdb_recover(slmdb, status)) == 0)
663 status = slmdb_del(slmdb, mdb_key);
664 SLMDB_API_RETURN(slmdb, status);
665 } else {
666 /* Abort non-bulk transaction only. */
667 if (slmdb->txn == 0)
668 mdb_txn_abort(txn);
669 }
670 }
671
672 /*
673 * Commit the transaction if it's not the bulk-mode txn.
674 */
675 if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
676 && (status = slmdb_recover(slmdb, status)) == 0)
677 status = slmdb_del(slmdb, mdb_key);
678
679 SLMDB_API_RETURN(slmdb, status);
680 }
681
682 /* slmdb_cursor_get - mdb_cursor_get() wrapper with LMDB error recovery */
683
slmdb_cursor_get(SLMDB * slmdb,MDB_val * mdb_key,MDB_val * mdb_value,MDB_cursor_op op)684 int slmdb_cursor_get(SLMDB *slmdb, MDB_val *mdb_key,
685 MDB_val *mdb_value, MDB_cursor_op op)
686 {
687 MDB_txn *txn;
688 int status = 0;
689
690 /*
691 * TODO: figure how we would recover a failing bulk transaction.
692 */
693 if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0) {
694 if (slmdb->assert_fn)
695 slmdb->assert_fn(slmdb->cb_context,
696 "slmdb_cursor_get: bulk transaction is not supported");
697 return (MDB_PANIC);
698 }
699
700 /*
701 * Open a read transaction and cursor if needed.
702 */
703 if (slmdb->cursor == 0) {
704 if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
705 SLMDB_API_RETURN(slmdb, status);
706 if ((status = mdb_cursor_open(txn, slmdb->dbi, &slmdb->cursor)) != 0) {
707 mdb_txn_abort(txn);
708 if ((status = slmdb_recover(slmdb, status)) == 0)
709 status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
710 SLMDB_API_RETURN(slmdb, status);
711 }
712
713 /*
714 * Restore the cursor position from the saved key information.
715 */
716 if (HAVE_SLMDB_SAVED_KEY(slmdb) && op != MDB_FIRST)
717 status = mdb_cursor_get(slmdb->cursor, &slmdb->saved_key,
718 (MDB_val *) 0, MDB_SET);
719 }
720
721 /*
722 * Database lookup.
723 */
724 if (status == 0)
725 status = mdb_cursor_get(slmdb->cursor, mdb_key, mdb_value, op);
726
727 /*
728 * Save the cursor position if successful. This can fail only with
729 * ENOMEM.
730 *
731 * Close the cursor read transaction if in MDB_NOLOCK mode, because the
732 * caller may release the external lock after we return.
733 */
734 if (status == 0) {
735 status = slmdb_saved_key_assign(slmdb, mdb_key);
736 if (slmdb->lmdb_flags & MDB_NOLOCK)
737 slmdb_cursor_close(slmdb);
738 }
739
740 /*
741 * Handle end-of-database or other error.
742 */
743 else {
744 /* Do not hand-optimize out the slmdb_cursor_close() calls below. */
745 if (status == MDB_NOTFOUND) {
746 slmdb_cursor_close(slmdb);
747 if (HAVE_SLMDB_SAVED_KEY(slmdb))
748 slmdb_saved_key_free(slmdb);
749 } else {
750 slmdb_cursor_close(slmdb);
751 if ((status = slmdb_recover(slmdb, status)) == 0)
752 status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
753 SLMDB_API_RETURN(slmdb, status);
754 /* Do not hand-optimize out the above return statement. */
755 }
756 }
757 SLMDB_API_RETURN(slmdb, status);
758 }
759
760 /* slmdb_assert_cb - report LMDB assertion failure */
761
slmdb_assert_cb(MDB_env * env,const char * text)762 static void slmdb_assert_cb(MDB_env *env, const char *text)
763 {
764 SLMDB *slmdb = (SLMDB *) mdb_env_get_userctx(env);
765
766 if (slmdb->assert_fn)
767 slmdb->assert_fn(slmdb->cb_context, text);
768 }
769
770 /* slmdb_control - control optional settings */
771
slmdb_control(SLMDB * slmdb,int first,...)772 int slmdb_control(SLMDB *slmdb, int first,...)
773 {
774 va_list ap;
775 int status = 0;
776 int reqno;
777 int rc;
778
779 va_start(ap, first);
780 for (reqno = first; status == 0 && reqno != SLMDB_CTL_END; reqno = va_arg(ap, int)) {
781 switch (reqno) {
782 case SLMDB_CTL_LONGJMP_FN:
783 slmdb->longjmp_fn = va_arg(ap, SLMDB_LONGJMP_FN);
784 break;
785 case SLMDB_CTL_NOTIFY_FN:
786 slmdb->notify_fn = va_arg(ap, SLMDB_NOTIFY_FN);
787 break;
788 case SLMDB_CTL_ASSERT_FN:
789 slmdb->assert_fn = va_arg(ap, SLMDB_ASSERT_FN);
790 if ((rc = mdb_env_set_userctx(slmdb->env, (void *) slmdb)) != 0
791 || (rc = mdb_env_set_assert(slmdb->env, slmdb_assert_cb)) != 0)
792 status = rc;
793 break;
794 case SLMDB_CTL_CB_CONTEXT:
795 slmdb->cb_context = va_arg(ap, void *);
796 break;
797 case SLMDB_CTL_API_RETRY_LIMIT:
798 slmdb->api_retry_limit = va_arg(ap, int);
799 break;
800 case SLMDB_CTL_BULK_RETRY_LIMIT:
801 slmdb->bulk_retry_limit = va_arg(ap, int);
802 break;
803 default:
804 status = errno = EINVAL;
805 break;
806 }
807 }
808 va_end(ap);
809 return (status);
810 }
811
812 /* slmdb_close - wrapper with LMDB error recovery */
813
slmdb_close(SLMDB * slmdb)814 int slmdb_close(SLMDB *slmdb)
815 {
816 int status = 0;
817
818 /*
819 * Finish an open bulk transaction. If slmdb_recover() returns after a
820 * bulk-transaction error, then it was unable to clear the error
821 * condition, or unable to restart the bulk transaction.
822 */
823 if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0 && slmdb->txn != 0
824 && (status = mdb_txn_commit(slmdb->txn)) != 0)
825 status = slmdb_recover(slmdb, status);
826
827 /*
828 * Clean up after an unfinished sequence() operation.
829 */
830 if (slmdb->cursor != 0)
831 slmdb_cursor_close(slmdb);
832
833 mdb_env_close(slmdb->env);
834
835 /*
836 * Clean up the saved key information.
837 */
838 if (HAVE_SLMDB_SAVED_KEY(slmdb))
839 slmdb_saved_key_free(slmdb);
840
841 SLMDB_API_RETURN(slmdb, status);
842 }
843
844 /* slmdb_init - mandatory initialization */
845
slmdb_init(SLMDB * slmdb,size_t curr_limit,int size_incr,size_t hard_limit)846 int slmdb_init(SLMDB *slmdb, size_t curr_limit, int size_incr,
847 size_t hard_limit)
848 {
849
850 /*
851 * This is a separate operation to keep the slmdb_open() API simple.
852 * Don't allocate resources here. Just store control information,
853 */
854 slmdb->curr_limit = curr_limit;
855 slmdb->size_incr = size_incr;
856 slmdb->hard_limit = hard_limit;
857
858 return (MDB_SUCCESS);
859 }
860
861 /* slmdb_open - open wrapped LMDB database */
862
slmdb_open(SLMDB * slmdb,const char * path,int open_flags,int lmdb_flags,int slmdb_flags)863 int slmdb_open(SLMDB *slmdb, const char *path, int open_flags,
864 int lmdb_flags, int slmdb_flags)
865 {
866 struct stat st;
867 MDB_env *env;
868 MDB_txn *txn;
869 MDB_dbi dbi;
870 int db_fd;
871 int status;
872
873 /*
874 * Create LMDB environment.
875 */
876 if ((status = mdb_env_create(&env)) != 0)
877 return (status);
878
879 /*
880 * Make sure that the memory map has room to store and commit an initial
881 * "drop" transaction as well as fixed database metadata. We have no way
882 * to recover from errors before the first application-level I/O request.
883 */
884 #define SLMDB_FUDGE 10240
885
886 if (slmdb->curr_limit < SLMDB_FUDGE)
887 slmdb->curr_limit = SLMDB_FUDGE;
888 if (stat(path, &st) == 0
889 && st.st_size > slmdb->curr_limit - SLMDB_FUDGE) {
890 if (st.st_size > slmdb->hard_limit)
891 slmdb->hard_limit = st.st_size;
892 if (st.st_size < slmdb->hard_limit - SLMDB_FUDGE)
893 slmdb->curr_limit = st.st_size + SLMDB_FUDGE;
894 else
895 slmdb->curr_limit = slmdb->hard_limit;
896 }
897
898 /*
899 * mdb_open() requires a txn, but since the default DB always exists in
900 * an LMDB environment, we usually don't need to do anything else with
901 * the txn. It is currently used for truncate and for bulk transactions.
902 */
903 if ((status = mdb_env_set_mapsize(env, slmdb->curr_limit)) != 0
904 || (status = mdb_env_open(env, path, lmdb_flags, 0644)) != 0
905 || (status = mdb_txn_begin(env, (MDB_txn *) 0,
906 lmdb_flags & MDB_RDONLY, &txn)) != 0
907 || (status = mdb_open(txn, (const char *) 0, 0, &dbi)) != 0
908 || (status = mdb_env_get_fd(env, &db_fd)) != 0) {
909 mdb_env_close(env);
910 return (status);
911 }
912
913 /*
914 * Bundle up.
915 */
916 slmdb->open_flags = open_flags;
917 slmdb->lmdb_flags = lmdb_flags;
918 slmdb->slmdb_flags = slmdb_flags;
919 slmdb->env = env;
920 slmdb->dbi = dbi;
921 slmdb->db_fd = db_fd;
922 slmdb->cursor = 0;
923 slmdb_saved_key_init(slmdb);
924 slmdb->api_retry_count = 0;
925 slmdb->bulk_retry_count = 0;
926 slmdb->api_retry_limit = SLMDB_DEF_API_RETRY_LIMIT;
927 slmdb->bulk_retry_limit = SLMDB_DEF_BULK_RETRY_LIMIT;
928 slmdb->longjmp_fn = 0;
929 slmdb->notify_fn = 0;
930 slmdb->assert_fn = 0;
931 slmdb->cb_context = 0;
932 slmdb->txn = txn;
933
934 if ((status = slmdb_prepare(slmdb)) != 0)
935 mdb_env_close(env);
936
937 return (status);
938 }
939
940 #endif
941