1 /*	$NetBSD: slmdb.c,v 1.4 2022/10/08 16:12:50 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	slmdb 3
6 /* SUMMARY
7 /*	Simplified LMDB API
8 /* SYNOPSIS
9 /*	#include <slmdb.h>
10 /*
11 /*	int	slmdb_init(slmdb, curr_limit, size_incr, hard_limit)
12 /*	SLMDB	*slmdb;
13 /*	size_t	curr_limit;
14 /*	int	size_incr;
15 /*	size_t	hard_limit;
16 /*
17 /*	int	slmdb_open(slmdb, path, open_flags, lmdb_flags, slmdb_flags)
18 /*	SLMDB	*slmdb;
19 /*	const char *path;
20 /*	int	open_flags;
21 /*	int	lmdb_flags;
22 /*	int	slmdb_flags;
23 /*
24 /*	int	slmdb_close(slmdb)
25 /*	SLMDB	*slmdb;
26 /*
27 /*	int	slmdb_get(slmdb, mdb_key, mdb_value)
28 /*	SLMDB	*slmdb;
29 /*	MDB_val	*mdb_key;
30 /*	MDB_val	*mdb_value;
31 /*
32 /*	int	slmdb_put(slmdb, mdb_key, mdb_value, flags)
33 /*	SLMDB	*slmdb;
34 /*	MDB_val	*mdb_key;
35 /*	MDB_val	*mdb_value;
36 /*	int	flags;
37 /*
38 /*	int	slmdb_del(slmdb, mdb_key)
39 /*	SLMDB	*slmdb;
40 /*	MDB_val	*mdb_key;
41 /*
42 /*	int	slmdb_cursor_get(slmdb, mdb_key, mdb_value, op)
43 /*	SLMDB	*slmdb;
44 /*	MDB_val	*mdb_key;
45 /*	MDB_val	*mdb_value;
46 /*	MDB_cursor_op op;
47 /* AUXILIARY FUNCTIONS
48 /*	int	slmdb_fd(slmdb)
49 /*	SLMDB	*slmdb;
50 /*
51 /*	size_t	slmdb_curr_limit(slmdb)
52 /*	SLMDB	*slmdb;
53 /*
54 /*	int	slmdb_control(slmdb, request, ...)
55 /*	SLMDB	*slmdb;
56 /*	int	request;
57 /* DESCRIPTION
58 /*	This module simplifies the LMDB API by hiding recoverable
59 /*	errors from the application.  Details are given in the
60 /*	section "ERROR RECOVERY".
61 /*
62 /*	slmdb_init() performs mandatory initialization before opening
63 /*	an LMDB database. The result value is an LMDB status code
64 /*	(zero in case of success).
65 /*
66 /*	slmdb_open() opens an LMDB database.  The result value is
67 /*	an LMDB status code (zero in case of success).
68 /*
69 /*	slmdb_close() finalizes an optional bulk-mode transaction
70 /*	and closes a successfully-opened LMDB database.  The result
71 /*	value is an LMDB status code (zero in case of success).
72 /*
73 /*	slmdb_get() is an mdb_get() wrapper with automatic error
74 /*	recovery.  The result value is an LMDB status code (zero
75 /*	in case of success).
76 /*
77 /*	slmdb_put() is an mdb_put() wrapper with automatic error
78 /*	recovery.  The result value is an LMDB status code (zero
79 /*	in case of success).
80 /*
81 /*	slmdb_del() is an mdb_del() wrapper with automatic error
82 /*	recovery.  The result value is an LMDB status code (zero
83 /*	in case of success).
84 /*
85 /*	slmdb_cursor_get() is an mdb_cursor_get() wrapper with
86 /*	automatic error recovery.  The result value is an LMDB
87 /*	status code (zero in case of success). This wrapper supports
88 /*	only one cursor per database.
89 /*
90 /*	slmdb_fd() returns the file descriptor for the specified
91 /*	database.  This may be used for file status queries or
92 /*	application-controlled locking.
93 /*
94 /*	slmdb_curr_limit() returns the current database size limit
95 /*	for the specified database.
96 /*
97 /*	slmdb_control() specifies optional features. The result is
98 /*	an LMDB status code (zero in case of success).
99 /*
100 /*	Arguments:
101 /* .IP slmdb
102 /*	Pointer to caller-provided storage.
103 /* .IP curr_limit
104 /*	The initial memory mapping size limit. This limit is
105 /*	automatically increased when the database becomes full.
106 /* .IP size_incr
107 /*	An integer factor by which the memory mapping size limit
108 /*	is increased when the database becomes full.
109 /* .IP hard_limit
110 /*	The upper bound for the memory mapping size limit.
111 /* .IP path
112 /*	LMDB database pathname.
113 /* .IP open_flags
114 /*	Flags that control file open operations. Do not specify
115 /*	locking flags here.
116 /* .IP lmdb_flags
117 /*	Flags that control the LMDB environment. If MDB_NOLOCK is
118 /*	specified, then each slmdb_get() or slmdb_cursor_get() call
119 /*	must be protected with a shared (or exclusive) external lock,
120 /*	and each slmdb_put() or slmdb_del() call must be protected
121 /*	with an exclusive external lock. A lock may be released
122 /*	after the call returns. A writer may atomically downgrade
123 /*	an exclusive lock to shared, but it must obtain an exclusive
124 /*	lock before making another slmdb(3) write request.
125 /* .sp
126 /*	Note: when a database is opened with MDB_NOLOCK, external
127 /*	locks such as fcntl() do not protect slmdb(3) requests
128 /*	within the same process against each other.  If a program
129 /*	cannot avoid making simultaneous slmdb(3) requests, then
130 /*	it must synchronize these requests with in-process locks,
131 /*	in addition to the per-process fcntl(2) locks.
132 /* .IP slmdb_flags
133 /*	Bit-wise OR of zero or more of the following:
134 /* .RS
135 /* .IP SLMDB_FLAG_BULK
136 /*	Open the database and create a "bulk" transaction that is
137 /*	committed when the database is closed. If MDB_NOLOCK is
138 /*	specified, then the entire transaction must be protected
139 /*	with a persistent external lock.  All slmdb_get(), slmdb_put()
140 /*	and slmdb_del() requests will be directed to the "bulk"
141 /*	transaction.
142 /* .RE
143 /* .IP mdb_key
144 /*	Pointer to caller-provided lookup key storage.
145 /* .IP mdb_value
146 /*	Pointer to caller-provided value storage.
147 /* .IP op
148 /*	LMDB cursor operation.
149 /* .IP request
150 /*	The start of a list of (name, value) pairs, terminated with
151 /*	CA_SLMDB_CTL_END.  The following text enumerates the symbolic
152 /*	request names and the corresponding argument types.
153 /* .RS
154 /* .IP "CA_SLMDB_CTL_LONGJMP_FN(void (*)(void *, int))"
155 /*	Call-back function pointer. The function is called to repeat
156 /*	a failed bulk-mode transaction from the start. The arguments
157 /*	are the application context and the setjmp() or sigsetjmp()
158 /*	result value.
159 /* .IP "CA_SLMDB_CTL_NOTIFY_FN(void (*)(void *, int, ...))"
160 /*	Call-back function pointer. The function is called to report
161 /*	successful error recovery. The arguments are the application
162 /*	context, the MDB error code, and additional arguments that
163 /*	depend on the error code.  Details are given in the section
164 /*	"ERROR RECOVERY".
165 /* .IP "CA_SLMDB_CTL_ASSERT_FN(void (*)(void *, const char *))"
166 /*	Call-back function pointer.  The function is called to
167 /*	report an LMDB internal assertion failure. The arguments
168 /*	are the application context, and text that describes the
169 /*	problem.
170 /* .IP "CA_SLMDB_CTL_CB_CONTEXT(void *)"
171 /*	Application context that is passed in call-back function
172 /*	calls.
173 /* .IP "CA_SLMDB_CTL_API_RETRY_LIMIT(int)"
174 /*	How many times to recover from LMDB errors within the
175 /*	execution of a single slmdb(3) API call before giving up.
176 /* .IP "CA_SLMDB_CTL_BULK_RETRY_LIMIT(int)"
/*	How many times to recover from a bulk-mode transaction
/*	error before giving up.
179 /* .RE
180 /* ERROR RECOVERY
181 /* .ad
182 /* .fi
183 /*	This module automatically repeats failed requests after
184 /*	recoverable errors, up to the limits specified with
185 /*	slmdb_control().
186 /*
187 /*	Recoverable errors are reported through an optional
188 /*	notification function specified with slmdb_control().  With
189 /*	recoverable MDB_MAP_FULL and MDB_MAP_RESIZED errors, the
190 /*	additional argument is a size_t value with the updated
191 /*	current database size limit; with recoverable MDB_READERS_FULL
192 /*	errors there is no additional argument.
193 /* BUGS
194 /*	Recovery from MDB_MAP_FULL involves resizing the database
195 /*	memory mapping.  According to LMDB documentation this
196 /*	requires that there is no concurrent activity in the same
197 /*	database by other threads in the same memory address space.
198 /* SEE ALSO
199 /*	lmdb(3) API manpage (currently, non-existent).
200 /* AUTHOR(S)
201 /*	Howard Chu
202 /*	Symas Corporation
203 /*
204 /*	Wietse Venema
205 /*	IBM T.J. Watson Research
206 /*	P.O. Box 704
207 /*	Yorktown Heights, NY 10598, USA
208 /*
209 /*	Wietse Venema
210 /*	Google, Inc.
211 /*	111 8th Avenue
212 /*	New York, NY 10011, USA
213 /*--*/
214 
215  /*
216   * DO NOT include other Postfix-specific header files. This LMDB wrapper
217   * must be usable outside Postfix.
218   */
219 
220 #ifdef HAS_LMDB
221 
222 /* System library. */
223 
224 #include <sys/stat.h>
225 #include <errno.h>
226 #include <fcntl.h>
227 #include <string.h>
228 #include <unistd.h>
229 #include <limits.h>
230 #include <stdarg.h>
231 #include <string.h>
232 #include <stdlib.h>
233 
234 /* Application-specific. */
235 
236 #include <slmdb.h>
237 
238  /*
239   * Minimum LMDB patchlevel.
240   *
241   * LMDB 0.9.11 allows Postfix daemons to log an LMDB error message instead of
242   * falling out of the sky without any explanation. Without such logging,
243   * Postfix with LMDB would be too hard to support.
244   *
245   * LMDB 0.9.10 fixes an information leak where LMDB wrote chunks of up to 4096
246   * bytes of uninitialized heap memory to a database. This was a security
247   * violation because it made information persistent that was not meant to be
248   * persisted, or it was sharing information that was not meant to be shared.
249   *
250   * LMDB 0.9.9 allows Postfix to use external (fcntl()-based) locks, instead of
251   * having to use world-writable LMDB lock files.
252   *
253   * LMDB 0.9.8 allows Postfix to update the database size limit on-the-fly, so
254   * that it can recover from an MDB_MAP_FULL error without having to close
255   * the database. It also allows an application to "pick up" a new database
256   * size limit on-the-fly, so that it can recover from an MDB_MAP_RESIZED
257   * error without having to close the database.
258   *
259   * The database size limit that remains is imposed by the hardware memory
260   * address space (31 or 47 bits, typically) or file system. The LMDB
261   * implementation is supposed to handle databases larger than physical
262   * memory. However, this is not necessarily guaranteed for (bulk)
263   * transactions larger than physical memory.
264   */
265 #if MDB_VERSION_FULL < MDB_VERINT(0, 9, 11)
266 #error "This Postfix version requires LMDB version 0.9.11 or later"
267 #endif
268 
269  /*
270   * Error recovery.
271   *
272   * The purpose of the slmdb(3) API is to hide LMDB quirks (recoverable
273   * MAP_FULL, MAP_RESIZED, or MDB_READERS_FULL errors). With these out of the
274   * way, applications can pretend that those quirks don't exist, and focus on
275   * their own job.
276   *
277   * - To recover from a single-transaction LMDB error, each wrapper function
278   * uses tail recursion instead of goto. Since LMDB errors are rare, code
279   * clarity is more important than speed.
280   *
281   * - To recover from a bulk-transaction LMDB error, the error-recovery code
282   * triggers a long jump back into the caller to some pre-arranged point (the
283   * closest thing that C has to exception handling). The application is then
284   * expected to repeat the bulk transaction from scratch.
285   *
286   * When any code aborts a bulk transaction, it must reset slmdb->txn to null
287   * to avoid a use-after-free problem in slmdb_close().
288   */
289 
290  /*
291   * Our default retry attempt limits. We allow a few retries per slmdb(3) API
292   * call for non-bulk transactions. We allow a number of bulk-transaction
293   * retries that is proportional to the memory address space.
294   */
295 #define SLMDB_DEF_API_RETRY_LIMIT 30	/* Retries per slmdb(3) API call */
296 #define SLMDB_DEF_BULK_RETRY_LIMIT \
297         (2 * sizeof(size_t) * CHAR_BIT)	/* Retries per bulk-mode transaction */
298 
299  /*
300   * We increment the recursion counter each time we try to recover from
301   * error, and reset the recursion counter when returning to the application
302   * from the slmdb(3) API.
303   */
304 #define SLMDB_API_RETURN(slmdb, status) do { \
305 	(slmdb)->api_retry_count = 0; \
306 	return (status); \
307     } while (0)
308 
309  /*
310   * With MDB_NOLOCK, the application uses an external lock for inter-process
311   * synchronization. Because the caller may release the external lock after
312   * an SLMDB API call, each SLMDB API function must use a short-lived
313   * transaction unless the transaction is a bulk-mode transaction.
314   */
315 
316 /* slmdb_cursor_close - close cursor and its read transaction */
317 
slmdb_cursor_close(SLMDB * slmdb)318 static void slmdb_cursor_close(SLMDB *slmdb)
319 {
320     MDB_txn *txn;
321 
322     /*
323      * Close the cursor and its read transaction. We can restore it later
324      * from the saved key information.
325      */
326     txn = mdb_cursor_txn(slmdb->cursor);
327     mdb_cursor_close(slmdb->cursor);
328     slmdb->cursor = 0;
329     mdb_txn_abort(txn);
330 }
331 
332 /* slmdb_saved_key_init - initialize saved key info */
333 
slmdb_saved_key_init(SLMDB * slmdb)334 static void slmdb_saved_key_init(SLMDB *slmdb)
335 {
336     slmdb->saved_key.mv_data = 0;
337     slmdb->saved_key.mv_size = 0;
338     slmdb->saved_key_size = 0;
339 }
340 
341 /* slmdb_saved_key_free - destroy saved key info */
342 
slmdb_saved_key_free(SLMDB * slmdb)343 static void slmdb_saved_key_free(SLMDB *slmdb)
344 {
345     free(slmdb->saved_key.mv_data);
346     slmdb_saved_key_init(slmdb);
347 }
348 
349 #define HAVE_SLMDB_SAVED_KEY(s) ((s)->saved_key.mv_data != 0)
350 
351 /* slmdb_saved_key_assign - copy the saved key */
352 
slmdb_saved_key_assign(SLMDB * slmdb,MDB_val * key_val)353 static int slmdb_saved_key_assign(SLMDB *slmdb, MDB_val *key_val)
354 {
355 
356     /*
357      * Extend the buffer to fit the key, so that we can avoid malloc()
358      * overhead most of the time.
359      */
360     if (slmdb->saved_key_size < key_val->mv_size) {
361 	if (slmdb->saved_key.mv_data == 0)
362 	    slmdb->saved_key.mv_data = malloc(key_val->mv_size);
363 	else
364 	    slmdb->saved_key.mv_data =
365 		realloc(slmdb->saved_key.mv_data, key_val->mv_size);
366 	if (slmdb->saved_key.mv_data == 0) {
367 	    slmdb_saved_key_init(slmdb);
368 	    return (ENOMEM);
369 	} else {
370 	    slmdb->saved_key_size = key_val->mv_size;
371 	}
372     }
373 
374     /*
375      * Copy the key under the cursor.
376      */
377     memcpy(slmdb->saved_key.mv_data, key_val->mv_data, key_val->mv_size);
378     slmdb->saved_key.mv_size = key_val->mv_size;
379     return (0);
380 }
381 
382 /* slmdb_prepare - LMDB-specific (re)initialization before actual access */
383 
slmdb_prepare(SLMDB * slmdb)384 static int slmdb_prepare(SLMDB *slmdb)
385 {
386     int     status = 0;
387 
388     /*
389      * This is called before accessing the database, or after recovery from
390      * an LMDB error. Note: this code cannot recover from errors itself.
391      * slmdb->txn is either the database open() transaction or a
392      * freshly-created bulk-mode transaction. When slmdb_prepare() commits or
393      * aborts commits a transaction, it must set slmdb->txn to null to avoid
394      * a use-after-free error in slmdb_close().
395      *
396      * - With O_TRUNC we make a "drop" request before updating the database.
397      *
398      * - With a bulk-mode transaction we commit when the database is closed.
399      */
400     if (slmdb->open_flags & O_TRUNC) {
401 	if ((status = mdb_drop(slmdb->txn, slmdb->dbi, 0)) != 0) {
402 	    mdb_txn_abort(slmdb->txn);
403 	    slmdb->txn = 0;
404 	    return (status);
405 	}
406 	if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
407 	    status = mdb_txn_commit(slmdb->txn);
408 	    slmdb->txn = 0;
409 	    if (status != 0)
410 		return (status);
411 	}
412     } else if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
413 	mdb_txn_abort(slmdb->txn);
414 	slmdb->txn = 0;
415     }
416     slmdb->api_retry_count = 0;
417     return (status);
418 }
419 
420 /* slmdb_recover - recover from LMDB errors */
421 
slmdb_recover(SLMDB * slmdb,int status)422 static int slmdb_recover(SLMDB *slmdb, int status)
423 {
424     MDB_envinfo info;
425     int     original_status = status;
426 
427     /*
428      * This may be needed in non-MDB_NOLOCK mode. Recovery is rare enough
429      * that we don't care about a few wasted cycles.
430      */
431     if (slmdb->cursor != 0)
432 	slmdb_cursor_close(slmdb);
433 
434     /*
435      * Limit the number of recovery attempts per slmdb(3) API request.
436      */
437     if ((slmdb->api_retry_count += 1) >= slmdb->api_retry_limit)
438 	return (status);
439 
440     /*
441      * Limit the number of bulk transaction recovery attempts.
442      */
443     if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0
444 	&& (slmdb->bulk_retry_count += 1) > slmdb->bulk_retry_limit)
445 	return (status);
446 
447     /*
448      * Try to clear the error condition.
449      */
450     switch (status) {
451 
452 	/*
453 	 * As of LMDB 0.9.8 when a non-bulk update runs into a "map full"
454 	 * error, we can resize the environment's memory map and clear the
455 	 * error condition. The caller should retry immediately.
456 	 */
457     case MDB_MAP_FULL:
458 	/* Can we increase the memory map? Give up if we can't. */
459 	if (slmdb->curr_limit < slmdb->hard_limit / slmdb->size_incr) {
460 	    slmdb->curr_limit = slmdb->curr_limit * slmdb->size_incr;
461 	} else if (slmdb->curr_limit < slmdb->hard_limit) {
462 	    slmdb->curr_limit = slmdb->hard_limit;
463 	} else {
464 	    /* Sorry, we are already maxed out. */
465 	    break;
466 	}
467 	if (slmdb->notify_fn)
468 	    slmdb->notify_fn(slmdb->cb_context, MDB_MAP_FULL,
469 			     slmdb->curr_limit);
470 	status = mdb_env_set_mapsize(slmdb->env, slmdb->curr_limit);
471 	break;
472 
473 	/*
474 	 * When a writer resizes the database, read-only applications must
475 	 * increase their LMDB memory map size limit, too. Otherwise, they
476 	 * won't be able to read a table after it grows.
477 	 *
478 	 * As of LMDB 0.9.8 we can import the new memory map size limit into the
479 	 * database environment by calling mdb_env_set_mapsize() with a zero
480 	 * size argument. Then we extract the map size limit for later use.
481 	 * The caller should retry immediately.
482 	 */
483     case MDB_MAP_RESIZED:
484 	if ((status = mdb_env_set_mapsize(slmdb->env, 0)) == 0) {
485 	    /* Do not panic. Maps may shrink after bulk update. */
486 	    mdb_env_info(slmdb->env, &info);
487 	    slmdb->curr_limit = info.me_mapsize;
488 	    if (slmdb->notify_fn)
489 		slmdb->notify_fn(slmdb->cb_context, MDB_MAP_RESIZED,
490 				 slmdb->curr_limit);
491 	}
492 	break;
493 
494 	/*
495 	 * What is it with these built-in hard limits that cause systems to
496 	 * stop when demand is at its highest? When the system is under
497 	 * stress it should slow down and keep making progress.
498 	 */
499     case MDB_READERS_FULL:
500 	if (slmdb->notify_fn)
501 	    slmdb->notify_fn(slmdb->cb_context, MDB_READERS_FULL);
502 	sleep(1);
503 	status = 0;
504 	break;
505 
506 	/*
507 	 * We can't solve this problem. The application should terminate with
508 	 * a fatal run-time error and the program should be re-run later.
509 	 */
510     default:
511 	break;
512     }
513 
514     /*
515      * If we cleared the error condition for a non-bulk transaction, return a
516      * success status. The caller should retry the failed operation
517      * immediately.
518      */
519     if (status == 0 && (slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0) {
520 
521 	/*
522 	 * We cleared the error condition for a	bulk transaction. If the
523 	 * transaction is not restartable, return the original error. The
524 	 * caller should terminate with a fatal run-time error, and the
525 	 * program should be re-run later.
526 	 */
527 	if (slmdb->longjmp_fn == 0)
528 	    return (original_status);
529 
530 	/*
531 	 * Rebuild a bulk transaction from scratch, by making a long jump
532 	 * back into the caller at some pre-arranged point. In MDB_NOLOCK
533 	 * mode, there is no need to upgrade a lock to "exclusive", because a
534 	 * failed write transaction has no side effects.
535 	 */
536 	if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0,
537 				    slmdb->lmdb_flags & MDB_RDONLY,
538 				    &slmdb->txn)) == 0
539 	    && (status = slmdb_prepare(slmdb)) == 0)
540 	    slmdb->longjmp_fn(slmdb->cb_context, 1);
541     }
542     return (status);
543 }
544 
545 /* slmdb_txn_begin - mdb_txn_begin() wrapper with LMDB error recovery */
546 
slmdb_txn_begin(SLMDB * slmdb,int rdonly,MDB_txn ** txn)547 static int slmdb_txn_begin(SLMDB *slmdb, int rdonly, MDB_txn **txn)
548 {
549     int     status;
550 
551     if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0, rdonly, txn)) != 0
552 	&& (status = slmdb_recover(slmdb, status)) == 0)
553 	status = slmdb_txn_begin(slmdb, rdonly, txn);
554 
555     return (status);
556 }
557 
558 /* slmdb_get - mdb_get() wrapper with LMDB error recovery */
559 
slmdb_get(SLMDB * slmdb,MDB_val * mdb_key,MDB_val * mdb_value)560 int     slmdb_get(SLMDB *slmdb, MDB_val *mdb_key, MDB_val *mdb_value)
561 {
562     MDB_txn *txn;
563     int     status;
564 
565     /*
566      * Start a read transaction if there's no bulk-mode txn.
567      */
568     if (slmdb->txn)
569 	txn = slmdb->txn;
570     else if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
571 	SLMDB_API_RETURN(slmdb, status);
572 
573     /*
574      * Do the lookup.
575      */
576     if ((status = mdb_get(txn, slmdb->dbi, mdb_key, mdb_value)) != 0
577 	&& status != MDB_NOTFOUND) {
578 	mdb_txn_abort(txn);
579 	if (txn == slmdb->txn)
580 	    slmdb->txn = 0;
581 	if ((status = slmdb_recover(slmdb, status)) == 0)
582 	    status = slmdb_get(slmdb, mdb_key, mdb_value);
583 	SLMDB_API_RETURN(slmdb, status);
584     }
585 
586     /*
587      * Close the read txn if it's not the bulk-mode txn.
588      */
589     if (slmdb->txn == 0)
590 	mdb_txn_abort(txn);
591 
592     SLMDB_API_RETURN(slmdb, status);
593 }
594 
595 /* slmdb_put - mdb_put() wrapper with LMDB error recovery */
596 
slmdb_put(SLMDB * slmdb,MDB_val * mdb_key,MDB_val * mdb_value,int flags)597 int     slmdb_put(SLMDB *slmdb, MDB_val *mdb_key,
598 		          MDB_val *mdb_value, int flags)
599 {
600     MDB_txn *txn;
601     int     status;
602 
603     /*
604      * Start a write transaction if there's no bulk-mode txn.
605      */
606     if (slmdb->txn)
607 	txn = slmdb->txn;
608     else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
609 	SLMDB_API_RETURN(slmdb, status);
610 
611     /*
612      * Do the update.
613      */
614     if ((status = mdb_put(txn, slmdb->dbi, mdb_key, mdb_value, flags)) != 0) {
615 	if (status != MDB_KEYEXIST) {
616 	    mdb_txn_abort(txn);
617 	    if (txn == slmdb->txn)
618 		slmdb->txn = 0;
619 	    if ((status = slmdb_recover(slmdb, status)) == 0)
620 		status = slmdb_put(slmdb, mdb_key, mdb_value, flags);
621 	    SLMDB_API_RETURN(slmdb, status);
622 	} else {
623 	    /* Abort non-bulk transaction only. */
624 	    if (slmdb->txn == 0)
625 		mdb_txn_abort(txn);
626 	}
627     }
628 
629     /*
630      * Commit the transaction if it's not the bulk-mode txn.
631      */
632     if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
633 	&& (status = slmdb_recover(slmdb, status)) == 0)
634 	status = slmdb_put(slmdb, mdb_key, mdb_value, flags);
635 
636     SLMDB_API_RETURN(slmdb, status);
637 }
638 
639 /* slmdb_del - mdb_del() wrapper with LMDB error recovery */
640 
slmdb_del(SLMDB * slmdb,MDB_val * mdb_key)641 int     slmdb_del(SLMDB *slmdb, MDB_val *mdb_key)
642 {
643     MDB_txn *txn;
644     int     status;
645 
646     /*
647      * Start a write transaction if there's no bulk-mode txn.
648      */
649     if (slmdb->txn)
650 	txn = slmdb->txn;
651     else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
652 	SLMDB_API_RETURN(slmdb, status);
653 
654     /*
655      * Do the update.
656      */
657     if ((status = mdb_del(txn, slmdb->dbi, mdb_key, (MDB_val *) 0)) != 0) {
658 	if (status != MDB_NOTFOUND) {
659 	    mdb_txn_abort(txn);
660 	    if (txn == slmdb->txn)
661 		slmdb->txn = 0;
662 	    if ((status = slmdb_recover(slmdb, status)) == 0)
663 		status = slmdb_del(slmdb, mdb_key);
664 	    SLMDB_API_RETURN(slmdb, status);
665 	} else {
666 	    /* Abort non-bulk transaction only. */
667 	    if (slmdb->txn == 0)
668 		mdb_txn_abort(txn);
669 	}
670     }
671 
672     /*
673      * Commit the transaction if it's not the bulk-mode txn.
674      */
675     if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
676 	&& (status = slmdb_recover(slmdb, status)) == 0)
677 	status = slmdb_del(slmdb, mdb_key);
678 
679     SLMDB_API_RETURN(slmdb, status);
680 }
681 
682 /* slmdb_cursor_get - mdb_cursor_get() wrapper with LMDB error recovery */
683 
slmdb_cursor_get(SLMDB * slmdb,MDB_val * mdb_key,MDB_val * mdb_value,MDB_cursor_op op)684 int     slmdb_cursor_get(SLMDB *slmdb, MDB_val *mdb_key,
685 			         MDB_val *mdb_value, MDB_cursor_op op)
686 {
687     MDB_txn *txn;
688     int     status = 0;
689 
690     /*
691      * TODO: figure how we would recover a failing bulk transaction.
692      */
693     if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0) {
694 	if (slmdb->assert_fn)
695 	    slmdb->assert_fn(slmdb->cb_context,
696 		     "slmdb_cursor_get: bulk transaction is not supported");
697 	return (MDB_PANIC);
698     }
699 
700     /*
701      * Open a read transaction and cursor if needed.
702      */
703     if (slmdb->cursor == 0) {
704 	if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
705 	    SLMDB_API_RETURN(slmdb, status);
706 	if ((status = mdb_cursor_open(txn, slmdb->dbi, &slmdb->cursor)) != 0) {
707 	    mdb_txn_abort(txn);
708 	    if ((status = slmdb_recover(slmdb, status)) == 0)
709 		status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
710 	    SLMDB_API_RETURN(slmdb, status);
711 	}
712 
713 	/*
714 	 * Restore the cursor position from the saved key information.
715 	 */
716 	if (HAVE_SLMDB_SAVED_KEY(slmdb) && op != MDB_FIRST)
717 	    status = mdb_cursor_get(slmdb->cursor, &slmdb->saved_key,
718 				    (MDB_val *) 0, MDB_SET);
719     }
720 
721     /*
722      * Database lookup.
723      */
724     if (status == 0)
725 	status = mdb_cursor_get(slmdb->cursor, mdb_key, mdb_value, op);
726 
727     /*
728      * Save the cursor position if successful. This can fail only with
729      * ENOMEM.
730      *
731      * Close the cursor read transaction if in MDB_NOLOCK mode, because the
732      * caller may release the external lock after we return.
733      */
734     if (status == 0) {
735 	status = slmdb_saved_key_assign(slmdb, mdb_key);
736 	if (slmdb->lmdb_flags & MDB_NOLOCK)
737 	    slmdb_cursor_close(slmdb);
738     }
739 
740     /*
741      * Handle end-of-database or other error.
742      */
743     else {
744 	/* Do not hand-optimize out the slmdb_cursor_close() calls below. */
745 	if (status == MDB_NOTFOUND) {
746 	    slmdb_cursor_close(slmdb);
747 	    if (HAVE_SLMDB_SAVED_KEY(slmdb))
748 		slmdb_saved_key_free(slmdb);
749 	} else {
750 	    slmdb_cursor_close(slmdb);
751 	    if ((status = slmdb_recover(slmdb, status)) == 0)
752 		status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
753 	    SLMDB_API_RETURN(slmdb, status);
754 	    /* Do not hand-optimize out the above return statement. */
755 	}
756     }
757     SLMDB_API_RETURN(slmdb, status);
758 }
759 
760 /* slmdb_assert_cb - report LMDB assertion failure */
761 
slmdb_assert_cb(MDB_env * env,const char * text)762 static void slmdb_assert_cb(MDB_env *env, const char *text)
763 {
764     SLMDB  *slmdb = (SLMDB *) mdb_env_get_userctx(env);
765 
766     if (slmdb->assert_fn)
767 	slmdb->assert_fn(slmdb->cb_context, text);
768 }
769 
770 /* slmdb_control - control optional settings */
771 
slmdb_control(SLMDB * slmdb,int first,...)772 int     slmdb_control(SLMDB *slmdb, int first,...)
773 {
774     va_list ap;
775     int     status = 0;
776     int     reqno;
777     int     rc;
778 
779     va_start(ap, first);
780     for (reqno = first; status == 0 && reqno != SLMDB_CTL_END; reqno = va_arg(ap, int)) {
781 	switch (reqno) {
782 	case SLMDB_CTL_LONGJMP_FN:
783 	    slmdb->longjmp_fn = va_arg(ap, SLMDB_LONGJMP_FN);
784 	    break;
785 	case SLMDB_CTL_NOTIFY_FN:
786 	    slmdb->notify_fn = va_arg(ap, SLMDB_NOTIFY_FN);
787 	    break;
788 	case SLMDB_CTL_ASSERT_FN:
789 	    slmdb->assert_fn = va_arg(ap, SLMDB_ASSERT_FN);
790 	    if ((rc = mdb_env_set_userctx(slmdb->env, (void *) slmdb)) != 0
791 	     || (rc = mdb_env_set_assert(slmdb->env, slmdb_assert_cb)) != 0)
792 		status = rc;
793 	    break;
794 	case SLMDB_CTL_CB_CONTEXT:
795 	    slmdb->cb_context = va_arg(ap, void *);
796 	    break;
797 	case SLMDB_CTL_API_RETRY_LIMIT:
798 	    slmdb->api_retry_limit = va_arg(ap, int);
799 	    break;
800 	case SLMDB_CTL_BULK_RETRY_LIMIT:
801 	    slmdb->bulk_retry_limit = va_arg(ap, int);
802 	    break;
803 	default:
804 	    status = errno = EINVAL;
805 	    break;
806 	}
807     }
808     va_end(ap);
809     return (status);
810 }
811 
812 /* slmdb_close - wrapper with LMDB error recovery */
813 
slmdb_close(SLMDB * slmdb)814 int     slmdb_close(SLMDB *slmdb)
815 {
816     int     status = 0;
817 
818     /*
819      * Finish an open bulk transaction. If slmdb_recover() returns after a
820      * bulk-transaction error, then it was unable to clear the error
821      * condition, or unable to restart the bulk transaction.
822      */
823     if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0 && slmdb->txn != 0
824 	&& (status = mdb_txn_commit(slmdb->txn)) != 0)
825 	status = slmdb_recover(slmdb, status);
826 
827     /*
828      * Clean up after an unfinished sequence() operation.
829      */
830     if (slmdb->cursor != 0)
831 	slmdb_cursor_close(slmdb);
832 
833     mdb_env_close(slmdb->env);
834 
835     /*
836      * Clean up the saved key information.
837      */
838     if (HAVE_SLMDB_SAVED_KEY(slmdb))
839 	slmdb_saved_key_free(slmdb);
840 
841     SLMDB_API_RETURN(slmdb, status);
842 }
843 
844 /* slmdb_init - mandatory initialization */
845 
slmdb_init(SLMDB * slmdb,size_t curr_limit,int size_incr,size_t hard_limit)846 int     slmdb_init(SLMDB *slmdb, size_t curr_limit, int size_incr,
847 		           size_t hard_limit)
848 {
849 
850     /*
851      * This is a separate operation to keep the slmdb_open() API simple.
852      * Don't allocate resources here. Just store control information,
853      */
854     slmdb->curr_limit = curr_limit;
855     slmdb->size_incr = size_incr;
856     slmdb->hard_limit = hard_limit;
857 
858     return (MDB_SUCCESS);
859 }
860 
861 /* slmdb_open - open wrapped LMDB database */
862 
slmdb_open(SLMDB * slmdb,const char * path,int open_flags,int lmdb_flags,int slmdb_flags)863 int     slmdb_open(SLMDB *slmdb, const char *path, int open_flags,
864 		           int lmdb_flags, int slmdb_flags)
865 {
866     struct stat st;
867     MDB_env *env;
868     MDB_txn *txn;
869     MDB_dbi dbi;
870     int     db_fd;
871     int     status;
872 
873     /*
874      * Create LMDB environment.
875      */
876     if ((status = mdb_env_create(&env)) != 0)
877 	return (status);
878 
879     /*
880      * Make sure that the memory map has room to store and commit an initial
881      * "drop" transaction as well as fixed database metadata. We have no way
882      * to recover from errors before the first application-level I/O request.
883      */
884 #define SLMDB_FUDGE      10240
885 
886     if (slmdb->curr_limit < SLMDB_FUDGE)
887 	slmdb->curr_limit = SLMDB_FUDGE;
888     if (stat(path, &st) == 0
889 	&& st.st_size > slmdb->curr_limit - SLMDB_FUDGE) {
890 	if (st.st_size > slmdb->hard_limit)
891 	    slmdb->hard_limit = st.st_size;
892 	if (st.st_size < slmdb->hard_limit - SLMDB_FUDGE)
893 	    slmdb->curr_limit = st.st_size + SLMDB_FUDGE;
894 	else
895 	    slmdb->curr_limit = slmdb->hard_limit;
896     }
897 
898     /*
899      * mdb_open() requires a txn, but since the default DB always exists in
900      * an LMDB environment, we usually don't need to do anything else with
901      * the txn. It is currently used for truncate and for bulk transactions.
902      */
903     if ((status = mdb_env_set_mapsize(env, slmdb->curr_limit)) != 0
904 	|| (status = mdb_env_open(env, path, lmdb_flags, 0644)) != 0
905 	|| (status = mdb_txn_begin(env, (MDB_txn *) 0,
906 				   lmdb_flags & MDB_RDONLY, &txn)) != 0
907 	|| (status = mdb_open(txn, (const char *) 0, 0, &dbi)) != 0
908 	|| (status = mdb_env_get_fd(env, &db_fd)) != 0) {
909 	mdb_env_close(env);
910 	return (status);
911     }
912 
913     /*
914      * Bundle up.
915      */
916     slmdb->open_flags = open_flags;
917     slmdb->lmdb_flags = lmdb_flags;
918     slmdb->slmdb_flags = slmdb_flags;
919     slmdb->env = env;
920     slmdb->dbi = dbi;
921     slmdb->db_fd = db_fd;
922     slmdb->cursor = 0;
923     slmdb_saved_key_init(slmdb);
924     slmdb->api_retry_count = 0;
925     slmdb->bulk_retry_count = 0;
926     slmdb->api_retry_limit = SLMDB_DEF_API_RETRY_LIMIT;
927     slmdb->bulk_retry_limit = SLMDB_DEF_BULK_RETRY_LIMIT;
928     slmdb->longjmp_fn = 0;
929     slmdb->notify_fn = 0;
930     slmdb->assert_fn = 0;
931     slmdb->cb_context = 0;
932     slmdb->txn = txn;
933 
934     if ((status = slmdb_prepare(slmdb)) != 0)
935 	mdb_env_close(env);
936 
937     return (status);
938 }
939 
940 #endif
941