1 /* DO NOT EDIT: automatically built by dist/s_windows. */
2 /*-
3  * See the file LICENSE for redistribution information.
4  *
5  * Copyright (c) 1996, 2013 Oracle and/or its affiliates.  All rights reserved.
6  *
7  * $Id$
8  */
9 
10 #ifndef _DB_INT_H_
11 #define	_DB_INT_H_
12 
13 /*******************************************************
14  * Berkeley DB ANSI/POSIX include files.
15  *******************************************************/
16 #ifdef HAVE_SYSTEM_INCLUDE_FILES
17 #include <sys/types.h>
18 #ifdef DIAG_MVCC
19 #include <sys/mman.h>
20 #endif
21 #include <sys/stat.h>
22 
23 #if defined(HAVE_REPLICATION_THREADS)
24 #ifdef HAVE_SYS_SELECT_H
25 #include <sys/select.h>
26 #endif
27 #ifdef HAVE_VXWORKS
28 #include <selectLib.h>
29 #endif
30 #endif
31 
32 #if TIME_WITH_SYS_TIME
33 #include <sys/time.h>
34 #include <time.h>
35 #else
36 #if HAVE_SYS_TIME_H
37 #include <sys/time.h>
38 #else
39 #include <time.h>
40 #endif
41 #endif
42 
43 #ifdef HAVE_VXWORKS
44 #include <net/uio.h>
45 #else
46 #include <sys/uio.h>
47 #endif
48 
49 #if defined(HAVE_REPLICATION_THREADS)
50 #ifdef HAVE_SYS_SOCKET_H
51 #include <sys/socket.h>
52 #endif
53 #include <netinet/in.h>
54 #include <netdb.h>
55 #include <arpa/inet.h>
56 #endif
57 
58 #if defined(STDC_HEADERS) || defined(__cplusplus)
59 #include <stdarg.h>
60 #else
61 #include <varargs.h>
62 #endif
63 
64 #include <ctype.h>
65 #include <errno.h>
66 #include <fcntl.h>
67 #include <limits.h>
68 #include <signal.h>
69 #include <stddef.h>
70 #include <stdio.h>
71 #include <stdlib.h>
72 #include <string.h>
73 #include <unistd.h>
74 
75 #endif /* !HAVE_SYSTEM_INCLUDE_FILES */
76 
77 #ifdef DB_WIN32
78 #include "dbinc/win_db.h"
79 #endif
80 
81 #ifdef HAVE_DBM
82 #undef	DB_DBM_HSEARCH
83 #define	DB_DBM_HSEARCH 1
84 #endif
85 
86 #include "db.h"
87 #include "clib_port.h"
88 
89 #include "dbinc/queue.h"
90 #include "dbinc/shqueue.h"
91 #include "dbinc/perfmon.h"
92 
93 #if defined(__cplusplus)
94 extern "C" {
95 #endif
96 
/*
 * __DB_IMPORT --
 *	Decoration for structures that are available outside a DLL.  It
 *	expands to __declspec(dllimport) only for an MSVC Windows build in
 *	which neither DB_CREATE_DLL nor _LIB is defined; in every other
 *	configuration it expands to nothing.
 */
#if !defined(DB_WIN32) || !defined(_MSC_VER) || \
    defined(DB_CREATE_DLL) || defined(_LIB)
#define	__DB_IMPORT
#else
#define	__DB_IMPORT __declspec(dllimport)
#endif
107 
/*******************************************************
 * Forward structure declarations.
 *
 * Each structure tag is declared before the typedef that names it, so the
 * handle types below can be used (through pointers) ahead of the full
 * structure definitions.
 *******************************************************/
struct __db_commit_info; typedef struct __db_commit_info DB_COMMIT_INFO;
struct __db_reginfo_t;	typedef struct __db_reginfo_t REGINFO;
struct __db_txnhead;	typedef struct __db_txnhead DB_TXNHEAD;
struct __db_txnlist;	typedef struct __db_txnlist DB_TXNLIST;
struct __vrfy_childinfo;typedef struct __vrfy_childinfo VRFY_CHILDINFO;
struct __vrfy_dbinfo;   typedef struct __vrfy_dbinfo VRFY_DBINFO;
struct __vrfy_pageinfo; typedef struct __vrfy_pageinfo VRFY_PAGEINFO;

/*
 * Log verification structures.  __lv_filelife is declared here alongside
 * its siblings so that every typedef below has a matching tag declaration.
 */
struct __db_log_verify_info;
struct __txn_verify_info;
struct __lv_filereg_info;
struct __lv_filelife;
struct __lv_ckp_info;
struct __lv_timestamp_info;
typedef struct __db_log_verify_info DB_LOG_VRFY_INFO;
typedef struct __txn_verify_info VRFY_TXN_INFO;
typedef struct __lv_filereg_info VRFY_FILEREG_INFO;
typedef struct __lv_filelife VRFY_FILELIFE;
typedef struct __lv_ckp_info VRFY_CKP_INFO;
typedef struct __lv_timestamp_info VRFY_TIMESTAMP_INFO;
130 
131 /*
132  * TXNINFO_HANDLER --
133  *	Callback function pointer type for __iterate_txninfo.
134  */
135 typedef int (*TXNINFO_HANDLER) __P((DB_LOG_VRFY_INFO *, VRFY_TXN_INFO *, void *));
136 
137 typedef SH_TAILQ_HEAD(__hash_head) DB_HASHTAB;
138 
/*******************************************************
 * General purpose constants and macros.
 *******************************************************/
/* Boolean constants; any earlier definitions are discarded first. */
#undef	TRUE
#undef	FALSE
#define	FALSE		0
#define	TRUE		(!FALSE)

/* Byte counts. */
#define	MEGABYTE	1048576		/* 2^20 */
#define	GIGABYTE	1073741824	/* 2^30 */

/* Time unit conversion factors. */
#define	MS_PER_SEC	1000		/* Milliseconds in a second */
#define	US_PER_MS	1000		/* Microseconds in a millisecond */
#define	US_PER_SEC	1000000		/* Microseconds in a second */
#define	NS_PER_US	1000		/* Nanoseconds in a microsecond */
#define	NS_PER_MS	1000000		/* Nanoseconds in a millisecond */
#define	NS_PER_SEC	1000000000	/* Nanoseconds in a second */

/* Record number zero is never a legal record number. */
#define	RECNO_OOB	0
158 
/*
 * NOP_STATEMENT --
 *	An empty do/while(0) body: no runtime effect, but unlike a bare ';'
 *	it does not provoke "empty statement" compiler warnings.  It is the
 *	expansion used for macros that are conditionally compiled away.
 */
#define	NOP_STATEMENT	do { } while (0)

/*
 * POWER_OF_TWO --
 *	True when x has at most one bit set.  Zero also tests true, which
 *	does not matter for the uses here.  x is evaluated twice.
 */
#define	POWER_OF_TWO(x)	(((x) & ((x) - 1)) == 0)

/*
 * Page size limits, and the test for a legal page size: a power of two
 * within [DB_MIN_PGSIZE, DB_MAX_PGSIZE].  x is evaluated more than once.
 */
#define	DB_MIN_PGSIZE	0x000200	/* Minimum page size (512). */
#define	DB_MAX_PGSIZE	0x010000	/* Maximum page size (65536). */
#define	IS_VALID_PAGESIZE(x)						\
	(POWER_OF_TWO(x) && (x) >= DB_MIN_PGSIZE && ((x) <= DB_MAX_PGSIZE))

/* Default minimum number of pages cached. */
#define	DB_MINPAGECACHE	16

/*
 * DB_DEF_IOSIZE --
 *	I/O size used when the underlying filesystem block size cannot be
 *	determined: 8K, on the grounds that most OS's use less than 8K for a
 *	VM page size.
 */
#define	DB_DEF_IOSIZE	(8 * 1024)

/*
 * DB_ALIGN --
 *	Round the integer v up to the next multiple of bound.  The mask
 *	arithmetic relies on bound being a power of two.
 */
#undef	DB_ALIGN
#define	DB_ALIGN(v, bound)						\
	(((v) + (bound) - 1) & ~(((uintmax_t)(bound)) - 1))

/*
 * ALIGNP_INC --
 *	Round the pointer p up to the next multiple of bound, yielding a
 *	void *.  The mask arithmetic relies on bound being a power of two.
 */
#undef	ALIGNP_INC
#define	ALIGNP_INC(p, bound)						\
	(void *)(((uintptr_t)(p) + (bound) - 1) & ~(((uintptr_t)(bound)) - 1))
192 
/*
 * DB_ALIGN8 --
 *	Alignment decoration for shared structures.  When
 *	HAVE_MIXED_SIZE_ADDRESSING is defined it forces 8-byte alignment so
 *	that shared structures have a fixed size and fixed field offsets on
 *	both 32-bit and 64-bit platforms; otherwise it expands to nothing.
 */
#ifndef HAVE_MIXED_SIZE_ADDRESSING
#define DB_ALIGN8
#else
#define DB_ALIGN8 __declspec(align(8))
#endif
203 
/*
 * va_copy --
 *	Berkeley DB uses the C99 va_copy macro.  Not every compiler supplies
 *	it, so when it is missing fall back to plain assignment, which is
 *	what pre-C99 implementations are assumed to tolerate.
 */
#if !defined(va_copy)
#define	va_copy(d, s)	((d) = (s))
#endif
211 
/*
 * P_TO_ULONG --
 *	Convert an address to a u_long so it can be printed; u_long is the
 *	largest type we can print portably.  Most 64-bit systems have made
 *	longs 64 bits wide, so this should work.
 */
#define	P_TO_ULONG(p)	((u_long)(uintptr_t)(p))

/*
 * Pointer to integer conversions.
 *
 * Every conversion goes through uintptr_t first, which turns the pointer
 * into an integral value; the outer cast then narrows it to the wanted type
 * explicitly, so no warning is raised when the result is assigned to an
 * integral type smaller than uintptr_t.
 */
#define	P_TO_UINT16(p)	((u_int16_t)(uintptr_t)(p))
#define	P_TO_UINT32(p)	((u_int32_t)(uintptr_t)(p))
#define	P_TO_ROFF(p)	((roff_t)(uintptr_t)(p))

/* ROFF_TO_P: the converse of P_TO_ROFF(). */
#define	ROFF_TO_P(roff)	((void *)(uintptr_t)(roff))
233 
/*
 * SSZ, SSZA --
 *	Byte offset of a field within a structure, as a u_int16_t.
 *
 * Several on-page structures consist of a fixed set of fields followed by a
 * variable length array of items.  The offset of that trailing array is the
 * size of the structure without the array, and equally the position of the
 * array's first element.  The same macros give the offset of any ordinary
 * field, which is used when copying structure elements out of unaligned
 * memory such as a packed page.
 *
 * SSZ is for non-array fields and SSZA for array fields; two forms exist
 * because compilers object if you take the address of an array.
 */
#undef	SSZ
#undef	SSZA
#define	SSZ(name, field)  P_TO_UINT16(&(((name *)0)->field))
#define	SSZA(name, field) P_TO_UINT16(&(((name *)0)->field[0]))
252 
/*
 * FN --
 *	Structure used to print flag values: pairs a flag value with the
 *	name printed for it.
 */
typedef struct __fn {
	u_int32_t	 mask;		/* Flag value. */
	const char	*name;		/* Flag name. */
} FN;
258 
/*
 * Set, clear and test flags.
 *
 *	FLD_*	operate directly on the flag word "fld".
 *	F_*	operate on the "flags" member reached through pointer p.
 *	F2_*	operate on the "flags2" member reached through pointer p.
 *	LF_*	operate on a variable named "flags" in the caller's scope.
 *
 * Every expansion is a single fully parenthesized expression, so the macros
 * keep their meaning when used inside a larger expression (for example next
 * to '==', '!' or '?:') as well as when used as a statement.  The *_ISSET
 * forms yield the masked bits, not a normalized 0/1 value.
 */
#define	FLD_CLR(fld, f)		((fld) &= ~(f))
#define	FLD_ISSET(fld, f)	((fld) & (f))
#define	FLD_SET(fld, f)		((fld) |= (f))
#define	F_CLR(p, f)		((p)->flags &= ~(f))
#define	F_ISSET(p, f)		((p)->flags & (f))
#define	F_SET(p, f)		((p)->flags |= (f))
#define	F2_CLR(p, f)		((p)->flags2 &= ~(f))
#define	F2_ISSET(p, f)		((p)->flags2 & (f))
#define	F2_SET(p, f)		((p)->flags2 |= (f))
#define	LF_CLR(f)		((flags) &= ~(f))
#define	LF_ISSET(f)		((flags) & (f))
#define	LF_SET(f)		((flags) |= (f))
272 
/*
 * DB_PCT, DB_PCT_PG --
 *	Calculate a percentage, truncated to an int.  The values can overflow
 *	32-bit integer arithmetic so we use floating point.  A total of zero
 *	yields 0 rather than dividing by zero.
 *
 *	DB_PCT(v, total)	 v as a percentage of total.
 *	DB_PCT_PG(v, total, pgsize)
 *				 100 minus v as a percentage of
 *				 (total * pgsize).  When calculating a
 *				 bytes-vs-page size percentage we want the
 *				 inverse of the percentage in all cases.
 *
 * Every argument is parenthesized before it is cast or combined, so an
 * expression argument such as "a / b" is converted as a whole.
 */
#define	DB_PCT(v, total)						\
	((int)((total) == 0 ? 0 : ((double)(v) * 100) / (total)))
#define	DB_PCT_PG(v, total, pgsize)					\
	((int)((total) == 0 ? 0 :					\
	    100 - ((double)(v) * 100) / (((double)(total)) * (pgsize))))
286 
/*
 * Statistics maintenance.
 *
 * Statistics update shared memory and so are expensive: the values are only
 * maintained when HAVE_STATISTICS is defined, i.e. when the results are
 * going to be displayed.  Without it every macro here is an empty statement.
 *
 * When performance monitoring is enabled, the changed value can be published
 * (via DTrace or SystemTap) along with another associated value or two; that
 * is the job of the STAT_PERFMON2/STAT_PERFMON3 calls.  The *_VERB variants
 * publish two identifiers instead of one.
 */
#undef	STAT
#ifndef	HAVE_STATISTICS
#define	STAT(x)							NOP_STATEMENT
#define	STAT_ADJUST(env, cat, subcat, val, amt, id)		NOP_STATEMENT
#define	STAT_ADJUST_VERB(env, cat, subcat, val, amt, id1, id2)	NOP_STATEMENT
#define	STAT_INC(env, cat, subcat, val, id)			NOP_STATEMENT
#define	STAT_INC_VERB(env, cat, subcat, val, id1, id2)		NOP_STATEMENT
#define	STAT_DEC(env, cat, subcat, val, id)			NOP_STATEMENT
#define	STAT_SET(env, cat, subcat, val, newval, id)		NOP_STATEMENT
#define	STAT_SET_VERB(env, cat, subcat, val, newval, id1, id2)	NOP_STATEMENT
#else
/* STAT: emit x verbatim. */
#define	STAT(x)	x

/* STAT_ADJUST[_VERB]: add amount to val, then publish the new value. */
#define	STAT_ADJUST(env, cat, subcat, val, amount, id)			\
	do {								\
		(val) += (amount);					\
		STAT_PERFMON2((env), cat, subcat, (val), (id));		\
	} while (0)
#define	STAT_ADJUST_VERB(env, cat, subcat, val, amount, id1, id2)	\
	do {								\
		(val) += (amount);					\
		STAT_PERFMON3((env), cat, subcat, (val), (id1), (id2));	\
	} while (0)

/* STAT_INC[_VERB]: an adjustment by one. */
#define	STAT_INC(env, cat, subcat, val, id) 				\
	STAT_ADJUST(env, cat, subcat, (val), 1, (id))
#define	STAT_INC_VERB(env, cat, subcat, val, id1, id2) 			\
	STAT_ADJUST_VERB((env), cat, subcat, (val), 1, (id1), (id2))

/*
 * STAT_DEC: decrements directly instead of calling STAT_ADJUST() with an
 * amount of (-1), because the latter might generate a compilation warning
 * for an unsigned value.
 *
 * N.B.: Add a verbose version of STAT_DEC() when needed.
 */
#define	STAT_DEC(env, cat, subcat, val, id) 				\
	do {								\
		(val)--;						\
		STAT_PERFMON2((env), cat, subcat, (val), (id));		\
	} while (0)

/* STAT_SET[_VERB]: overwrite val with newval, then publish it. */
#define	STAT_SET(env, cat, subcat, val, newval, id) 			\
	do {								\
		(val) = (newval);					\
		STAT_PERFMON2((env), cat, subcat, (val), (id));		\
	} while (0)
#define	STAT_SET_VERB(env, cat, subcat, val, newval, id1, id2) 		\
	do {								\
		(val) = (newval);					\
		STAT_PERFMON3((env), cat, subcat, (val), (id1), (id2));	\
	} while (0)
#endif
341 
/*
 * DB_THREADID_INIT --
 *	Initialize a thread ID to zero.  With HAVE_SIMPLE_THREAD_TYPE the ID
 *	is handed to COMPQUIET together with 0; otherwise its storage is
 *	cleared with memset.
 */
#ifdef HAVE_SIMPLE_THREAD_TYPE
#define DB_THREADID_INIT(t)	COMPQUIET((t), 0)
#else
#define DB_THREADID_INIT(t)	memset(&(t), 0, sizeof(t))
#endif
347 
/*
 * ERR_ORIGIN, ERR_ORIGIN_MSG, WARNING_ORIGIN --
 *	Mark the place where an error condition is first noticed.  This lets
 *	one be notified (via e.g. DTrace, SystemTap, ...) when an error occurs
 *	deep inside DB, rather than when it is returned back through the API.
 *
 * Each macro fires a PERFMON probe and then evaluates to errcode.  The
 * second actual argument is used as the second part of the error or warning
 * event name, so it must be a symbolic name such as EINVAL or
 * DB_LOCK_DEADLOCK and not a variable.  System call failures are not
 * reported here: they would be handled by tracing on syscall exit, e.g.
 * when the call returns < 0.
 */
#define	ERR_ORIGIN(env, errcode)        				\
	(PERFMON0(env, error, errcode), errcode)

#define	ERR_ORIGIN_MSG(env, errcode, msg)				\
	(PERFMON1(env, error, errcode, msg), errcode)

#define	WARNING_ORIGIN(env, errcode)					\
	(PERFMON0(env, warning, errcode), errcode)
366 
/*
 * DB_MSGBUF --
 *	Structure used for callback message aggregation: text is accumulated
 *	in a heap buffer and emitted as one message.  Used to display values
 *	in XXX_stat_print calls.
 */
typedef struct __db_msgbuf {
	char *buf;			/* Heap allocated buffer. */
	char *cur;			/* Current end of message. */
	size_t len;			/* Allocated length of buffer. */
} DB_MSGBUF;

/* DB_MSGBUF_INIT: reset to the empty state; nothing is freed. */
#define	DB_MSGBUF_INIT(a) do {						\
	(a)->buf = (a)->cur = NULL;					\
	(a)->len = 0;							\
} while (0)

/*
 * DB_MSGBUF_FLUSH: if a buffer is held, pass its text to __db_msg (unless
 * the buffer is empty), free it with __os_free and reset the structure.
 */
#define	DB_MSGBUF_FLUSH(env, a) do {					\
	if ((a)->buf != NULL) {						\
		if ((a)->cur != (a)->buf)				\
			__db_msg(env, "%s", (a)->buf);			\
		__os_free(env, (a)->buf);				\
		DB_MSGBUF_INIT(a);					\
	}								\
} while (0)

/*
 * DB_MSGBUF_REP_FLUSH: like DB_MSGBUF_FLUSH, with two switches.  When
 * diag_msg is true a non-empty buffer is first passed to __db_repmsg.  When
 * regular_msg is true the buffer is then flushed through DB_MSGBUF_FLUSH;
 * otherwise it is freed and reset without calling __db_msg.
 *
 * diag_msg is parenthesized because it sits in an && chain: an argument such
 * as "a || b" would otherwise regroup and report an empty buffer.
 */
#define	DB_MSGBUF_REP_FLUSH(env, a, diag_msg, regular_msg) do {		\
	if ((a)->buf != NULL) {						\
		if ((a)->cur != (a)->buf && (diag_msg))			\
			__db_repmsg(env, "%s", (a)->buf);		\
		if (regular_msg)					\
			DB_MSGBUF_FLUSH(env, a);			\
		else {							\
			__os_free(env, (a)->buf);			\
			DB_MSGBUF_INIT(a);				\
		}							\
	}								\
} while (0)

/*
 * Statistics display helpers.  Each prints a value, a tab and a description.
 * NOTE: all of them refer to a variable named "env" that must be in scope at
 * the point of use; it is not a macro parameter.
 */
/* STAT_FMT: print v, cast to type, with the caller's format string. */
#define	STAT_FMT(msg, fmt, type, v) do {				\
	DB_MSGBUF __mb;							\
	DB_MSGBUF_INIT(&__mb);						\
	__db_msgadd(env, &__mb, fmt, (type)(v));			\
	__db_msgadd(env, &__mb, "\t%s", msg);				\
	DB_MSGBUF_FLUSH(env, &__mb);					\
} while (0)
#define	STAT_HEX(msg, v)						\
	__db_msg(env, "%#lx\t%s", (u_long)(v), msg)
/* STAT_ISSET: print "!Set" for a NULL pointer, " Set" otherwise. */
#define	STAT_ISSET(msg, p)						\
	__db_msg(env, "%sSet\t%s", (p) == NULL ? "!" : " ", msg)
#define	STAT_LONG(msg, v)						\
	__db_msg(env, "%ld\t%s", (long)(v), msg)
#define	STAT_LSN(msg, lsnp)						\
	__db_msg(env, "%lu/%lu\t%s",					\
	    (u_long)(lsnp)->file, (u_long)(lsnp)->offset, msg)
#define	STAT_POINTER(msg, v)						\
	__db_msg(env, "%#lx\t%s", P_TO_ULONG(v), msg)
/* STAT_STRING: print the string, or "!Set" when it is NULL. */
#define	STAT_STRING(msg, p) do {					\
	const char *__p = (p);	/* p may be a function call. */		\
	__db_msg(env, "%s\t%s", __p == NULL ? "!Set" : __p, msg);	\
} while (0)
#define	STAT_ULONG(msg, v)						\
	__db_msg(env, "%lu\t%s", (u_long)(v), msg)
425 
/*
 * Error and message string construction.
 *
 * These macros control how error and message strings are output by Berkeley
 * DB.  There are essentially three different controls available:
 *  - By default an error string is output together with its unique
 *    identifier.
 *  - If HAVE_STRIPPED_MESSAGES is enabled, the unique identifier is output
 *    along with any parameters to the error string, but not the text itself.
 *  - If HAVE_LOCALIZATION is defined, and the '_()' macro is implemented, a
 *    gettext or ICU style translation will be done.
 *
 * Each new string that will be output should be wrapped in a DB_STR* macro.
 * There are three versions of this macro for different scenarios:
 *  - DB_STR for strings that need an identifier, and don't have any argument.
 *  - DB_STR_A for strings that need an identifier, and have argument(s).
 *  - DB_STR_P for strings that don't need an identifier, and don't have
 *    arguments.
 *
 * Error message IDs are automatically assigned by dist/s_message_id script.
 */
#if defined(HAVE_LOCALIZATION)
#define _(msg)	msg	/* Replace with localization function. */
#else
#define _(msg)	msg
#endif

/* DB_STR_C: choose between the message text and its bare argument format. */
#ifndef HAVE_STRIPPED_MESSAGES
#define DB_STR_C(msg, fmt)	_(msg)
#else
#define DB_STR_C(msg, fmt)	fmt
#endif

/* DB_MSGID: every identifier carries the "BDB" prefix. */
#define DB_MSGID(id)		"BDB" id

#define DB_STR_A(id, msg, fmt)	DB_MSGID(id) " " DB_STR_C(msg, fmt)

/* DB_STR: DB_STR_A with an empty argument format. */
#define DB_STR(id, msg)		DB_STR_A(id, msg, "")

#define DB_STR_P(msg)		_(msg)
464 
/*
 * DB_SET_DBT, DB_INIT_DBT --
 *	Fill in the data and size fields of a DBT.
 *
 * There are quite a few places in Berkeley DB where a DBT is initialized
 * from a string or other random pointer type, using a length typed to size_t
 * in most cases; these macros do the casts (to void * and u_int32_t) once.
 * DB_SET_DBT touches only the two fields; DB_INIT_DBT zeroes the whole DBT
 * first, because we often want to clear the DBT before filling it in.
 *
 * The dbt argument is the structure itself, not a pointer to it.
 */
#define	DB_SET_DBT(dbt, d, s)  do {					\
	(dbt).data = (void *)(d);					\
	(dbt).size = (u_int32_t)(s);					\
} while (0)

#define	DB_INIT_DBT(dbt, d, s)  do {					\
	memset(&(dbt), 0, sizeof(dbt));					\
	DB_SET_DBT(dbt, d, s);						\
} while (0)
479 
/*******************************************************
 * API return values
 *******************************************************/
/*
 * DB_RETOK_* --
 *	Return values that are OK for each different call.  For most calls
 *	only a return of 0 is OK, but some, like db->get, can return a value
 *	such as DB_NOTFOUND that really isn't an error.
 */
#define	DB_RETOK_STD(ret)	((ret) == 0)

/* Cursor operations. */
#define	DB_RETOK_DBCDEL(ret)						\
	((ret) == 0 || (ret) == DB_KEYEMPTY || (ret) == DB_NOTFOUND)
#define	DB_RETOK_DBCGET(ret)						\
	((ret) == 0 || (ret) == DB_KEYEMPTY || (ret) == DB_NOTFOUND)
#define	DB_RETOK_DBCPUT(ret)						\
	((ret) == 0 || (ret) == DB_KEYEXIST || (ret) == DB_NOTFOUND)

/* Database handle operations. */
#define	DB_RETOK_DBDEL(ret)	DB_RETOK_DBCDEL(ret)
#define	DB_RETOK_DBGET(ret)	DB_RETOK_DBCGET(ret)
#define	DB_RETOK_DBPUT(ret)	((ret) == 0 || (ret) == DB_KEYEXIST)
#define	DB_RETOK_EXISTS(ret)	DB_RETOK_DBCGET(ret)

/* Log and memory pool. */
#define	DB_RETOK_LGGET(ret)	((ret) == 0 || (ret) == DB_NOTFOUND)
#define	DB_RETOK_MPGET(ret)	((ret) == 0 || (ret) == DB_PAGE_NOTFOUND)

/* Replication. */
#define	DB_RETOK_REPPMSG(ret)						\
	((ret) == 0 ||							\
	    (ret) == DB_REP_IGNORE ||					\
	    (ret) == DB_REP_ISPERM ||					\
	    (ret) == DB_REP_NEWMASTER ||				\
	    (ret) == DB_REP_NEWSITE ||					\
	    (ret) == DB_REP_NOTPERM ||					\
	    (ret) == DB_REP_WOULDROLLBACK)
#define	DB_RETOK_REPMGR_LOCALSITE(ret)	((ret) == 0 || (ret) == DB_NOTFOUND)
#define	DB_RETOK_REPMGR_START(ret) ((ret) == 0 || (ret) == DB_REP_IGNORE)

/* Transactions. */
#define	DB_RETOK_TXNAPPLIED(ret)					\
	((ret) == 0 ||							\
	    (ret) == DB_NOTFOUND ||					\
	    (ret) == DB_TIMEOUT ||					\
	    (ret) == DB_KEYEMPTY)
514 
/*
 * DB_OPNOTSUP --
 *	A reasonable operation-not-supported error: EOPNOTSUPP if the system
 *	defines it, else ENOTSUP, else EINVAL as the last resort.
 */
#if defined(EOPNOTSUPP)
#define	DB_OPNOTSUP	EOPNOTSUPP
#elif defined(ENOTSUP)
#define	DB_OPNOTSUP	ENOTSUP
#else
#define	DB_OPNOTSUP	EINVAL
#endif
525 
/*******************************************************
 * Files.
 *******************************************************/
/*
 * DB_MAXPATHLEN --
 *	The maximum path length, fixed at 1024.  It's too hard to figure out
 *	what the real path length is: it was traditionally stored in
 *	<sys/param.h>, and that file isn't always available.
 */
#define	DB_MAXPATHLEN	1024

/* Current working directory. */
#define	PATH_DOT	"."

/* Path separator character(s): backslash, slash and colon. */
#define	PATH_SEPARATOR	"\\/:"
539 
/*******************************************************
 * Environment.
 *******************************************************/
/*
 * APPNAME --
 *	Type passed to __db_appname().
 */
typedef enum {
	DB_APP_NONE	= 0,		/* No type (region). */
	DB_APP_DATA	= 1,		/* Data file. */
	DB_APP_LOG	= 2,		/* Log file. */
	DB_APP_META	= 3,		/* Persistent metadata file. */
	DB_APP_RECOVER	= 4,		/* We are in recovery. */
	DB_APP_TMP	= 5		/* Temporary file. */
} APPNAME;
552 
/*
 * Tests for whether various functionality has been configured.  Apart from
 * ALIVE_ON and CDB_LOCKING, each one checks that the matching handle in the
 * ENV is non-NULL.
 *
 * ALIVE_ON	The is_alive function is configured.
 * CDB_LOCKING	CDB product locking.
 * CRYPTO_ON	Security has been configured.
 * LOCKING_ON	Locking has been configured.
 * LOGGING_ON	Logging has been configured.
 * MUTEX_ON	Mutexes have been configured.
 * MPOOL_ON	Memory pool has been configured.
 * REP_ON	Replication has been configured.
 * TXN_ON	Transactions have been configured.
 *
 * REP_ON needs two checks: if the BDB library was compiled without
 * replication support, ENV->rep_handle will be NULL; if the library has
 * replication support but it was not configured, the handle's region
 * reference will be NULL.
 */
#define	ALIVE_ON(env)		((env)->dbenv->is_alive != NULL)
#define	CDB_LOCKING(env)	F_ISSET(env, ENV_CDB)
#define	CRYPTO_ON(env)		((env)->crypto_handle != NULL)
#define	LOCKING_ON(env)		((env)->lk_handle != NULL)
#define	LOGGING_ON(env)		((env)->lg_handle != NULL)
#define	MUTEX_ON(env)		((env)->mutex_handle != NULL)
#define	MPOOL_ON(env)		((env)->mp_handle != NULL)
#define	REP_ON(env)							\
	((env)->rep_handle != NULL && (env)->rep_handle->region != NULL)
#define	TXN_ON(env)		((env)->tx_handle != NULL)

/*
 * STD_LOCKING --
 *	Standard locking, that is, locking was configured and CDB was not.
 *	We do not do locking in off-page duplicate trees, so the cursor's
 *	DBC_OPD flag is checked first.
 */
#define	STD_LOCKING(dbc)						\
	(!F_ISSET(dbc, DBC_OPD) &&					\
	    !CDB_LOCKING((dbc)->env) && LOCKING_ON((dbc)->env))

/*
 * IS_RECOVERING --
 *	The system is running recovery: logging is configured and the log
 *	handle has DBLOG_RECOVER set.
 */
#define	IS_RECOVERING(env)						\
	(LOGGING_ON(env) && F_ISSET((env)->lg_handle, DBLOG_RECOVER))
596 
/*
 * Environment configuration guards.
 *
 * CAUTION: each of these expands to a bare "if (...) return (...);"
 * statement.  They return from the *calling* function, they already end in
 * a semicolon, and they must not be used as the body of an if/else without
 * braces.
 */
/* Initialization methods are often illegal before/after open is called. */
#define	ENV_ILLEGAL_AFTER_OPEN(env, name)				\
	if (F_ISSET((env), ENV_OPEN_CALLED))				\
		return (__db_mi_open(env, name, 1));
#define	ENV_ILLEGAL_BEFORE_OPEN(env, name)				\
	if (!F_ISSET((env), ENV_OPEN_CALLED))				\
		return (__db_mi_open(env, name, 0));

/*
 * We're not actually user hostile, honest.
 *
 * ENV_REQUIRES_CONFIG returns __env_not_config() when the handle expression
 * is NULL.  The handle is parenthesized so that an expression argument (a
 * conditional, an assignment, ...) is compared to NULL as a whole.
 * ENV_REQUIRES_CONFIG_XX takes the *name* of a handle member of env and
 * tests that handle's region pointer instead.
 * ENV_NOT_CONFIGURED applies ENV_REQUIRES_CONFIG only once the environment
 * has ENV_OPEN_CALLED set.
 */
#define	ENV_REQUIRES_CONFIG(env, handle, i, flags)			\
	if ((handle) == NULL)						\
		return (__env_not_config(env, i, flags));
#define	ENV_REQUIRES_CONFIG_XX(env, handle, i, flags)			\
	if ((env)->handle->region == NULL)				\
		return (__env_not_config(env, i, flags));
#define	ENV_NOT_CONFIGURED(env, handle, i, flags)			\
	if (F_ISSET((env), ENV_OPEN_CALLED))				\
		ENV_REQUIRES_CONFIG(env, handle, i, flags)
615 
/*
 * ENV_ENTER_RET --
 *	Enter the library on behalf of the calling thread.  ret is set to 0
 *	and then handed to PANIC_CHECK_RET.  If it is still 0 afterwards:
 *	when the environment has no thread hash table, ip is set to NULL;
 *	otherwise __env_set_state() is called with THREAD_ACTIVE and the
 *	address of ip, and its result is stored in ret.
 *
 *	Both ip and ret are assigned to, so they must be lvalues.
 */
#define	ENV_ENTER_RET(env, ip, ret) do {				\
	ret = 0;							\
	PANIC_CHECK_RET(env, ret);					\
	if (ret == 0) {							\
		if ((env)->thr_hashtab != NULL)				\
			ret = __env_set_state(env, &(ip), THREAD_ACTIVE);\
		else							\
			ip = NULL;					\
	}								\
} while (0)

/*
 * ENV_ENTER --
 *	As ENV_ENTER_RET, except that a non-zero result is returned from the
 *	calling function straight away.
 */
#define	ENV_ENTER(env, ip) do {						\
	int __ret;							\
	ip = NULL;							\
	ENV_ENTER_RET(env, ip, __ret);					\
	if (__ret != 0)							\
		return (__ret);						\
} while (0)

/* FAILCHK_THREAD: mark the thread, if it has an info slot, THREAD_FAILCHK. */
#define	FAILCHK_THREAD(env, ip) do {					\
	if ((ip) != NULL)						\
		(ip)->dbth_state = THREAD_FAILCHK;			\
} while (0)

/* ENV_GET_THREAD_INFO: another name for ENV_ENTER. */
#define	ENV_GET_THREAD_INFO(env, ip) ENV_ENTER(env, ip)
641 
/*
 * ENV_LEAVE --
 *	Leave the library: a thread that has an info slot is marked
 *	THREAD_OUT.  DIAGNOSTIC builds first assert that the thread was in
 *	the THREAD_ACTIVE or THREAD_FAILCHK state.
 *
 * CHECK_THREAD --
 *	DIAGNOSTIC builds only: when the environment has a thread hash table,
 *	call __env_set_state() with THREAD_VERIFY.
 *
 * CHECK_MTX_THREAD --
 *	DIAGNOSTIC builds with HAVE_STATISTICS only: run CHECK_THREAD unless
 *	the mutex's alloc_id is MTX_MUTEX_REGION, MTX_ENV_REGION or
 *	MTX_APPLICATION.  The mtx argument is parenthesized so that an
 *	expression such as "&mutex" dereferences correctly.
 *
 * In builds where a check does not apply it is an empty statement.
 */
#ifdef DIAGNOSTIC
#define	ENV_LEAVE(env, ip) do {						\
	if ((ip) != NULL) {						\
		DB_ASSERT(env, ((ip)->dbth_state == THREAD_ACTIVE  ||	\
		    (ip)->dbth_state == THREAD_FAILCHK));		\
		(ip)->dbth_state = THREAD_OUT;				\
	}								\
} while (0)
#else
#define	ENV_LEAVE(env, ip) do {						\
	if ((ip) != NULL)						\
		(ip)->dbth_state = THREAD_OUT;				\
} while (0)
#endif
#ifdef DIAGNOSTIC
#define	CHECK_THREAD(env) do {						\
	if ((env)->thr_hashtab != NULL)					\
		(void)__env_set_state(env, NULL, THREAD_VERIFY);	\
} while (0)
#ifdef HAVE_STATISTICS
#define	CHECK_MTX_THREAD(env, mtx) do {					\
	if ((mtx)->alloc_id != MTX_MUTEX_REGION &&			\
	    (mtx)->alloc_id != MTX_ENV_REGION &&			\
	    (mtx)->alloc_id != MTX_APPLICATION)				\
		CHECK_THREAD(env);					\
} while (0)
#else
#define	CHECK_MTX_THREAD(env, mtx)	NOP_STATEMENT
#endif
#else
#define	CHECK_THREAD(env)		NOP_STATEMENT
#define	CHECK_MTX_THREAD(env, mtx)	NOP_STATEMENT
#endif
675 
/*
 * DB_THREAD_STATE --
 *	Values stored in the dbth_state field of a thread information
 *	structure.
 */
typedef enum {
	THREAD_SLOT_NOT_IN_USE	= 0,
	THREAD_OUT		= 1,
	THREAD_ACTIVE		= 2,
	THREAD_BLOCKED		= 3,
	THREAD_BLOCKED_DEAD	= 4,
	THREAD_FAILCHK		= 5,
	THREAD_VERIFY		= 6
} DB_THREAD_STATE;
685 
686 typedef struct __pin_list {
687 	roff_t b_ref;		/* offset to buffer. */
688 	int region;		/* region containing buffer. */
689 } PIN_LIST;
690 #define	PINMAX 4
691 
692 struct __db_thread_info { /* SHARED */
693 	pid_t		dbth_pid;
694 	db_threadid_t	dbth_tid;
695 	DB_THREAD_STATE	dbth_state;
696 	SH_TAILQ_ENTRY	dbth_links;
697 	/*
698 	 * The next field contains the (process local) reference to the XA
699 	 * transaction currently associated with this thread of control.
700 	 */
701 	SH_TAILQ_HEAD(__dbth_xatxn) dbth_xatxn;
702 	u_int32_t	dbth_xa_status;
703 	/*
704 	 * The following fields track which buffers this thread of
705 	 * control has pinned in the mpool buffer cache.
706 	 */
707 	u_int16_t	dbth_pincount;	/* Number of pins for this thread. */
708 	u_int16_t	dbth_pinmax;	/* Number of slots allocated. */
709 	roff_t		dbth_pinlist;	/* List of pins. */
710 	PIN_LIST	dbth_pinarray[PINMAX];	/* Initial array of slots. */
711 #ifdef DIAGNOSTIC
712 	roff_t		dbth_locker;	/* Current locker for this thread. */
713 	u_int32_t	dbth_check_off;	/* Count of number of LOCK_OFF calls. */
714 #endif
715 };
/*
 * LOCK_CHECK_OFF, LOCK_CHECK_ON, LOCK_CHECK --
 *	Diagnostic lock checking.
 *
 * In DIAGNOSTIC builds LOCK_CHECK_OFF increments and LOCK_CHECK_ON
 * decrements the thread's dbth_check_off counter (a NULL ip is ignored), and
 * LOCK_CHECK asserts that the cursor either has no locker or that
 * __db_haslock() returns 0 for the given page, mode and type.  In all other
 * builds the three macros are empty statements.
 *
 * Both builds take the same parameter lists, so a call site compiles the
 * same way whether or not DIAGNOSTIC is defined.  The OFF/ON forms are
 * wrapped in do/while(0) so that they behave as a single statement, e.g. as
 * the unbraced body of an if/else.
 */
#ifdef DIAGNOSTIC
#define LOCK_CHECK_OFF(ip) do {						\
	if ((ip) != NULL)						\
		(ip)->dbth_check_off++;					\
} while (0)

#define LOCK_CHECK_ON(ip) do {						\
	if ((ip) != NULL)						\
		(ip)->dbth_check_off--;					\
} while (0)

#define LOCK_CHECK(dbc, pgno, mode, type)				\
	DB_ASSERT((dbc)->dbp->env, (dbc)->locker == NULL ||		\
	     __db_haslock((dbc)->dbp->env,				\
	    (dbc)->locker, (dbc)->dbp->mpf, pgno, mode, type) == 0)
#else
#define LOCK_CHECK_OFF(ip)			NOP_STATEMENT
#define LOCK_CHECK_ON(ip)			NOP_STATEMENT
#define LOCK_CHECK(dbc, pgno, mode, type)	NOP_STATEMENT
#endif
732 
733 typedef struct __env_thread_info {
734 	u_int32_t	thr_count;
735 	u_int32_t	thr_init;
736 	u_int32_t	thr_max;
737 	u_int32_t	thr_nbucket;
738 	roff_t		thr_hashoff;
739 } THREAD_INFO;
740 
/*
 * DB_EVENT --
 *	Deliver event e, with its associated information einfo, to the
 *	db_event_func callback of the DB_ENV linked to env.  Nothing happens
 *	when no callback has been set.
 */
#define	DB_EVENT(env, e, einfo) do {					\
	DB_ENV *__dbenv = (env)->dbenv;					\
									\
	if (__dbenv->db_event_func != NULL)				\
		__dbenv->db_event_func(__dbenv, e, einfo);		\
} while (0)
746 
/* FLAG_MAP: a pair of flag values, one "in" and one "out". */
typedef struct __flag_map {
	u_int32_t	inflag;
	u_int32_t	outflag;
} FLAG_MAP;
750 
751 typedef struct __db_backup_handle {
752 	int	(*open) __P((DB_ENV *, const char *, const char *, void **));
753 	int	(*write) __P((DB_ENV *,
754 		    u_int32_t, u_int32_t, u_int32_t, u_int8_t *, void *));
755 	int	(*close) __P((DB_ENV *, const char *, void *));
756 	u_int32_t	size;
757 	u_int32_t	read_count;
758 	u_int32_t	read_sleep;
759 #define	BACKUP_WRITE_DIRECT	0x0001
760 	int	flags;
761 } DB_BACKUP;
762 
763 /*
764  * Internal database environment structure.
765  *
766  * This is the private database environment handle.  The public environment
767  * handle is the DB_ENV structure.   The library owns this structure, the user
768  * owns the DB_ENV structure.  The reason there are two structures is because
769  * the user's configuration outlives any particular DB_ENV->open call, and
770  * separate structures allows us to easily discard internal information without
771  * discarding the user's configuration.
772  */
773 struct __env {
774 	DB_ENV *dbenv;			/* Linked DB_ENV structure */
775 
776 	/*
777 	 * The ENV structure can be used concurrently, so field access is
778 	 * protected.
779 	 */
780 	db_mutex_t mtx_env;		/* ENV structure mutex */
781 
782 	/*
783 	 * Some fields are included in the ENV structure rather than in the
784 	 * DB_ENV structure because they are only set as arguments to the
785 	 * DB_ENV->open method.  In other words, because of the historic API,
786 	 * not for any rational reason.
787 	 *
788 	 * Arguments to DB_ENV->open.
789 	 */
790 	char	 *db_home;		/* Database home */
791 	u_int32_t open_flags;		/* Flags */
792 	int	  db_mode;		/* Default open permissions */
793 
794 	pid_t	pid_cache;		/* Cached process ID */
795 
796 	DB_FH	*lockfhp;		/* fcntl(2) locking file handle */
797 
798 	DB_LOCKER *env_lref;		/* Locker in non-threaded handles */
799 
800 	DB_DISTAB   recover_dtab;	/* Dispatch table for recover funcs */
801 
802 	int dir_mode;			/* Intermediate directory perms. */
803 
804 #define ENV_DEF_DATA_LEN		100
805 	u_int32_t data_len;		/* Data length in __db_prbytes. */
806 
807 	/* Thread tracking */
808 	u_int32_t	 thr_nbucket;	/* Number of hash buckets */
809 	DB_HASHTAB	*thr_hashtab;	/* Hash table of DB_THREAD_INFO */
810 
811 	/*
812 	 * List of open DB handles for this ENV, used for cursor
813 	 * adjustment.  Must be protected for multi-threaded support.
814 	 */
815 	db_mutex_t mtx_dblist;
816 	int	   db_ref;		/* DB handle reference count */
817 	TAILQ_HEAD(__dblist, __db) dblist;
818 
819 	/*
820 	 * List of open file handles for this ENV.  Must be protected
821 	 * for multi-threaded support.
822 	 */
823 	TAILQ_HEAD(__fdlist, __fh_t) fdlist;
824 
825 	db_mutex_t	 mtx_mt;	/* Mersenne Twister mutex */
826 	int		 mti;		/* Mersenne Twister index */
827 	u_long		*mt;		/* Mersenne Twister state vector */
828 
829 	DB_CIPHER	*crypto_handle;	/* Crypto handle */
830 	DB_LOCKTAB	*lk_handle;	/* Lock handle */
831 	DB_LOG		*lg_handle;	/* Log handle */
832 	DB_MPOOL	*mp_handle;	/* Mpool handle */
833 	DB_MUTEXMGR	*mutex_handle;	/* Mutex handle */
834 	DB_REP		*rep_handle;	/* Replication handle */
835 	DB_TXNMGR	*tx_handle;	/* Txn handle */
836 
837 	DB_BACKUP	*backup_handle;	/* database copy configuration. */
838 
839 	/*
840 	 * XA support.
841 	 */
842 	int		 xa_rmid;	/* XA Resource Manager ID */
843 	int		 xa_ref;	/* XA Reference count */
844 	TAILQ_ENTRY(__env) links;	/* XA environments */
845 
846 	/* Application callback to copy data to/from a custom data source */
847 #define	DB_USERCOPY_GETDATA	0x0001
848 #define	DB_USERCOPY_SETDATA	0x0002
849 	int (*dbt_usercopy)
850 	    __P((DBT *, u_int32_t, void *, u_int32_t, u_int32_t));
851 
852 	int (*log_verify_wrap) __P((ENV *, const char *, u_int32_t,
853 	    const char *, const char *, time_t, time_t, u_int32_t,  u_int32_t,
854 	    u_int32_t, u_int32_t, int, int));
855 
856 	REGINFO	*reginfo;		/* REGINFO structure reference */
857 
858 #define	DB_TEST_ELECTINIT	 1	/* after __rep_elect_init */
859 #define	DB_TEST_ELECTVOTE1	 2	/* after sending VOTE1 */
860 #define	DB_TEST_NO_PAGES	 3	/* before sending PAGE */
861 #define	DB_TEST_POSTDESTROY	 4	/* after destroy op */
862 #define	DB_TEST_POSTLOG		 5	/* after logging all pages */
863 #define	DB_TEST_POSTLOGMETA	 6	/* after logging meta in btree */
864 #define	DB_TEST_POSTOPEN	 7	/* after __os_open */
865 #define	DB_TEST_POSTSYNC	 8	/* after syncing the log */
866 #define	DB_TEST_PREDESTROY	 9	/* before destroy op */
867 #define	DB_TEST_PREOPEN		 10	/* before __os_open */
868 #define	DB_TEST_REPMGR_PERM	 11	/* repmgr perm/archiving tests */
869 #define	DB_TEST_SUBDB_LOCKS	 12	/* subdb locking tests */
870 	int	test_abort;		/* Abort value for testing */
871 	int	test_check;		/* Checkpoint value for testing */
872 	int	test_copy;		/* Copy value for testing */
873 
874 #define	ENV_CDB			0x00000001 /* DB_INIT_CDB */
875 #define	ENV_DBLOCAL		0x00000002 /* Environment for a private DB */
876 #define	ENV_LITTLEENDIAN	0x00000004 /* Little endian system. */
877 #define	ENV_LOCKDOWN		0x00000008 /* DB_LOCKDOWN set */
878 #define	ENV_NO_OUTPUT_SET	0x00000010 /* No output channel set */
879 #define	ENV_OPEN_CALLED		0x00000020 /* DB_ENV->open called */
880 #define	ENV_PRIVATE		0x00000040 /* DB_PRIVATE set */
881 #define	ENV_RECOVER_FATAL	0x00000080 /* Doing fatal recovery in env */
882 #define	ENV_REF_COUNTED		0x00000100 /* Region references this handle */
883 #define	ENV_SYSTEM_MEM		0x00000200 /* DB_SYSTEM_MEM set */
884 #define	ENV_THREAD		0x00000400 /* DB_THREAD set */
885 #define ENV_FORCE_TXN_BULK	0x00000800 /* Txns use bulk mode-for testing */
886 	u_int32_t flags;
887 };
888 
889 /*******************************************************
890  * Database Access Methods.
891  *******************************************************/
/*
 * DB_IS_THREADED --
 *	True when the database handle is free-threaded (it was opened with
 *	DB_THREAD).  The test is whether the handle's mutex field holds
 *	something other than MUTEX_INVALID.
 */
#define	DB_IS_THREADED(dbp)	((dbp)->mutex != MUTEX_INVALID)
898 
/*
 * Handle-state guards used at the top of DB method implementations.  Each
 * macro makes the *enclosing function* return early when its check fails.
 *
 * Every guard is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement and can be used safely as the
 * body of an unbraced if/else.  Callers must terminate the invocation with
 * a semicolon.
 */

/* Initialization methods are often illegal before/after open is called. */
#define	DB_ILLEGAL_AFTER_OPEN(dbp, name) do {				\
	if (F_ISSET((dbp), DB_AM_OPEN_CALLED))				\
		return (__db_mi_open((dbp)->env, (name), 1));		\
} while (0)
#define	DB_ILLEGAL_BEFORE_OPEN(dbp, name) do {				\
	if (!F_ISSET((dbp), DB_AM_OPEN_CALLED))				\
		return (__db_mi_open((dbp)->env, (name), 0));		\
} while (0)

/* Some initialization methods are illegal if environment isn't local. */
#define	DB_ILLEGAL_IN_ENV(dbp, name) do {				\
	if (!F_ISSET((dbp)->env, ENV_DBLOCAL))				\
		return (__db_mi_env((dbp)->env, (name)));		\
} while (0)

/*
 * Return whatever non-zero value __dbh_am_chk reports for this handle and
 * the given flags; fall through when it reports 0.
 */
#define	DB_ILLEGAL_METHOD(dbp, flags) do {				\
	int __ret;							\
	if ((__ret = __dbh_am_chk((dbp), (flags))) != 0)		\
		return (__ret);						\
} while (0)
915 
916 /*
917  * Common DBC->internal fields.  Each access method adds additional fields
918  * to this list, but the initial fields are common.
919  */
920 #define	__DBC_INTERNAL							\
921 	DBC	 *opd;			/* Off-page duplicate cursor. */\
922 	DBC	 *pdbc;			/* Pointer to parent cursor. */ \
923 									\
924 	void	 *page;			/* Referenced page. */		\
925 	u_int32_t part;			/* Partition number. */		\
926 	db_pgno_t root;			/* Tree root. */		\
927 	db_pgno_t pgno;			/* Referenced page number. */	\
928 	db_indx_t indx;			/* Referenced key item index. */\
929 									\
930 	/* Streaming -- cache last position. */				\
931 	db_pgno_t stream_start_pgno;	/* Last start pgno. */		\
932 	u_int32_t stream_off;		/* Current offset. */		\
933 	db_pgno_t stream_curr_pgno;	/* Current overflow page. */	\
934 									\
935 	DB_LOCK		lock;		/* Cursor lock. */		\
936 	db_lockmode_t	lock_mode;	/* Lock mode. */
937 
938 struct __dbc_internal {
939 	__DBC_INTERNAL
940 };
941 
/* The set of actions __db_master_update can be asked to take. */
typedef enum {
	MU_REMOVE,
	MU_RENAME,
	MU_OPEN,
	MU_MOVE
} mu_action;
944 
/*
 * IS_INITIALIZED --
 *	Access-method-common test for whether a cursor has been initialized:
 *	the page number in its internal structure is not PGNO_INVALID.  When
 *	partitioning is compiled in and the database is partitioned, the test
 *	is made on the partition cursor's sub-cursor instead, which must also
 *	be non-NULL.
 */
#ifndef HAVE_PARTITION
#define	IS_INITIALIZED(dbc)	((dbc)->internal->pgno != PGNO_INVALID)
#else
#define	IS_INITIALIZED(dbc)						\
	(DB_IS_PARTITIONED((dbc)->dbp) ?				\
	    ((PART_CURSOR *)(dbc)->internal)->sub_cursor != NULL &&	\
	    ((PART_CURSOR *)(dbc)->internal)->				\
		sub_cursor->internal->pgno != PGNO_INVALID :		\
	    (dbc)->internal->pgno != PGNO_INVALID)
#endif
958 
/*
 * FREE_IF_NEEDED --
 *	Free the callback-allocated buffer, if necessary, hanging off of a
 *	DBT: when the DBT carries DB_DBT_APPMALLOC, its data pointer is
 *	handed to __os_ufree and the flag is cleared.  Otherwise nothing
 *	happens.
 *
 *	Wrapped in do { } while (0) so that "FREE_IF_NEEDED(env, dbt);" is a
 *	single statement and is safe inside an unbraced if/else.  The dbt
 *	argument is evaluated more than once.
 */
#define	FREE_IF_NEEDED(env, dbt) do {					\
	if (F_ISSET((dbt), DB_DBT_APPMALLOC)) {				\
		__os_ufree((env), (dbt)->data);				\
		F_CLR((dbt), DB_DBT_APPMALLOC);				\
	}								\
} while (0)
965 
/*
 * SET_RET_MEM --
 *	Make cursor "dbc" return the results of any no-DBT-flag get
 *	operations in memory belonging to object "owner": the cursor's rskey,
 *	rkey and rdata pointers are aimed at owner's my_rskey, my_rkey and
 *	my_rdata.
 */
#define	SET_RET_MEM(dbc, owner) do {					\
	(dbc)->rskey = &(owner)->my_rskey;				\
	(dbc)->rkey = &(owner)->my_rkey;				\
	(dbc)->rdata = &(owner)->my_rdata;				\
} while (0)
976 
/*
 * COPY_RET_MEM --
 *	Have "dest" use the same return-data memory that "src" is currently
 *	set to use, by copying src's rskey, rkey and rdata pointers.
 */
#define	COPY_RET_MEM(src, dest) do {					\
	(dest)->rskey = (src)->rskey;					\
	(dest)->rkey = (src)->rkey;					\
	(dest)->rdata = (src)->rdata;					\
} while (0)
984 
/*
 * RESET_RET_MEM --
 *	Put the cursor's returned-memory pointers back to their defaults,
 *	i.e. the cursor's own my_rskey, my_rkey and my_rdata.
 */
#define	RESET_RET_MEM(dbc) do {						\
	(dbc)->rskey = &(dbc)->my_rskey;				\
	(dbc)->rkey = &(dbc)->my_rkey;					\
	(dbc)->rdata = &(dbc)->my_rdata;				\
} while (0)
992 
/*
 * COMPACT_TRUNCATE --
 *	Decrement c_data->compact_truncate, but only while it is greater
 *	than 1; a value of 1 or less is left unchanged.
 *
 *	The argument is parenthesized so that any pointer expression (for
 *	example "&local") can be passed.  It is evaluated more than once.
 */
#define	COMPACT_TRUNCATE(c_data) do {					\
	if ((c_data)->compact_truncate > 1)				\
		(c_data)->compact_truncate--;				\
} while (0)
997 
998 /*******************************************************
999  * Mpool.
1000  *******************************************************/
/*
 * File types for DB access methods.  Negative numbers are reserved to DB.
 * The two *_NOTSET values below them mark an LSN offset and a clear length
 * that have not yet been set.
 */
#define	DB_FTYPE_SET		-1	/* Call pgin/pgout functions. */
#define	DB_FTYPE_NOTSET		 0	/* Don't call them. */
#define	DB_LSN_OFF_NOTSET	-1	/* LSN offset not yet set. */
#define	DB_CLEARLEN_NOTSET	UINT32_MAX /* Clear length not yet set. */
1008 
1009 /* Structure used as the DB pgin/pgout pgcookie. */
1010 typedef struct __dbpginfo {
1011 	u_int32_t db_pagesize;		/* Underlying page size. */
1012 	u_int32_t flags;		/* Some DB_AM flags needed. */
1013 	DBTYPE  type;			/* DB type */
1014 } DB_PGINFO;
1015 
1016 /*******************************************************
1017  * Log.
1018  *******************************************************/
/*
 * The 'zero' LSN is file 0, offset 0.
 *	ZERO_LSN	sets an LSN to that value.
 *	IS_ZERO_LSN	tests whether an LSN has that value.
 */
#define	ZERO_LSN(LSN)							\
	do {								\
		(LSN).file = 0;						\
		(LSN).offset = 0;					\
	} while (0)
#define	IS_ZERO_LSN(LSN)						\
	((LSN).file == 0 && (LSN).offset == 0)
1025 
/*
 * The initial LSN is file 1, offset 0.
 *	IS_INIT_LSN	tests whether an LSN has that value.
 *	INIT_LSN	sets an LSN to that value.
 */
#define	IS_INIT_LSN(LSN)						\
	((LSN).file == 1 && (LSN).offset == 0)
#define	INIT_LSN(LSN)							\
	do {								\
		(LSN).file = 1;						\
		(LSN).offset = 0;					\
	} while (0)
1031 
/*
 * The maximum LSN has both file and offset equal to UINT32_MAX.
 *	MAX_LSN		sets an LSN to that value.
 *	IS_MAX_LSN	tests whether an LSN has that value.
 */
#define	MAX_LSN(LSN)							\
	do {								\
		(LSN).file = UINT32_MAX;				\
		(LSN).offset = UINT32_MAX;				\
	} while (0)
#define	IS_MAX_LSN(LSN)							\
	((LSN).file == UINT32_MAX && (LSN).offset == UINT32_MAX)
1038 
/*
 * If logging is turned off, smash the lsn: the "not logged" LSN is file 0,
 * offset 1.
 *	LSN_NOT_LOGGED		sets an LSN to that value.
 *	IS_NOT_LOGGED_LSN	tests whether an LSN has that value.
 */
#define	LSN_NOT_LOGGED(LSN)						\
	do {								\
		(LSN).file = 0;						\
		(LSN).offset = 1;					\
	} while (0)
#define	IS_NOT_LOGGED_LSN(LSN)						\
	((LSN).file == 0 && (LSN).offset == 1)
1046 
/*
 * LOG_COMPARE --
 *	Compare two LSNs, given as pointers.  The file fields are compared
 *	first and the offset fields break ties.  Evaluates to -1 when lsn0
 *	sorts before lsn1, 1 when it sorts after, and 0 when they are equal.
 */
#define	LOG_COMPARE(lsn0, lsn1)						\
	((lsn0)->file < (lsn1)->file ? -1 :				\
	(lsn0)->file > (lsn1)->file ? 1 :				\
	(lsn0)->offset < (lsn1)->offset ? -1 :				\
	(lsn0)->offset > (lsn1)->offset ? 1 : 0)
1056 
1057 /*******************************************************
1058  * Txn.
1059  *******************************************************/
/* True when cursor C has a transaction and that transaction is TXN_NOWAIT. */
#define	DB_NONBLOCK(C)							\
	((C)->txn != NULL && F_ISSET((C)->txn, TXN_NOWAIT))

/* DB_LOCK_NOWAIT when txn is non-NULL and flagged TXN_NOWAIT, otherwise 0. */
#define	NOWAIT_FLAG(txn)						\
	(((txn) != NULL && F_ISSET((txn), TXN_NOWAIT)) ? DB_LOCK_NOWAIT : 0)

/* True when txn is non-NULL and is not flagged TXN_FAMILY. */
#define	IS_REAL_TXN(txn)	((txn) != NULL && !F_ISSET((txn), TXN_FAMILY))

/* True when txn is non-NULL and has a parent. */
#define	IS_SUBTRANSACTION(txn)	((txn) != NULL && (txn)->parent != NULL)
1067 
/*
 * XA_CHECK_TXN --
 *	Checks for existence of an XA transaction in access method
 *	interfaces.  When "ip" is non-NULL and "txn" is NULL, txn is set to
 *	the first entry on ip's dbth_xatxn list; DB_ASSERT then checks that
 *	the result is either NULL or has xa_thr_status equal to
 *	TXN_XA_THREAD_ASSOCIATED.
 *
 *	"txn" must be a modifiable lvalue; it is evaluated more than once.
 *	The macro also uses a variable named "env" taken from the scope of
 *	the caller (it is passed to DB_ASSERT).
 *
 *	Wrapped in do { } while (0) so an invocation plus semicolon is one
 *	statement, and every use of txn is parenthesized so that expressions
 *	such as "*txnp" expand correctly.
 */
#define	XA_CHECK_TXN(ip, txn) do {					\
	if ((ip) != NULL && (txn) == NULL) {				\
		(txn) = SH_TAILQ_FIRST(&(ip)->dbth_xatxn, __db_txn);	\
		DB_ASSERT(env, (txn) == NULL ||				\
		    (txn)->xa_thr_status == TXN_XA_THREAD_ASSOCIATED);	\
	}								\
} while (0)
1075 
/*
 * XA_NO_TXN --
 *	Ensure that there is no XA transaction active.  "retval" is set to 0
 *	and then, if "ip" is non-NULL and the first entry on its dbth_xatxn
 *	list has xa_thr_status equal to TXN_XA_THREAD_ASSOCIATED, to EINVAL.
 *
 *	"retval" must be a modifiable lvalue.  Wrapped in do { } while (0) so
 *	an invocation plus semicolon is one statement that is safe inside an
 *	unbraced if/else.
 */
#define	XA_NO_TXN(ip, retval) do {					\
	DB_TXN *__txn;							\
	(retval) = 0;							\
	if ((ip) != NULL) {						\
		__txn = SH_TAILQ_FIRST(&(ip)->dbth_xatxn, __db_txn);	\
		if (__txn != NULL &&					\
		    __txn->xa_thr_status == TXN_XA_THREAD_ASSOCIATED)	\
			(retval) = EINVAL;				\
	}								\
} while (0)
1087 
1088 /*******************************************************
1089  * Crypto.
1090  *******************************************************/
#define	DB_IV_BYTES	16		/* Number of bytes in an IV. */
#define	DB_MAC_KEY	20		/* Number of bytes in a MAC checksum. */
1093 
1094 /*******************************************************
1095  * Compression
1096  *******************************************************/
/* Smallest byte value that the integer compression algorithm doesn't use. */
#define	CMP_INT_SPARE_VAL	0xFC
1099 
1100 #if defined(__cplusplus)
1101 }
1102 #endif
1103 
1104 /*******************************************************
1105  * Remaining general DB includes.
1106  *******************************************************/
1107 
1108 
1109 #include "dbinc/globals.h"
1110 #include "dbinc/clock.h"
1111 #include "dbinc/debug.h"
1112 #include "dbinc/region.h"
1113 #include "dbinc_auto/env_ext.h"
1114 #include "dbinc/mutex.h"
1115 #ifdef HAVE_REPLICATION_THREADS
1116 #include "dbinc/repmgr.h"
1117 #endif
1118 #include "dbinc/rep.h"
1119 #include "dbinc/os.h"
1120 #include "dbinc_auto/clib_ext.h"
1121 #include "dbinc_auto/common_ext.h"
1122 
1123 /*******************************************************
1124  * Remaining Log.
1125  * These need to be defined after the general includes
1126  * because they need rep.h from above.
1127  *******************************************************/
/*
 * DBENV_LOGGING --
 *	Test if the environment is currently logging changes.  Even with a
 *	fully functional log system, changes need not be logged while we are
 *	in recovery or are a replication client, because they are already in
 *	the log.
 */
#define	DBENV_LOGGING(env)						\
	(LOGGING_ON(env) && !(IS_REP_CLIENT(env) || IS_RECOVERING(env)))
1135 
/*
 * DBC_LOGGING --
 *	Test if a change made through cursor "dbc" needs to be logged.
 *
 * By default, operations without an associated transaction are not logged.
 * When DIAGNOSTIC, DEBUG_ROP or DEBUG_WOP is defined the decision is handed
 * to __dbc_logging() instead: in those builds we want log records for
 * read/write operations, and when debugging, more information is always
 * better.
 *
 * The DBC_RECOVER flag is set during abort as well as during recovery, so
 * DBC_LOGGING may be false for a particular dbc even when DBENV_LOGGING is
 * true.
 *
 * LOGGING_ON/IS_REP_CLIENT are used explicitly here, rather than
 * DBENV_LOGGING, because IS_RECOVERING (and thus DBENV_LOGGING) relies on the
 * log headers, which we don't want to pull in, and because DBC_RECOVER should
 * be set anytime IS_RECOVERING would be true.
 *
 * If we're not in recovery (a master doing an abort, or a client applying a
 * txn), then a client's only path through here is on an internal operation,
 * and a master's only path through here is a transactional operation.
 * Detect if either is not the case.
 */
#if !defined(DIAGNOSTIC) && !defined(DEBUG_ROP) && !defined(DEBUG_WOP)
#define	DBC_LOGGING(dbc)						\
	((dbc)->txn != NULL && LOGGING_ON((dbc)->env) &&		\
	    !(F_ISSET((dbc), DBC_RECOVER) || IS_REP_CLIENT((dbc)->env)))
#else
#define	DBC_LOGGING(dbc)	__dbc_logging(dbc)
#endif
1162 
1163 #endif /* !_DB_INT_H_ */
1164