1 /*-
2  * Copyright (c) 2014-2018 MongoDB, Inc.
3  * Copyright (c) 2008-2014 WiredTiger, Inc.
4  *	All rights reserved.
5  *
6  * See the file LICENSE for redistribution information.
7  */
8 
9 /*
10  * Quiet compiler warnings about unused function parameters and variables,
11  * and unused function return values.
12  */
13 #define	WT_UNUSED(var)		(void)(var)
14 #define	WT_NOT_READ(v, val) do {					\
15 	(v) = (val);							\
16 	(void)(v);							\
17 } while (0);
18 #define	WT_IGNORE_RET(call) do {					\
19 	int __ignored_ret;						\
20 	__ignored_ret = (call);						\
21 	WT_UNUSED(__ignored_ret);					\
22 } while (0)
23 
24 #define	WT_DIVIDER	"=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
25 
26 /* Basic constants. */
27 #define	WT_THOUSAND	(1000)
28 #define	WT_MILLION	(1000000)
29 #define	WT_BILLION	(1000000000)
30 
31 #define	WT_MINUTE	(60)
32 
33 #define	WT_KILOBYTE	(1024)
34 #define	WT_MEGABYTE	(1048576)
35 #define	WT_GIGABYTE	(1073741824)
36 #define	WT_TERABYTE	((uint64_t)1099511627776)
37 #define	WT_PETABYTE	((uint64_t)1125899906842624)
38 #define	WT_EXABYTE	((uint64_t)1152921504606846976)
39 
40 /*
41  * Sizes that cannot be larger than 2**32 are stored in uint32_t fields in
42  * common structures to save space.  To minimize conversions from size_t to
43  * uint32_t through the code, we use the following macros.
44  */
45 #define	WT_STORE_SIZE(s)	((uint32_t)(s))
46 #define	WT_PTRDIFF(end, begin)						\
47 	((size_t)((const uint8_t *)(end) - (const uint8_t *)(begin)))
48 #define	WT_PTRDIFF32(end, begin)					\
49 	WT_STORE_SIZE(WT_PTRDIFF((end), (begin)))
50 #define	WT_BLOCK_FITS(p, len, begin, maxlen)				\
51 	((const uint8_t *)(p) >= (const uint8_t *)(begin) &&		\
52 	((const uint8_t *)(p) + (len) <= (const uint8_t *)(begin) + (maxlen)))
53 #define	WT_PTR_IN_RANGE(p, begin, maxlen)				\
54 	WT_BLOCK_FITS((p), 1, (begin), (maxlen))
55 
56 /*
57  * Align an unsigned value of any type to a specified power-of-2, including the
58  * offset result of a pointer subtraction; do the calculation using the largest
59  * unsigned integer type available.
60  */
61 #define	WT_ALIGN(n, v)							\
62 	((((uintmax_t)(n)) + ((v) - 1)) & ~(((uintmax_t)(v)) - 1))
63 
64 #define	WT_ALIGN_NEAREST(n, v)						\
65 	((((uintmax_t)(n)) + ((v) / 2)) & ~(((uintmax_t)(v)) - 1))
66 
67 /* Min, max. */
68 #define	WT_MIN(a, b)	((a) < (b) ? (a) : (b))
69 #define	WT_MAX(a, b)	((a) < (b) ? (b) : (a))
70 
71 /* Elements in an array. */
72 #define	WT_ELEMENTS(a)	(sizeof(a) / sizeof((a)[0]))
73 
74 /* 10 level skip lists, 1/4 have a link to the next element. */
75 #define	WT_SKIP_MAXDEPTH	10
76 #define	WT_SKIP_PROBABILITY	(UINT32_MAX >> 2)
77 
78 /*
79  * Encryption needs to know its original length before either the
80  * block or logging subsystems pad.  Constant value.
81  */
82 #define	WT_ENCRYPT_LEN_SIZE	sizeof(uint32_t)
83 
84 /*
85  * Default hash table size; we don't need a prime number of buckets
86  * because we always use a good hash function.
87  */
88 #define	WT_HASH_ARRAY_SIZE	512
89 
90 /*
91  * __wt_calloc_def, __wt_calloc_one --
92  *	Most calloc calls don't need separate count or sizeof arguments.
93  */
94 #define	__wt_calloc_def(session, number, addr)				\
95 	__wt_calloc(session, (size_t)(number), sizeof(**(addr)), addr)
96 #define	__wt_calloc_one(session, addr)					\
97 	__wt_calloc(session, (size_t)1, sizeof(**(addr)), addr)
98 
99 /*
100  * __wt_realloc_def --
101  *	Common case allocate-and-grow function.
102  *	Starts by allocating the requested number of items (at least 10), then
103  *	doubles each time the list needs to grow.
104  */
105 #define	__wt_realloc_def(session, sizep, number, addr)			\
106 	(((number) * sizeof(**(addr)) <= *(sizep)) ? 0 :		\
107 	    __wt_realloc(session, sizep, WT_MAX(*(sizep) * 2,		\
108 		WT_MAX(10, (number)) * sizeof(**(addr))), addr))
109 /*
110  * Our internal free function clears the underlying address atomically so there
111  * is a smaller chance of racing threads seeing intermediate results while a
112  * structure is being free'd.	(That would be a bug, of course, but I'd rather
113  * not drop core, just the same.)  That's a non-standard "free" API, and the
114  * resulting bug is a mother to find -- make sure we get it right, don't make
115  * the caller remember to put the & operator on the pointer.
116  */
117 #define	__wt_free(session, p) do {					\
118 	void *__p = &(p);						\
119 	if (*(void **)__p != NULL)					\
120 		__wt_free_int(session, __p);				\
121 } while (0)
122 #ifdef HAVE_DIAGNOSTIC
123 #define	__wt_overwrite_and_free(session, p) do {			\
124 	memset(p, WT_DEBUG_BYTE, sizeof(*(p)));				\
125 	__wt_free(session, p);						\
126 } while (0)
127 #define	__wt_overwrite_and_free_len(session, p, len) do {		\
128 	memset(p, WT_DEBUG_BYTE, len);					\
129 	__wt_free(session, p);						\
130 } while (0)
131 #else
132 #define	__wt_overwrite_and_free(session, p)		__wt_free(session, p)
133 #define	__wt_overwrite_and_free_len(session, p, len)	__wt_free(session, p)
134 #endif
135 
136 /*
137  * Flag set, clear and test.
138  *
139  * They come in 3 flavors: F_XXX (handles a field named "flags" in the structure
140  * referenced by its argument), LF_XXX (handles a local variable named "flags"),
141  * and FLD_XXX (handles any variable, anywhere).
142  *
143  * Flags are unsigned 32-bit values -- we cast to keep the compiler quiet (the
144  * hex constant might be a negative integer), and to ensure the hex constant is
145  * the correct size before applying the bitwise not operator.
146  */
147 #define	FLD_CLR(field, mask)	        ((void)((field) &= ~(mask)))
148 #define	FLD_MASK(field, mask)	        ((field) & (mask))
149 #define	FLD_ISSET(field, mask)	        (FLD_MASK(field, mask) != 0)
150 #define	FLD_SET(field, mask)	        ((void)((field) |= (mask)))
151 
152 #define	F_CLR(p, mask)		        FLD_CLR((p)->flags, mask)
153 #define	F_ISSET(p, mask)	        FLD_ISSET((p)->flags, mask)
154 #define	F_MASK(p, mask)	                FLD_MASK((p)->flags, mask)
155 #define	F_SET(p, mask)		        FLD_SET((p)->flags, mask)
156 
157 #define	LF_CLR(mask)		        FLD_CLR(flags, mask)
158 #define	LF_ISSET(mask)		        FLD_ISSET(flags, mask)
159 #define	LF_MASK(mask)		        FLD_MASK(flags, mask)
160 #define	LF_SET(mask)		        FLD_SET(flags, mask)
161 
162 /*
163  * Insertion sort, for sorting small sets of values.
164  *
165  * The "compare_lt" argument is a function or macro that returns true when
166  * its first argument is less than its second argument.
167  */
168 #define	WT_INSERTION_SORT(arrayp, n, value_type, compare_lt) do {	\
169 	value_type __v;							\
170 	int __i, __j, __n = (int)(n);					\
171 	if (__n == 2) {							\
172 		__v = (arrayp)[1];					\
173 		if (compare_lt(__v, (arrayp)[0])) {			\
174 			(arrayp)[1] = (arrayp)[0];			\
175 			(arrayp)[0] = __v;				\
176 		}							\
177 	}								\
178 	if (__n > 2) {							\
179 		for (__i = 1; __i < __n; ++__i) {			\
180 			__v = (arrayp)[__i];				\
181 			for (__j = __i - 1; __j >= 0 &&			\
182 			    compare_lt(__v, (arrayp)[__j]); --__j)	\
183 				(arrayp)[__j + 1] = (arrayp)[__j];	\
184 			(arrayp)[__j + 1] = __v;			\
185 		}							\
186 	}								\
187 } while (0)
188 
189 /*
190  * Some C compiler address sanitizers complain if qsort is passed a NULL base
191  * reference, even if there are no elements to compare (note zero elements is
192  * allowed by the IEEE Std 1003.1-2017 standard). Avoid the complaint.
193  */
194 #define	__wt_qsort(base, nmemb, size, compar)				\
195 	if ((nmemb) != 0)						\
196 		qsort(base, nmemb, size, compar)
197 
198 /*
199  * Binary search for an integer key.
200  */
201 #define	WT_BINARY_SEARCH(key, arrayp, n, found) do {			\
202 	uint32_t __base, __indx, __limit;				\
203 	(found) = false;						\
204 	for (__base = 0, __limit = (n); __limit != 0; __limit >>= 1) {	\
205 		__indx = __base + (__limit >> 1);			\
206 		if ((arrayp)[__indx] < (key)) {				\
207 			__base = __indx + 1;				\
208 			--__limit;					\
209 		} else if ((arrayp)[__indx] == (key)) {			\
210 			(found) = true;					\
211 			break;						\
212 		}							\
213 	}								\
214 } while (0)
215 
216 /* Verbose messages. */
217 #define	WT_VERBOSE_ISSET(session, f)					\
218 	(FLD_ISSET(S2C(session)->verbose, f))
219 
220 #define	WT_CLEAR(s)							\
221 	memset(&(s), 0, sizeof(s))
222 
223 /* Check if a string matches a prefix. */
224 #define	WT_PREFIX_MATCH(str, pfx)					\
225 	(((const char *)(str))[0] == ((const char *)(pfx))[0] &&	\
226 	    strncmp(str, pfx, strlen(pfx)) == 0)
227 
228 /* Check if a string matches a prefix, and move past it. */
229 #define	WT_PREFIX_SKIP(str, pfx)					\
230 	(WT_PREFIX_MATCH(str, pfx) ? ((str) += strlen(pfx), 1) : 0)
231 
232 /* Assert that a string matches a prefix, and move past it. */
233 #define	WT_PREFIX_SKIP_REQUIRED(session, str, pfx) do {			\
234 	WT_ASSERT(session, WT_PREFIX_MATCH(str, pfx));			\
235 	(str) += strlen(pfx);						\
236 } while (0)
237 
238 /*
239  * Check if a variable string equals a constant string. Inline the common case
240  * for WiredTiger of a single byte string. This is required because not all
241  * compilers optimize this case in strcmp (e.g., clang). While this macro works
242  * in the case of comparing two pointers (a sizeof operator on a pointer won't
243  * equal 2 and the extra code will be discarded at compile time), that's not its
244  * purpose.
245  */
246 #define	WT_STREQ(s, cs)							\
247 	(sizeof(cs) == 2 ? (s)[0] == (cs)[0] && (s)[1] == '\0' :	\
248 	strcmp(s, cs) == 0)
249 
250 /* Check if a string matches a byte string of len bytes. */
251 #define	WT_STRING_MATCH(str, bytes, len)				\
252 	(((const char *)(str))[0] == ((const char *)(bytes))[0] &&	\
253 	    strncmp(str, bytes, len) == 0 && (str)[len] == '\0')
254 
255 /*
256  * Macro that produces a string literal that isn't wrapped in quotes, to avoid
257  * tripping up spell checkers.
258  */
259 #define	WT_UNCHECKED_STRING(str) #str
260 
261 /* Function return value and scratch buffer declaration and initialization. */
262 #define	WT_DECL_ITEM(i)	WT_ITEM *i = NULL
263 #define	WT_DECL_RET	int ret = 0
264 
265 /* If a WT_ITEM data field points somewhere in its allocated memory. */
266 #define	WT_DATA_IN_ITEM(i)						\
267 	((i)->mem != NULL && (i)->data >= (i)->mem &&			\
268 	    WT_PTRDIFF((i)->data, (i)->mem) < (i)->memsize)
269 
270 /* Copy the data and size fields of an item. */
271 #define	WT_ITEM_SET(dst, src) do {					\
272 	(dst).data = (src).data;					\
273 	(dst).size = (src).size;					\
274 } while (0)
275 
276 /* Timestamp type and helper macros. */
277 #if WT_TIMESTAMP_SIZE > 0
278 #define	HAVE_TIMESTAMPS
279 #else
280 #undef	HAVE_TIMESTAMPS
281 #endif
282 
283 #ifdef HAVE_TIMESTAMPS
284 struct __wt_timestamp_t {
285 #if WT_TIMESTAMP_SIZE == 8
286 	uint64_t val;
287 #else
288 	uint8_t ts[WT_TIMESTAMP_SIZE];
289 #endif
290 };
291 typedef struct __wt_timestamp_t wt_timestamp_t;
292 #define	WT_DECL_TIMESTAMP(x)	wt_timestamp_t x;
293 #define	WT_TIMESTAMP_NULL(x)	(x)
294 #else
295 typedef void wt_timestamp_t;
296 #define	WT_DECL_TIMESTAMP(x)
297 #define	WT_TIMESTAMP_NULL(x)	(NULL)
298 #endif
299 
300 /*
301  * In diagnostic mode we track the locations from which hazard pointers and
302  * scratch buffers were acquired.
303  */
304 #ifdef HAVE_DIAGNOSTIC
305 #define	__wt_scr_alloc(session, size, scratchp)				\
306 	__wt_scr_alloc_func(session, size, scratchp, __func__, __LINE__)
307 #define	__wt_page_in(session, ref, flags)				\
308 	__wt_page_in_func(session, ref, flags, __func__, __LINE__)
309 #define	__wt_page_swap(session, held, want, flags)			\
310 	__wt_page_swap_func(session, held, want, flags, __func__, __LINE__)
311 #else
312 #define	__wt_scr_alloc(session, size, scratchp)				\
313 	__wt_scr_alloc_func(session, size, scratchp)
314 #define	__wt_page_in(session, ref, flags)				\
315 	__wt_page_in_func(session, ref, flags)
316 #define	__wt_page_swap(session, held, want, flags)			\
317 	__wt_page_swap_func(session, held, want, flags)
318 #endif
319 
320 /* Random number generator state. */
321 union __wt_rand_state {
322 	uint64_t v;
323 	struct {
324 		uint32_t w, z;
325 	} x;
326 };
327 
328 /*
329  * WT_TAILQ_SAFE_REMOVE_BEGIN/END --
330  *	Macro to safely walk a TAILQ where we're expecting some underlying
331  * function to remove elements from the list, but we don't want to stop on
332  * error, nor do we want an error to turn into an infinite loop. Used during
333  * shutdown, when we're shutting down various lists. Unlike TAILQ_FOREACH_SAFE,
334  * this macro works even when the next element gets removed along with the
335  * current one.
336  */
337 #define	WT_TAILQ_SAFE_REMOVE_BEGIN(var, head, field, tvar)		\
338 	for ((tvar) = NULL; ((var) = TAILQ_FIRST(head)) != NULL;	\
339 	    (tvar) = (var)) {						\
340 		if ((tvar) == (var)) {					\
341 			/* Leak the structure. */			\
342 			TAILQ_REMOVE(head, (var), field);		\
343 			continue;					\
344 		}
345 #define	WT_TAILQ_SAFE_REMOVE_END }
346 
347 /*
348  * WT_VA_ARGS_BUF_FORMAT --
349  *	Format into a scratch buffer, extending it as necessary. This is a
350  * macro because we need to repeatedly call va_start/va_end and there's no
351  * way to do that inside a function call.
352  */
353 #define	WT_VA_ARGS_BUF_FORMAT(session, buf, fmt, concatenate) do {	\
354 	size_t __len, __space;						\
355 	va_list __ap;							\
356 	int __ret_xx;		/* __ret already used by WT_RET */	\
357 	char *__p;							\
358 									\
359 	/*								\
360 	 * This macro is used to both initialize and concatenate into a	\
361 	 * buffer. If not concatenating, clear the size so we don't use	\
362 	 * any existing contents.					\
363 	 */								\
364 	if (!(concatenate))						\
365 		(buf)->size = 0;					\
366 	for (;;) {							\
367 		WT_ASSERT(session, (buf)->memsize >= (buf)->size);	\
368 		__p = (char *)((uint8_t *)(buf)->mem + (buf)->size);	\
369 		__space = (buf)->memsize - (buf)->size;			\
370 									\
371 		/* Format into the buffer. */				\
372 		va_start(__ap, fmt);					\
373 		__ret_xx = __wt_vsnprintf_len_set(			\
374 		    __p, __space, &__len, fmt, __ap);			\
375 		va_end(__ap);						\
376 		WT_RET(__ret_xx);					\
377 									\
378 		/* Check if there was enough space. */			\
379 		if (__len < __space) {					\
380 			(buf)->data = (buf)->mem;			\
381 			(buf)->size += __len;				\
382 			break;						\
383 		}							\
384 									\
385 		/*							\
386 		 * If not, double the size of the buffer: we're dealing	\
387 		 * with strings, we don't expect the size to get huge.	\
388 		 */							\
389 		WT_RET(__wt_buf_extend(					\
390 		    session, buf, (buf)->size + __len + 1));		\
391 	}								\
392 } while (0)
393 
394 /*
395  * HAVE_LONG_RUNNING_PREPARE
396  * 	To enable functionality of evicting prepared transactions using
397  * cache overflow mechanism.
398  */
399 #undef	HAVE_LONG_RUNNING_PREPARE
400