1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 2013 Oracle and/or its affiliates.  All rights reserved.
5  *
6  * $Id$
7  */
8 
9 #include "db_config.h"
10 
11 #include "db_int.h"
12 #include "dbinc/log.h"
13 #include "dbinc/lock.h"
14 #include "dbinc/mp.h"
15 #include "dbinc/txn.h"
16 
17 static db_size_t __mutex_align_size __P((ENV *));
18 static int __mutex_region_init __P((ENV *, DB_MUTEXMGR *));
19 static size_t __mutex_region_size __P((ENV *));
20 static size_t __mutex_region_max __P((ENV *));
21 
22 /*
23  * __mutex_open --
24  *	Open a mutex region.
25  *
26  * PUBLIC: int __mutex_open __P((ENV *, int));
27  */
28 int
__mutex_open(env,create_ok)29 __mutex_open(env, create_ok)
30 	ENV *env;
31 	int create_ok;
32 {
33 	DB_ENV *dbenv;
34 	DB_MUTEXMGR *mtxmgr;
35 	DB_MUTEXREGION *mtxregion;
36 	size_t size;
37 	u_int32_t cpu_count;
38 	int ret;
39 #ifndef HAVE_ATOMIC_SUPPORT
40 	u_int i;
41 #endif
42 
43 	dbenv = env->dbenv;
44 	if (dbenv->mutex_max == 0 &&
45 	    dbenv->mutex_cnt == 0 && dbenv->mutex_inc == 0 &&
46 	    F_ISSET(env, ENV_PRIVATE | ENV_THREAD) == ENV_PRIVATE)
47 		return (0);
48 
49 	/*
50 	 * Initialize the ENV handle information if not already initialized.
51 	 *
52 	 * Align mutexes on the byte boundaries specified by the application.
53 	 */
54 	if (dbenv->mutex_align == 0)
55 		dbenv->mutex_align = MUTEX_ALIGN;
56 	if (dbenv->mutex_tas_spins == 0) {
57 		cpu_count = __os_cpu_count();
58 		if ((ret = __mutex_set_tas_spins(dbenv, cpu_count == 1 ?
59 		    cpu_count : cpu_count * MUTEX_SPINS_PER_PROCESSOR)) != 0)
60 			return (ret);
61 	}
62 
63 	/*
64 	 * If the user didn't set an absolute value on the number of mutexes
65 	 * we'll need, figure it out.  We're conservative in our allocation,
66 	 * we need mutexes for DB handles, group-commit queues and other things
67 	 * applications allocate at run-time.  The application may have kicked
68 	 * up our count to allocate its own mutexes, add that in.
69 	 */
70 	if (dbenv->mutex_cnt == 0 &&
71 	    F_ISSET(env, ENV_PRIVATE | ENV_THREAD) != ENV_PRIVATE)
72 		dbenv->mutex_cnt =
73 		    __lock_region_mutex_count(env) +
74 		    __log_region_mutex_count(env) +
75 		    __memp_region_mutex_count(env) +
76 		    __txn_region_mutex_count(env);
77 
78 	if (dbenv->mutex_max != 0 && dbenv->mutex_cnt > dbenv->mutex_max)
79 		dbenv->mutex_cnt = dbenv->mutex_max;
80 
81 	/* Create/initialize the mutex manager structure. */
82 	if ((ret = __os_calloc(env, 1, sizeof(DB_MUTEXMGR), &mtxmgr)) != 0)
83 		return (ret);
84 
85 	/* Join/create the mutex region. */
86 	mtxmgr->reginfo.env = env;
87 	mtxmgr->reginfo.type = REGION_TYPE_MUTEX;
88 	mtxmgr->reginfo.id = INVALID_REGION_ID;
89 	mtxmgr->reginfo.flags = REGION_JOIN_OK;
90 	size = __mutex_region_size(env);
91 	if (create_ok)
92 		F_SET(&mtxmgr->reginfo, REGION_CREATE_OK);
93 	if ((ret = __env_region_attach(env,
94 	    &mtxmgr->reginfo, size, size + __mutex_region_max(env))) != 0)
95 		goto err;
96 
97 	/* If we created the region, initialize it. */
98 	if (F_ISSET(&mtxmgr->reginfo, REGION_CREATE))
99 		if ((ret = __mutex_region_init(env, mtxmgr)) != 0)
100 			goto err;
101 
102 	/* Set the local addresses. */
103 	mtxregion = mtxmgr->reginfo.primary =
104 	    R_ADDR(&mtxmgr->reginfo, mtxmgr->reginfo.rp->primary);
105 	mtxmgr->mutex_array = R_ADDR(&mtxmgr->reginfo, mtxregion->mutex_off);
106 
107 	env->mutex_handle = mtxmgr;
108 
109 #ifndef HAVE_ATOMIC_SUPPORT
110 	/* If necessary allocate the atomic emulation mutexes.  */
111 	if (F_ISSET(&mtxmgr->reginfo, REGION_CREATE))
112 		for (i = 0; i != MAX_ATOMIC_MUTEXES; i++)
113 			if ((ret = __mutex_alloc_int(
114 			    env, 0, MTX_ATOMIC_EMULATION,
115 			    0, &mtxregion->mtx_atomic[i])) != 0)
116 				return (ret);
117 #endif
118 
119 	return (0);
120 
121 err:	env->mutex_handle = NULL;
122 	if (mtxmgr->reginfo.addr != NULL)
123 		(void)__env_region_detach(env, &mtxmgr->reginfo, 0);
124 
125 	__os_free(env, mtxmgr);
126 	return (ret);
127 }
128 
129 /*
130  * __mutex_region_init --
131  *	Initialize a mutex region in shared memory.
132  */
133 static int
__mutex_region_init(env,mtxmgr)134 __mutex_region_init(env, mtxmgr)
135 	ENV *env;
136 	DB_MUTEXMGR *mtxmgr;
137 {
138 	DB_ENV *dbenv;
139 	DB_MUTEX *mutexp;
140 	DB_MUTEXREGION *mtxregion;
141 	db_mutex_t mutex;
142 	int ret;
143 	void *mutex_array;
144 
145 	dbenv = env->dbenv;
146 
147 	COMPQUIET(mutexp, NULL);
148 
149 	if ((ret = __env_alloc(&mtxmgr->reginfo,
150 	    sizeof(DB_MUTEXREGION), &mtxmgr->reginfo.primary)) != 0) {
151 		__db_errx(env, DB_STR("2013",
152 		    "Unable to allocate memory for the mutex region"));
153 		return (ret);
154 	}
155 	mtxmgr->reginfo.rp->primary =
156 	    R_OFFSET(&mtxmgr->reginfo, mtxmgr->reginfo.primary);
157 	mtxregion = mtxmgr->reginfo.primary;
158 	memset(mtxregion, 0, sizeof(*mtxregion));
159 
160 	mtxregion->mutex_size = __mutex_align_size(env);
161 
162 	mtxregion->stat.st_mutex_align = dbenv->mutex_align;
163 	if (dbenv->mutex_cnt == 0)
164 		dbenv->mutex_cnt = 1;
165 	mtxregion->stat.st_mutex_init =
166 	     mtxregion->stat.st_mutex_cnt = dbenv->mutex_cnt;
167 	mtxregion->stat.st_mutex_max = dbenv->mutex_max;
168 	if (mtxregion->stat.st_mutex_max != 0)
169 		mtxregion->stat.st_mutex_max += dbenv->mutex_inc;
170 	mtxregion->stat.st_mutex_tas_spins = dbenv->mutex_tas_spins;
171 
172 	/*
173 	 * Get a chunk of memory to be used for the mutexes themselves.  Each
174 	 * piece of the memory must be properly aligned, and that alignment
175 	 * may be more restrictive than the memory alignment returned by the
176 	 * underlying allocation code.  We already know how much memory each
177 	 * mutex in the array will take up, but we need to offset the first
178 	 * mutex in the array so the array begins properly aligned.
179 	 *
180 	 * The OOB mutex (MUTEX_INVALID) is 0.  To make this work, we ignore
181 	 * the first allocated slot when we build the free list.  We have to
182 	 * correct the count by 1 here, though, otherwise our counter will be
183 	 * off by 1.
184 	 */
185 	if ((ret = __env_alloc(&mtxmgr->reginfo,
186 	    mtxregion->stat.st_mutex_align +
187 	    (mtxregion->stat.st_mutex_cnt + 1) * mtxregion->mutex_size,
188 	    &mutex_array)) != 0) {
189 		__db_errx(env, DB_STR("2014",
190 		    "Unable to allocate memory for mutexes from the region"));
191 		return (ret);
192 	}
193 
194 	mtxregion->mutex_off_alloc = R_OFFSET(&mtxmgr->reginfo, mutex_array);
195 	mutex_array = ALIGNP_INC(mutex_array, mtxregion->stat.st_mutex_align);
196 	mtxregion->mutex_off = R_OFFSET(&mtxmgr->reginfo, mutex_array);
197 	mtxmgr->mutex_array = mutex_array;
198 
199 	/*
200 	 * Put the mutexes on a free list and clear the allocated flag.
201 	 *
202 	 * The OOB mutex (MUTEX_INVALID) is 0, skip it.
203 	 *
204 	 * The comparison is <, not <=, because we're looking ahead one
205 	 * in each link.
206 	 */
207 	env->mutex_handle = mtxmgr;
208 	if (F_ISSET(env, ENV_PRIVATE)) {
209 		mutexp = (DB_MUTEX *)mutex_array;
210 		mutexp++;
211 		mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align);
212 		mtxregion->mutex_next = (db_mutex_t)mutexp;
213 	} else {
214 		mtxregion->mutex_next = 1;
215 		mutexp = MUTEXP_SET(env, 1);
216 	}
217 	for (mutex = 1; mutex < mtxregion->stat.st_mutex_cnt; ++mutex) {
218 		mutexp->flags = 0;
219 		if (F_ISSET(env, ENV_PRIVATE))
220 			mutexp->mutex_next_link = (db_mutex_t)(mutexp + 1);
221 		else
222 			mutexp->mutex_next_link = mutex + 1;
223 		mutexp++;
224 		mutexp = ALIGNP_INC(mutexp, mtxregion->stat.st_mutex_align);
225 	}
226 	mutexp->flags = 0;
227 	mutexp->mutex_next_link = MUTEX_INVALID;
228 	mtxregion->stat.st_mutex_free = mtxregion->stat.st_mutex_cnt;
229 	mtxregion->stat.st_mutex_inuse = mtxregion->stat.st_mutex_inuse_max = 0;
230 	if ((ret = __mutex_alloc(env, MTX_MUTEX_REGION, 0, &mutex)) != 0)
231 		return (ret);
232 	mtxmgr->reginfo.mtx_alloc = mtxregion->mtx_region = mutex;
233 
234 	/*
235 	 * This is the first place we can test mutexes and we need to
236 	 * know if they're working.  (They CAN fail, for example on
237 	 * SunOS, when using fcntl(2) for locking and using an
238 	 * in-memory filesystem as the database environment directory.
239 	 * But you knew that, I'm sure -- it probably wasn't worth
240 	 * mentioning.)
241 	 */
242 	mutex = MUTEX_INVALID;
243 	if ((ret =
244 	    __mutex_alloc(env, MTX_MUTEX_TEST, 0, &mutex) != 0) ||
245 	    (ret = __mutex_lock(env, mutex)) != 0 ||
246 	    (ret = __mutex_unlock(env, mutex)) != 0 ||
247 	    (ret = __mutex_trylock(env, mutex)) != 0 ||
248 	    (ret = __mutex_unlock(env, mutex)) != 0 ||
249 	    (ret = __mutex_free(env, &mutex)) != 0) {
250 		__db_errx(env, DB_STR("2015",
251 	    "Unable to acquire/release a mutex; check configuration"));
252 		return (ret);
253 	}
254 #ifdef HAVE_SHARED_LATCHES
255 	if ((ret =
256 	    __mutex_alloc(env,
257 		MTX_MUTEX_TEST, DB_MUTEX_SHARED, &mutex) != 0) ||
258 	    (ret = __mutex_lock(env, mutex)) != 0 ||
259 	    (ret = __mutex_tryrdlock(env, mutex)) != DB_LOCK_NOTGRANTED ||
260 	    (ret = __mutex_unlock(env, mutex)) != 0 ||
261 	    (ret = __mutex_rdlock(env, mutex)) != 0 ||
262 	    (ret = __mutex_rdlock(env, mutex)) != 0 ||
263 	    (ret = __mutex_unlock(env, mutex)) != 0 ||
264 	    (ret = __mutex_unlock(env, mutex)) != 0 ||
265 	    (ret = __mutex_free(env, &mutex)) != 0) {
266 		__db_errx(env, DB_STR("2016",
267     "Unable to acquire/release a shared latch; check configuration"));
268 		return (ret);
269 	}
270 #endif
271 
272 	return (0);
273 }
274 
275 /*
276  * __mutex_env_refresh --
277  *	Clean up after the mutex region on a close or failed open.
278  *
279  * PUBLIC: int __mutex_env_refresh __P((ENV *));
280  */
281 int
__mutex_env_refresh(env)282 __mutex_env_refresh(env)
283 	ENV *env;
284 {
285 	DB_MUTEXMGR *mtxmgr;
286 	DB_MUTEXREGION *mtxregion;
287 	REGINFO *reginfo;
288 	int ret;
289 
290 	mtxmgr = env->mutex_handle;
291 	reginfo = &mtxmgr->reginfo;
292 	mtxregion = mtxmgr->reginfo.primary;
293 
294 	/*
295 	 * If a private region, return the memory to the heap.  Not needed for
296 	 * filesystem-backed or system shared memory regions, that memory isn't
297 	 * owned by any particular process.
298 	 */
299 	if (F_ISSET(env, ENV_PRIVATE)) {
300 		reginfo->mtx_alloc = MUTEX_INVALID;
301 
302 #ifdef HAVE_MUTEX_SYSTEM_RESOURCES
303 		/*
304 		 * If destroying the mutex region, return any system resources
305 		 * to the system.
306 		 */
307 		__mutex_resource_return(env, reginfo);
308 #endif
309 		/* Discard the mutex array. */
310 		__env_alloc_free(
311 		    reginfo, R_ADDR(reginfo, mtxregion->mutex_off_alloc));
312 	}
313 
314 	/* Detach from the region. */
315 	ret = __env_region_detach(env, reginfo, 0);
316 
317 	__os_free(env, mtxmgr);
318 
319 	env->mutex_handle = NULL;
320 
321 	return (ret);
322 }
323 
324 /*
325  * __mutex_align_size --
326  *	Return how much memory each mutex will take up if an array of them
327  *	are to be properly aligned, individually, within the array.
328  */
329 static db_size_t
__mutex_align_size(env)330 __mutex_align_size(env)
331 	ENV *env;
332 {
333 	DB_ENV *dbenv;
334 
335 	dbenv = env->dbenv;
336 
337 	return ((db_size_t)DB_ALIGN(sizeof(DB_MUTEX), dbenv->mutex_align));
338 }
339 
340 /*
341  * __mutex_region_size --
342  *	 Return the amount of space needed for the mutex region.
343  */
344 static size_t
__mutex_region_size(env)345 __mutex_region_size(env)
346 	ENV *env;
347 {
348 	DB_ENV *dbenv;
349 	size_t s;
350 
351 	dbenv = env->dbenv;
352 
353 	s = sizeof(DB_MUTEXMGR) + 1024;
354 
355 	/* We discard one mutex for the OOB slot. */
356 	s += __env_alloc_size(
357 	    (dbenv->mutex_cnt + 1) *__mutex_align_size(env));
358 
359 	return (s);
360 }
361 
362 /*
363  * __mutex_region_max --
364  *	 Return the amount of space needed to reach the maximum size.
365  */
366 static size_t
__mutex_region_max(env)367 __mutex_region_max(env)
368 	ENV *env;
369 {
370 	DB_ENV *dbenv;
371 	u_int32_t max;
372 
373 	dbenv = env->dbenv;
374 
375 	if ((max = dbenv->mutex_max) == 0) {
376 		if (F_ISSET(env, ENV_PRIVATE | ENV_THREAD) == ENV_PRIVATE)
377 			max = dbenv->mutex_inc + 1;
378 		else
379 			max = __lock_region_mutex_max(env) +
380 			    __txn_region_mutex_max(env) +
381 			    __log_region_mutex_max(env) +
382 			    dbenv->mutex_inc + 100;
383 	} else if (max <= dbenv->mutex_cnt)
384 		return (0);
385 	else
386 		max -= dbenv->mutex_cnt;
387 
388 	return ( __env_alloc_size(max * __mutex_align_size(env)));
389 }
390 
#ifdef	HAVE_MUTEX_SYSTEM_RESOURCES
/*
 * __mutex_resource_return
 *	Return any system-allocated mutex resources to the system.
 *
 *	Walks every mutex slot in the region described by infop and destroys
 *	those flagged DB_MUTEX_ALLOCATED.  env->mutex_handle is temporarily
 *	pointed at a stack-local manager and restored before returning.
 *
 * PUBLIC: void __mutex_resource_return __P((ENV *, REGINFO *));
 */
void
__mutex_resource_return(env, infop)
	ENV *env;
	REGINFO *infop;
{
	DB_MUTEX *mutexp;
	DB_MUTEXMGR tmp_mgr;
	DB_MUTEXREGION *region;
	db_mutex_t id, n;
	void *chunk, *saved_handle;
	uintmax_t remaining;
	int is_private;

	/*
	 * There are two callers: recovery, when the regions of a previous
	 * Berkeley DB run are discarded, and environment shutdown, when our
	 * own regions are discarded.
	 *
	 * Either way we walk the mutexes and destroy the live ones.
	 *
	 * This works like joining a region -- the REGINFO passed in is what
	 * __env_region_attach() returned, so only the links have to be
	 * filled in.
	 *
	 * !!!
	 * The region may be corrupted, of course.  We're safe because the
	 * only things we look at are things that are initialized when the
	 * region is created, and never modified after that.
	 */
	memset(&tmp_mgr, 0, sizeof(tmp_mgr));
	tmp_mgr.reginfo = *infop;
	tmp_mgr.reginfo.primary =
	    R_ADDR(&tmp_mgr.reginfo, tmp_mgr.reginfo.rp->primary);
	region = tmp_mgr.reginfo.primary;
	tmp_mgr.mutex_array = R_ADDR(&tmp_mgr.reginfo, region->mutex_off);

	/*
	 * The underlying mutex routines consult env->mutex_handle both to
	 * decide whether to do any work and to locate their information, so
	 * install the temporary manager for the duration of the walk and put
	 * the original handle back afterwards.
	 *
	 * Slot 0 is the OOB mutex (MUTEX_INVALID); the walk starts at 1.
	 */
	saved_handle = env->mutex_handle;
	env->mutex_handle = &tmp_mgr;

	is_private = F_ISSET(env, ENV_PRIVATE) != 0;
	if (is_private) {
		mutexp = (DB_MUTEX *)tmp_mgr.mutex_array + 1;
		chunk = NULL;
		/* Bytes left in the current element, less the skipped slot. */
		remaining = __env_elem_size(env,
		    (void *)region->mutex_off_alloc);
		remaining -= sizeof(*mutexp);
	} else
		mutexp = MUTEXP_SET(env, 1);

	for (n = 1; n <= region->stat.st_mutex_cnt; ++n) {
		/* Private environments name a mutex by address, not index. */
		id = is_private ? (db_mutex_t)mutexp : n;
		if (F_ISSET(mutexp, DB_MUTEX_ALLOCATED))
			(void)__mutex_destroy(env, id);

		mutexp++;
		if (is_private) {
			/* Out of room for another mutex: fetch the next chunk. */
			remaining -= sizeof(*mutexp);
			if (remaining < sizeof(*mutexp))
				mutexp = __env_get_chunk(&tmp_mgr.reginfo,
				    &chunk, &remaining);
		}
		mutexp = ALIGNP_INC(mutexp, region->stat.st_mutex_align);
	}

	env->mutex_handle = saved_handle;
}
#endif
469