/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Volker Lendecke 2012,2013
   Copyright (C) Stefan Metzmacher 2013,2014
   Copyright (C) Michael Adam 2014

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb_private.h"
#include "system/threads.h"

#ifdef USE_TDB_MUTEX_LOCKING

/*
 * If we run with mutexes, we store the "struct tdb_mutexes" at the
 * beginning of the file. We store an additional tdb_header right
 * beyond the mutex area, page aligned. All the offsets within the tdb
 * are relative to the area behind the mutex area. tdb->map_ptr points
 * behind the mmap area as well, so the read and write path in the
 * mutex case can remain unchanged.
 *
 * Early in the mutex development the mutexes were placed between the hash
 * chain pointers and the real tdb data. This had two drawbacks: First, it
 * made pointer calculations more complex. Second, we had to mmap the mutex
 * area twice. One mapping was the normal map_ptr in the tdb, which frequently
 * changed from within tdb_oob. At least the Linux glibc robust mutex code
 * assumes constant pointers in memory, so a constantly changing mmap area
 * destroys the mutex list. So we had to mmap the first bytes of the file with
 * a second mmap call. With that scheme, very weird errors happened that could
 * easily be fixed by doing the mutex mmap on a separate file. It seemed that
 * mapping the same memory area twice does not end up accessing the same
 * physical page: looking at the mutexes in gdb, old data showed up after some
 * re-mapping. To avoid a separate mutex file, the code now puts the real
 * content of the tdb file after the mutex area. This way we do not have
 * overlapping mmap areas, the mutex area is mmapped once and not changed,
 * and the tdb data area's mmap is constantly changed but does not overlap.
 */
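
/*
 * Illustrative sketch of the resulting on-disk layout (not normative; the
 * concrete sizes depend on the platform's pthread_mutex_t and page size):
 *
 *   offset 0:                 struct tdb_mutexes
 *                             (hdr copy, allrecord_mutex/allrecord_lock,
 *                              hashchains[1 + hash_size])
 *   ... padding up to the next page boundary ...
 *   offset tdb_mutex_size():  the real struct tdb_header, followed by
 *                             freelist, hash chain pointers and records
 *                             as in a non-mutex tdb.
 *
 * A hedged usage sketch from an application's point of view, assuming the
 * public TDB_MUTEX_LOCKING open flag (used together with TDB_CLEAR_IF_FIRST)
 * and a runtime check for robust mutexes:
 *
 *   if (tdb_runtime_check_for_robust_mutexes()) {
 *           tdb = tdb_open("foo.tdb", hash_size,
 *                          TDB_CLEAR_IF_FIRST | TDB_MUTEX_LOCKING,
 *                          O_RDWR | O_CREAT, 0600);
 *   }
 */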

struct tdb_mutexes {
	struct tdb_header hdr;

	/* protect allrecord_lock */
	pthread_mutex_t allrecord_mutex;

	/*
	 * F_UNLCK: free,
	 * F_RDLCK: shared,
	 * F_WRLCK: exclusive
	 */
	short int allrecord_lock;

	/*
	 * Index 0 is the freelist mutex, followed by
	 * one mutex per hashchain.
	 */
	pthread_mutex_t hashchains[1];
};

bool tdb_have_mutexes(struct tdb_context *tdb)
{
	return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0);
}

size_t tdb_mutex_size(struct tdb_context *tdb)
{
	size_t mutex_size;

	if (!tdb_have_mutexes(tdb)) {
		return 0;
	}

	mutex_size = sizeof(struct tdb_mutexes);
	mutex_size += tdb->hash_size * sizeof(pthread_mutex_t);

	return TDB_ALIGN(mutex_size, tdb->page_size);
}
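
/*
 * Worked example of the size computation above (illustrative only; the
 * concrete numbers are assumptions, e.g. sizeof(pthread_mutex_t) == 40 as
 * on 64-bit glibc, and a 4096 byte page size):
 *
 *   hash_size  = 10000
 *   mutex_size = sizeof(struct tdb_mutexes)   (header copy, allrecord
 *                                              state, hashchains[1])
 *              + 10000 * 40                   (one mutex per hash chain)
 *
 *   tdb_mutex_size() = TDB_ALIGN(mutex_size, 4096), i.e. mutex_size
 *   rounded up to the next page boundary, so that the real tdb_header
 *   behind the mutex area starts page aligned.
 */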

/*
 * Get the index for a chain mutex
 */
static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
			    unsigned *idx)
{
	/*
	 * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before
	 * the 4 bytes of the freelist start and the hash chain that is about
	 * to be locked. See lock_offset() where the freelist is -1 vs the
	 * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in
	 * the tdb file itself as data, we need to adjust the offset here.
	 */
	const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t);

	if (!tdb_have_mutexes(tdb)) {
		return false;
	}
	if (len != 1) {
		/* Possibly the allrecord lock */
		return false;
	}
	if (off < freelist_lock_ofs) {
		/* One of the special locks */
		return false;
	}
	if (tdb->hash_size == 0) {
		/* tdb not initialized yet, called from tdb_open_ex() */
		return false;
	}
	if (off >= TDB_DATA_START(tdb->hash_size)) {
		/* Single record lock from traverses */
		return false;
	}

	/*
	 * Now we know it's a freelist or hash chain lock. Those are always 4
	 * byte aligned. Paranoia check.
	 */
	if ((off % sizeof(tdb_off_t)) != 0) {
		abort();
	}

	/*
	 * Re-index the fcntl offset into an offset into the mutex array
	 */
	off -= freelist_lock_ofs; /* rebase to index 0 */
	off /= sizeof(tdb_off_t); /* 0 for the freelist, 1-n for the hashchains */

	*idx = off;
	return true;
}
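
/*
 * For illustration, the mapping computed above (derived directly from the
 * code, with a 4-byte tdb_off_t):
 *
 *   fcntl offset FREELIST_TOP - 4      -> idx 0    (freelist mutex)
 *   fcntl offset FREELIST_TOP          -> idx 1    (hash chain 0)
 *   fcntl offset FREELIST_TOP + 4 * n  -> idx n+1  (hash chain n)
 */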

static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb)
{
	size_t i;

	for (i=0; i < tdb->num_lockrecs; i++) {
		bool ret;
		unsigned idx;

		ret = tdb_mutex_index(tdb,
				      tdb->lockrecs[i].off,
				      tdb->lockrecs[i].count,
				      &idx);
		if (!ret) {
			continue;
		}

		if (idx == 0) {
			/* this is the freelist mutex */
			continue;
		}

		return true;
	}

	return false;
}

static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag)
{
	int ret;

	if (waitflag) {
		ret = pthread_mutex_lock(m);
	} else {
		ret = pthread_mutex_trylock(m);
	}
	if (ret != EOWNERDEAD) {
		return ret;
	}

	/*
	 * For chainlocks, we don't do any cleanup (yet?)
	 */
	return pthread_mutex_consistent(m);
}

static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag)
{
	int ret;

	if (waitflag) {
		ret = pthread_mutex_lock(&m->allrecord_mutex);
	} else {
		ret = pthread_mutex_trylock(&m->allrecord_mutex);
	}
	if (ret != EOWNERDEAD) {
		return ret;
	}

	/*
	 * The allrecord lock holder died. We need to reset the allrecord_lock
	 * to F_UNLCK. This should also be the indication for
	 * tdb_needs_recovery.
	 */
	m->allrecord_lock = F_UNLCK;

	return pthread_mutex_consistent(&m->allrecord_mutex);
}

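/*
 * Take the mutex covering an fcntl lock range, if the range maps to one.
 *
 * Returns false if the (off, len) range is not covered by a mutex (the
 * allrecord range, the special locks, single record locks), in which case
 * the caller has to fall back to fcntl locking. Returns true if the mutex
 * path was taken; *pret is then 0 on success or -1 with errno set.
 */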
bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len,
		    bool waitflag, int *pret)
{
	struct tdb_mutexes *m = tdb->mutexes;
	pthread_mutex_t *chain;
	int ret;
	unsigned idx;
	bool allrecord_ok;

	if (!tdb_mutex_index(tdb, off, len, &idx)) {
		return false;
	}
	chain = &m->hashchains[idx];

again:
	ret = chain_mutex_lock(chain, waitflag);
	if (ret == EBUSY) {
		ret = EAGAIN;
	}
	if (ret != 0) {
		errno = ret;
		goto fail;
	}

	if (idx == 0) {
		/*
		 * This is a freelist lock, which is independent of
		 * the allrecord lock. So we're done once we got the
		 * freelist mutex.
		 */
		*pret = 0;
		return true;
	}

	if (tdb_have_mutex_chainlocks(tdb)) {
		/*
		 * We can only check the allrecord lock once. If we do it with
		 * one chain mutex locked, we will deadlock with the allrecord
		 * locker process in the following way: We lock the first hash
		 * chain, we check for the allrecord lock. We keep the hash
		 * chain locked. Then the allrecord locker locks the
		 * allrecord_mutex. It walks the list of chain mutexes,
		 * locking them all in sequence. Meanwhile, we have the chain
		 * mutex locked, so the allrecord locker blocks trying to lock
		 * our chain mutex. Then we come in and try to lock the second
		 * chain lock, which in most cases will be the freelist. We
		 * see that the allrecord lock is locked and put ourselves on
		 * the allrecord_mutex. This will never be signalled though
		 * because the allrecord locker waits for us to give up the
		 * chain lock.
		 */

		*pret = 0;
		return true;
	}

	/*
	 * Check if someone has the allrecord lock: queue if so.
	 */

	allrecord_ok = false;

	if (m->allrecord_lock == F_UNLCK) {
		/*
		 * allrecord lock not taken
		 */
		allrecord_ok = true;
	}

	if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) {
		/*
		 * allrecord shared lock taken, but we only want to read
		 */
		allrecord_ok = true;
	}

	if (allrecord_ok) {
		*pret = 0;
		return true;
	}

	ret = pthread_mutex_unlock(chain);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(chain_mutex) failed: %s\n", strerror(ret)));
		errno = ret;
		goto fail;
	}
	ret = allrecord_mutex_lock(m, waitflag);
	if (ret == EBUSY) {
		ret = EAGAIN;
	}
	if (ret != 0) {
		if (waitflag || (ret != EAGAIN)) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock"
				 "(allrecord_mutex) failed: %s\n",
				 waitflag ? "" : "try_", strerror(ret)));
		}
		errno = ret;
		goto fail;
	}
	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
		errno = ret;
		goto fail;
	}
	goto again;

fail:
	*pret = -1;
	return true;
}

bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len,
		      int *pret)
{
	struct tdb_mutexes *m = tdb->mutexes;
	pthread_mutex_t *chain;
	int ret;
	unsigned idx;

	if (!tdb_mutex_index(tdb, off, len, &idx)) {
		return false;
	}
	chain = &m->hashchains[idx];

	ret = pthread_mutex_unlock(chain);
	if (ret == 0) {
		*pret = 0;
		return true;
	}
	errno = ret;
	*pret = -1;
	return true;
}

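/*
 * Take the allrecord lock at the mutex level. The protocol, as implemented
 * below: take allrecord_mutex, mark allrecord_lock as F_RDLCK or F_WRLCK,
 * then lock and immediately unlock every chain mutex once. That drains any
 * chain lock holders that entered before the mark, so after the loop nobody
 * else holds a chain mutex. On success we return with allrecord_mutex still
 * held; it is released again in tdb_mutex_allrecord_unlock().
 */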
int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
			     enum tdb_lock_flags flags)
{
	struct tdb_mutexes *m = tdb->mutexes;
	int ret;
	uint32_t i;
	bool waitflag = (flags & TDB_LOCK_WAIT);
	int saved_errno;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	if (flags & TDB_LOCK_MARK_ONLY) {
		return 0;
	}

	ret = allrecord_mutex_lock(m, waitflag);
	if (!waitflag && (ret == EBUSY)) {
		errno = EAGAIN;
		tdb->ecode = TDB_ERR_LOCK;
		return -1;
	}
	if (ret != 0) {
		if (!(flags & TDB_LOCK_PROBE)) {
			TDB_LOG((tdb, TDB_DEBUG_TRACE,
				 "allrecord_mutex_lock() failed: %s\n",
				 strerror(ret)));
		}
		tdb->ecode = TDB_ERR_LOCK;
		return -1;
	}

	if (m->allrecord_lock != F_UNLCK) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		goto fail_unlock_allrecord_mutex;
	}
	m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK;

	for (i=0; i<tdb->hash_size; i++) {

		/* ignore hashchains[0], the freelist */
		pthread_mutex_t *chain = &m->hashchains[i+1];

		ret = chain_mutex_lock(chain, waitflag);
		if (!waitflag && (ret == EBUSY)) {
			errno = EAGAIN;
			goto fail_unroll_allrecord_lock;
		}
		if (ret != 0) {
			if (!(flags & TDB_LOCK_PROBE)) {
				TDB_LOG((tdb, TDB_DEBUG_TRACE,
					 "chain_mutex_lock() failed: %s\n",
					 strerror(ret)));
			}
			errno = ret;
			goto fail_unroll_allrecord_lock;
		}

		ret = pthread_mutex_unlock(chain);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			errno = ret;
			goto fail_unroll_allrecord_lock;
		}
	}
	/*
	 * We leave this routine with m->allrecord_mutex locked
	 */
	return 0;

fail_unroll_allrecord_lock:
	m->allrecord_lock = F_UNLCK;

fail_unlock_allrecord_mutex:
	saved_errno = errno;
	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
	}
	errno = saved_errno;
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;
	int ret;
	uint32_t i;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	/*
	 * Our only caller tdb_allrecord_upgrade()
	 * guarantees that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if (m->allrecord_lock != F_RDLCK) {
		tdb->ecode = TDB_ERR_LOCK;
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return -1;
	}

	m->allrecord_lock = F_WRLCK;

	for (i=0; i<tdb->hash_size; i++) {

		/* ignore hashchains[0], the freelist */
		pthread_mutex_t *chain = &m->hashchains[i+1];

		ret = chain_mutex_lock(chain, true);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			goto fail_unroll_allrecord_lock;
		}

		ret = pthread_mutex_unlock(chain);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			goto fail_unroll_allrecord_lock;
		}
	}

	return 0;

fail_unroll_allrecord_lock:
	m->allrecord_lock = F_RDLCK;
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;

	/*
	 * Our only caller tdb_allrecord_upgrade() (in the error case)
	 * guarantees that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if (m->allrecord_lock != F_WRLCK) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return;
	}

	m->allrecord_lock = F_RDLCK;
	return;
}


int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;
	short old;
	int ret;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	/*
	 * Our only callers tdb_allrecord_unlock() and
	 * tdb_allrecord_lock() (in the error path)
	 * guarantee that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return -1;
	}

	old = m->allrecord_lock;
	m->allrecord_lock = F_UNLCK;

	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		m->allrecord_lock = old;
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
		return -1;
	}
	return 0;
}

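/*
 * Map the mutex area and initialize all mutexes in it. The mutexes are
 * created process-shared and robust, with error checking enabled, so that
 * a crashed lock holder can be detected via EOWNERDEAD and recovered with
 * pthread_mutex_consistent() in chain_mutex_lock()/allrecord_mutex_lock().
 */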
int tdb_mutex_init(struct tdb_context *tdb)
{
	struct tdb_mutexes *m;
	pthread_mutexattr_t ma;
	int i, ret;

	ret = tdb_mutex_mmap(tdb);
	if (ret == -1) {
		return -1;
	}
	m = tdb->mutexes;

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		goto fail_munmap;
	}
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto fail;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto fail;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto fail;
	}

	for (i=0; i<tdb->hash_size+1; i++) {
		pthread_mutex_t *chain = &m->hashchains[i];

		ret = pthread_mutex_init(chain, &ma);
		if (ret != 0) {
			goto fail;
		}
	}

	m->allrecord_lock = F_UNLCK;

	ret = pthread_mutex_init(&m->allrecord_mutex, &ma);
	if (ret != 0) {
		goto fail;
	}
	ret = 0;
fail:
	pthread_mutexattr_destroy(&ma);
fail_munmap:

	if (ret == 0) {
		return 0;
	}

	tdb_mutex_munmap(tdb);

	errno = ret;
	return -1;
}

int tdb_mutex_mmap(struct tdb_context *tdb)
{
	size_t len;
	void *ptr;

	len = tdb_mutex_size(tdb);
	if (len == 0) {
		return 0;
	}

	if (tdb->mutexes != NULL) {
		return 0;
	}

	ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
		   tdb->fd, 0);
	if (ptr == MAP_FAILED) {
		return -1;
	}
	tdb->mutexes = (struct tdb_mutexes *)ptr;

	return 0;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
	size_t len;
	int ret;

	len = tdb_mutex_size(tdb);
	if (len == 0) {
		return 0;
	}

	ret = munmap(tdb->mutexes, len);
	if (ret == -1) {
		return -1;
	}
	tdb->mutexes = NULL;

	return 0;
}

static bool tdb_mutex_locking_cached;

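/*
 * Probe whether the pthread implementation provides what we need:
 * process-shared, robust, error-checking mutexes. The double lock below
 * must return EDEADLK; if it "succeeds", we are most likely talking to
 * non-functional stubs (e.g. from a libc without real thread support)
 * and must not use mutex locking.
 */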
static bool tdb_mutex_locking_supported(void)
{
	pthread_mutexattr_t ma;
	pthread_mutex_t m;
	int ret;
	static bool initialized;

	if (initialized) {
		return tdb_mutex_locking_cached;
	}

	initialized = true;

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		return false;
	}
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutex_init(&m, &ma);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutex_lock(&m);
	if (ret != 0) {
		goto cleanup_m;
	}
	/*
	 * This makes sure we have real mutexes
	 * from a threading library instead of just
	 * stubs from libc.
	 */
	ret = pthread_mutex_lock(&m);
	if (ret != EDEADLK) {
		goto cleanup_lock;
	}
	ret = pthread_mutex_unlock(&m);
	if (ret != 0) {
		goto cleanup_m;
	}

	tdb_mutex_locking_cached = true;
	goto cleanup_m;

cleanup_lock:
	pthread_mutex_unlock(&m);
cleanup_m:
	pthread_mutex_destroy(&m);
cleanup_ma:
	pthread_mutexattr_destroy(&ma);
	return tdb_mutex_locking_cached;
}

static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR;
static pid_t tdb_robust_mutex_pid = -1;

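/*
 * Install a SIGCHLD handler for the duration of the robust mutex runtime
 * check below, remembering any previously installed handler so it can be
 * chained from tdb_robust_mutex_handler(). Returns false if sigaction()
 * is not available, in which case the check cannot be run safely.
 */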
static bool tdb_robust_mutex_setup_sigchild(void (*handler)(int),
			void (**p_old_handler)(int))
{
#ifdef HAVE_SIGACTION
	struct sigaction act;
	struct sigaction oldact;

	memset(&act, '\0', sizeof(act));

	act.sa_handler = handler;
#ifdef SA_RESTART
	act.sa_flags = SA_RESTART;
#endif
	sigemptyset(&act.sa_mask);
	sigaddset(&act.sa_mask, SIGCHLD);
	sigaction(SIGCHLD, &act, &oldact);
	if (p_old_handler) {
		*p_old_handler = oldact.sa_handler;
	}
	return true;
#else /* !HAVE_SIGACTION */
	return false;
#endif
}

static void tdb_robust_mutex_handler(int sig)
{
	pid_t child_pid = tdb_robust_mutex_pid;

	if (child_pid != -1) {
		pid_t pid;

		pid = waitpid(child_pid, NULL, WNOHANG);
		if (pid == -1) {
			switch (errno) {
			case ECHILD:
				tdb_robust_mutex_pid = -1;
				return;

			default:
				return;
			}
		}
		if (pid == child_pid) {
			tdb_robust_mutex_pid = -1;
			return;
		}
	}

	if (tdb_robust_mutext_old_handler == SIG_DFL) {
		return;
	}
	if (tdb_robust_mutext_old_handler == SIG_IGN) {
		return;
	}
	if (tdb_robust_mutext_old_handler == SIG_ERR) {
		return;
	}

	tdb_robust_mutext_old_handler(sig);
}

static void tdb_robust_mutex_wait_for_child(pid_t *child_pid)
{
	int options = WNOHANG;

	if (*child_pid == -1) {
		return;
	}

	while (tdb_robust_mutex_pid > 0) {
		pid_t pid;

		/*
		 * First we try with WNOHANG, as the process might not exist
		 * anymore. Once we've sent SIGKILL we block waiting for the
		 * exit.
		 */
		pid = waitpid(*child_pid, NULL, options);
		if (pid == -1) {
			if (errno == EINTR) {
				continue;
			} else if (errno == ECHILD) {
				break;
			} else {
				abort();
			}
		}
		if (pid == *child_pid) {
			break;
		}

		kill(*child_pid, SIGKILL);
		options = 0;
	}

	tdb_robust_mutex_pid = -1;
	*child_pid = -1;
}

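/*
 * Runtime check whether robust mutexes actually work on this system. As a
 * sketch of the protocol implemented below: fork a child that locks a
 * process-shared robust mutex and exits while holding it; the parent then
 * expects pthread_mutex_trylock() to report EBUSY while the child lives,
 * EOWNERDEAD after it died, and a successful pthread_mutex_consistent()
 * plus unlock afterwards. Only if the whole sequence behaves as expected
 * do we allow mutex locking. The result is computed once and cached.
 */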
_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
	void *ptr = NULL;
	pthread_mutex_t *m = NULL;
	pthread_mutexattr_t ma;
	int ret = 1;
	int pipe_down[2] = { -1, -1 };
	int pipe_up[2] = { -1, -1 };
	ssize_t nread;
	char c = 0;
	bool ok;
	static bool initialized;
	pid_t saved_child_pid = -1;
	bool cleanup_ma = false;

	if (initialized) {
		return tdb_mutex_locking_cached;
	}

	initialized = true;

	ok = tdb_mutex_locking_supported();
	if (!ok) {
		return false;
	}

	tdb_mutex_locking_cached = false;

	ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_ANON, -1 /* fd */, 0);
	if (ptr == MAP_FAILED) {
		return false;
	}

	ret = pipe(pipe_down);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pipe(pipe_up);
	if (ret != 0) {
		goto cleanup;
	}

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		goto cleanup;
	}
	cleanup_ma = true;
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto cleanup;
	}
	ret = pthread_mutex_init(ptr, &ma);
	if (ret != 0) {
		goto cleanup;
	}
	m = (pthread_mutex_t *)ptr;

	if (tdb_robust_mutex_setup_sigchild(tdb_robust_mutex_handler,
			&tdb_robust_mutext_old_handler) == false) {
		goto cleanup;
	}

	tdb_robust_mutex_pid = fork();
	saved_child_pid = tdb_robust_mutex_pid;
	if (tdb_robust_mutex_pid == 0) {
		size_t nwritten;
		close(pipe_down[1]);
		close(pipe_up[0]);
		ret = pthread_mutex_lock(m);
		nwritten = write(pipe_up[1], &ret, sizeof(ret));
		if (nwritten != sizeof(ret)) {
			_exit(1);
		}
		if (ret != 0) {
			_exit(1);
		}
		nread = read(pipe_down[0], &c, 1);
		if (nread != 1) {
			_exit(1);
		}
		/* leave locked */
		_exit(0);
	}
	if (tdb_robust_mutex_pid == -1) {
		goto cleanup;
	}
	close(pipe_down[0]);
	pipe_down[0] = -1;
	close(pipe_up[1]);
	pipe_up[1] = -1;

	nread = read(pipe_up[0], &ret, sizeof(ret));
	if (nread != sizeof(ret)) {
		goto cleanup;
	}

	ret = pthread_mutex_trylock(m);
	if (ret != EBUSY) {
		if (ret == 0) {
			pthread_mutex_unlock(m);
		}
		goto cleanup;
	}

	if (write(pipe_down[1], &c, 1) != 1) {
		goto cleanup;
	}

	nread = read(pipe_up[0], &c, 1);
	if (nread != 0) {
		goto cleanup;
	}

	tdb_robust_mutex_wait_for_child(&saved_child_pid);

	ret = pthread_mutex_trylock(m);
	if (ret != EOWNERDEAD) {
		if (ret == 0) {
			pthread_mutex_unlock(m);
		}
		goto cleanup;
	}

	ret = pthread_mutex_consistent(m);
	if (ret != 0) {
		goto cleanup;
	}

	ret = pthread_mutex_trylock(m);
	if (ret != EDEADLK && ret != EBUSY) {
		pthread_mutex_unlock(m);
		goto cleanup;
	}

	ret = pthread_mutex_unlock(m);
	if (ret != 0) {
		goto cleanup;
	}

	tdb_mutex_locking_cached = true;

cleanup:
	/*
	 * Note that we don't reset the signal handler; we just reset
	 * tdb_robust_mutex_pid to -1. This is ok as this code path is only
	 * called once per process.
	 *
	 * Leaving our signal handler installed avoids races with other
	 * threads potentially setting up their own SIGCHLD handlers.
	 *
	 * The worst thing that can happen is that the other, newer signal
	 * handler will get the SIGCHLD signal for our child and/or reap the
	 * child with a wait() function. tdb_robust_mutex_wait_for_child()
	 * handles the case where waitpid returns ECHILD.
	 */
	tdb_robust_mutex_wait_for_child(&saved_child_pid);

	if (m != NULL) {
		pthread_mutex_destroy(m);
	}
	if (cleanup_ma) {
		pthread_mutexattr_destroy(&ma);
	}
	if (pipe_down[0] != -1) {
		close(pipe_down[0]);
	}
	if (pipe_down[1] != -1) {
		close(pipe_down[1]);
	}
	if (pipe_up[0] != -1) {
		close(pipe_up[0]);
	}
	if (pipe_up[1] != -1) {
		close(pipe_up[1]);
	}
	if (ptr != NULL) {
		munmap(ptr, sizeof(pthread_mutex_t));
	}

	return tdb_mutex_locking_cached;
}

#else

size_t tdb_mutex_size(struct tdb_context *tdb)
{
	return 0;
}

bool tdb_have_mutexes(struct tdb_context *tdb)
{
	return false;
}

int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
			     enum tdb_lock_flags flags)
{
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
	return -1;
}

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
	return;
}

int tdb_mutex_mmap(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

int tdb_mutex_init(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
	return false;
}

#endif