1 /**
2  * testcode/checklocks.c - wrapper on locks that checks access.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include "config.h"
37 #include <signal.h>
38 #include "util/locks.h"   /* include before checklocks.h */
39 #include "testcode/checklocks.h"
40 
41 /**
42  * \file
43  * Locks that are checked.
44  *
45  * Ugly hack: uses the fact that workers start with an int thread_num, and
46  * are passed to thread_create to make the thread numbers here the same as
47  * those used for logging which is nice.
48  *
49  * Todo:
50  *	 - debug status print, of thread lock stacks, and current waiting.
51  */
52 #ifdef USE_THREAD_DEBUG
53 
54 /** How long to wait before lock attempt is a failure. */
55 #define CHECK_LOCK_TIMEOUT 120 /* seconds */
56 /** How long to wait before join attempt is a failure. */
57 #define CHECK_JOIN_TIMEOUT 120 /* seconds */
58 
59 /** if key has been created */
60 static int key_created = 0;
61 /** if the key was deleted, i.e. we have quit */
62 static int key_deleted = 0;
63 /** we hide the thread debug info with this key. */
64 static ub_thread_key_type thr_debug_key;
65 /** the list of threads, so all threads can be examined. NULL if unused. */
66 static struct thr_check* thread_infos[THRDEBUG_MAX_THREADS];
67 /** do we check locking order */
68 int check_locking_order = 1;
69 /** the pid of this runset, reasonably unique. */
70 static pid_t check_lock_pid;
71 
72 /** print all possible debug info on the state of the system */
73 static void total_debug_info(void);
74 /** print pretty lock error and exit (decl for NORETURN attribute) */
75 static void lock_error(struct checked_lock* lock, const char* func,
76 	const char* file, int line, const char* err) ATTR_NORETURN;
77 
78 /** print pretty lock error and exit */
lock_error(struct checked_lock * lock,const char * func,const char * file,int line,const char * err)79 static void lock_error(struct checked_lock* lock,
80 	const char* func, const char* file, int line, const char* err)
81 {
82 	log_err("lock error (description follows)");
83 	log_err("Created at %s %s:%d", lock->create_func,
84 		lock->create_file, lock->create_line);
85 	if(lock->holder_func && lock->holder_file)
86 		log_err("Previously %s %s:%d", lock->holder_func,
87 			lock->holder_file, lock->holder_line);
88 	log_err("At %s %s:%d", func, file, line);
89 	log_err("Error for %s lock: %s",
90 		(lock->type==check_lock_mutex)?"mutex": (
91 		(lock->type==check_lock_spinlock)?"spinlock": (
92 		(lock->type==check_lock_rwlock)?"rwlock": "badtype")), err);
93 	log_err("complete status display:");
94 	total_debug_info();
95 	fatal_exit("bailing out");
96 }
97 
98 /**
99  * Obtain lock on debug lock structure. This could be a deadlock by the caller.
100  * The debug code itself does not deadlock. Anyway, check with timeouts.
101  * @param lock: on what to acquire lock.
102  * @param func: user level caller identification.
103  * @param file: user level caller identification.
104  * @param line: user level caller identification.
105  */
106 static void
acquire_locklock(struct checked_lock * lock,const char * func,const char * file,int line)107 acquire_locklock(struct checked_lock* lock,
108 	const char* func, const char* file, int line)
109 {
110 	struct timespec to;
111 	int err;
112 	int contend = 0;
113 	/* first try; inc contention counter if not immediately */
114 	if((err = pthread_mutex_trylock(&lock->lock))) {
115 		if(err==EBUSY)
116 			contend++;
117 		else fatal_exit("error in mutex_trylock: %s", strerror(err));
118 	}
119 	if(!err)
120 		return; /* immediate success */
121 	to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
122 	to.tv_nsec = 0;
123 	err = pthread_mutex_timedlock(&lock->lock, &to);
124 	if(err) {
125 		log_err("in acquiring locklock: %s", strerror(err));
126 		lock_error(lock, func, file, line, "acquire locklock");
127 	}
128 	/* since we hold the lock, we can edit the contention_count */
129 	lock->contention_count += contend;
130 }
131 
132 /** add protected region */
133 void
lock_protect(void * p,void * area,size_t size)134 lock_protect(void *p, void* area, size_t size)
135 {
136 	struct checked_lock* lock = *(struct checked_lock**)p;
137 	struct protected_area* e = (struct protected_area*)malloc(
138 		sizeof(struct protected_area));
139 	if(!e)
140 		fatal_exit("lock_protect: out of memory");
141 	e->region = area;
142 	e->size = size;
143 	e->hold = malloc(size);
144 	if(!e->hold)
145 		fatal_exit("lock_protect: out of memory");
146 	memcpy(e->hold, e->region, e->size);
147 
148 	acquire_locklock(lock, __func__, __FILE__, __LINE__);
149 	e->next = lock->prot;
150 	lock->prot = e;
151 	LOCKRET(pthread_mutex_unlock(&lock->lock));
152 }
153 
154 /** remove protected region */
155 void
lock_unprotect(void * mangled,void * area)156 lock_unprotect(void* mangled, void* area)
157 {
158 	struct checked_lock* lock = *(struct checked_lock**)mangled;
159 	struct protected_area* p, **prevp;
160 	if(!lock)
161 		return;
162 	acquire_locklock(lock, __func__, __FILE__, __LINE__);
163 	p = lock->prot;
164 	prevp = &lock->prot;
165 	while(p) {
166 		if(p->region == area) {
167 			*prevp = p->next;
168 			free(p->hold);
169 			free(p);
170 			LOCKRET(pthread_mutex_unlock(&lock->lock));
171 			return;
172 		}
173 		prevp = &p->next;
174 		p = p->next;
175 	}
176 	LOCKRET(pthread_mutex_unlock(&lock->lock));
177 }
178 
179 /**
180  * Check protected memory region. Memory compare. Exit on error.
181  * @param lock: which lock to check.
182  * @param func: location we are now (when failure is detected).
183  * @param file: location we are now (when failure is detected).
184  * @param line: location we are now (when failure is detected).
185  */
186 static void
prot_check(struct checked_lock * lock,const char * func,const char * file,int line)187 prot_check(struct checked_lock* lock,
188 	const char* func, const char* file, int line)
189 {
190 	struct protected_area* p = lock->prot;
191 	while(p) {
192 		if(memcmp(p->hold, p->region, p->size) != 0) {
193 			log_hex("memory prev", p->hold, p->size);
194 			log_hex("memory here", p->region, p->size);
195 			lock_error(lock, func, file, line,
196 				"protected area modified");
197 		}
198 		p = p->next;
199 	}
200 }
201 
202 /** Copy protected memory region */
203 static void
prot_store(struct checked_lock * lock)204 prot_store(struct checked_lock* lock)
205 {
206 	struct protected_area* p = lock->prot;
207 	while(p) {
208 		memcpy(p->hold, p->region, p->size);
209 		p = p->next;
210 	}
211 }
212 
213 /** get memory held by lock */
214 size_t
lock_get_mem(void * pp)215 lock_get_mem(void* pp)
216 {
217 	size_t s;
218 	struct checked_lock* lock = *(struct checked_lock**)pp;
219 	struct protected_area* p;
220 	s = sizeof(struct checked_lock);
221 	acquire_locklock(lock, __func__, __FILE__, __LINE__);
222 	for(p = lock->prot; p; p = p->next) {
223 		s += sizeof(struct protected_area);
224 		s += p->size;
225 	}
226 	LOCKRET(pthread_mutex_unlock(&lock->lock));
227 	return s;
228 }
229 
230 /** write lock trace info to file, while you hold those locks */
231 static void
ordercheck_locklock(struct thr_check * thr,struct checked_lock * lock)232 ordercheck_locklock(struct thr_check* thr, struct checked_lock* lock)
233 {
234 	int info[4];
235 	if(!check_locking_order) return;
236 	if(!thr->holding_first) return; /* no older lock, no info */
237 	/* write: <lock id held> <lock id new> <file> <line> */
238 	info[0] = thr->holding_first->create_thread;
239 	info[1] = thr->holding_first->create_instance;
240 	info[2] = lock->create_thread;
241 	info[3] = lock->create_instance;
242 	if(fwrite(info, 4*sizeof(int), 1, thr->order_info) != 1 ||
243 		fwrite(lock->holder_file, strlen(lock->holder_file)+1, 1,
244 		thr->order_info) != 1 ||
245 		fwrite(&lock->holder_line, sizeof(int), 1,
246 		thr->order_info) != 1)
247 		log_err("fwrite: %s", strerror(errno));
248 }
249 
250 /** write ordercheck lock creation details to file */
251 static void
ordercheck_lockcreate(struct thr_check * thr,struct checked_lock * lock)252 ordercheck_lockcreate(struct thr_check* thr, struct checked_lock* lock)
253 {
254 	/* write: <ffff = create> <lock id> <file> <line> */
255 	int cmd = -1;
256 	if(!check_locking_order) return;
257 
258 	if( fwrite(&cmd, sizeof(int), 1, thr->order_info) != 1 ||
259 		fwrite(&lock->create_thread, sizeof(int), 1,
260 			thr->order_info) != 1 ||
261 		fwrite(&lock->create_instance, sizeof(int), 1,
262 			thr->order_info) != 1 ||
263 		fwrite(lock->create_file, strlen(lock->create_file)+1, 1,
264 			thr->order_info) != 1 ||
265 		fwrite(&lock->create_line, sizeof(int), 1,
266 		thr->order_info) != 1)
267 		log_err("fwrite: %s", strerror(errno));
268 }
269 
270 /** alloc struct, init lock empty */
271 void
checklock_init(enum check_lock_type type,struct checked_lock ** lock,const char * func,const char * file,int line)272 checklock_init(enum check_lock_type type, struct checked_lock** lock,
273         const char* func, const char* file, int line)
274 {
275 	struct checked_lock* e = (struct checked_lock*)calloc(1,
276 		sizeof(struct checked_lock));
277 	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
278 		thr_debug_key);
279 	if(!e)
280 		fatal_exit("%s %s %d: out of memory", func, file, line);
281 	if(!thr) {
282 		/* this is called when log_init() calls lock_init()
283 		 * functions, and the test check code has not yet
284 		 * been initialised.  But luckily, the checklock_start()
285 		 * routine can be called multiple times without ill effect.
286 		 */
287 		checklock_start();
288 		thr = (struct thr_check*)pthread_getspecific(thr_debug_key);
289 	}
290 	if(!thr)
291 		fatal_exit("%s %s %d: lock_init no thread info", func, file,
292 			line);
293 	*lock = e;
294 	e->type = type;
295 	e->create_func = func;
296 	e->create_file = file;
297 	e->create_line = line;
298 	e->create_thread = thr->num;
299 	e->create_instance = thr->locks_created++;
300 	ordercheck_lockcreate(thr, e);
301 	LOCKRET(pthread_mutex_init(&e->lock, NULL));
302 	switch(e->type) {
303 		case check_lock_mutex:
304 			LOCKRET(pthread_mutex_init(&e->u.mutex, NULL));
305 			break;
306 		case check_lock_spinlock:
307 			LOCKRET(pthread_spin_init(&e->u.spinlock, PTHREAD_PROCESS_PRIVATE));
308 			break;
309 		case check_lock_rwlock:
310 			LOCKRET(pthread_rwlock_init(&e->u.rwlock, NULL));
311 			break;
312 		default:
313 			log_assert(0);
314 	}
315 }
316 
317 /** delete prot items */
318 static void
prot_clear(struct checked_lock * lock)319 prot_clear(struct checked_lock* lock)
320 {
321 	struct protected_area* p=lock->prot, *np;
322 	while(p) {
323 		np = p->next;
324 		free(p->hold);
325 		free(p);
326 		p = np;
327 	}
328 }
329 
330 /** check if type is OK for the lock given */
331 static void
checktype(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line)332 checktype(enum check_lock_type type, struct checked_lock* lock,
333         const char* func, const char* file, int line)
334 {
335 	if(!lock)
336 		fatal_exit("use of null/deleted lock at %s %s:%d",
337 			func, file, line);
338 	if(type != lock->type) {
339 		lock_error(lock, func, file, line, "wrong lock type");
340 	}
341 }
342 
343 /** check if OK, free struct */
344 void
checklock_destroy(enum check_lock_type type,struct checked_lock ** lock,const char * func,const char * file,int line)345 checklock_destroy(enum check_lock_type type, struct checked_lock** lock,
346         const char* func, const char* file, int line)
347 {
348 	const size_t contention_interest = 1; /* promille contented locks */
349 	struct checked_lock* e;
350 	if(!lock)
351 		return;
352 	e = *lock;
353 	if(!e)
354 		return;
355 	checktype(type, e, func, file, line);
356 
357 	/* check if delete is OK */
358 	acquire_locklock(e, func, file, line);
359 	if(e->hold_count != 0)
360 		lock_error(e, func, file, line, "delete while locked.");
361 	if(e->wait_count != 0)
362 		lock_error(e, func, file, line, "delete while waited on.");
363 	prot_check(e, func, file, line);
364 	*lock = NULL; /* use after free will fail */
365 	LOCKRET(pthread_mutex_unlock(&e->lock));
366 
367 	/* contention, look at fraction in trouble. */
368 	if(e->history_count > 1 &&
369 	   1000*e->contention_count/e->history_count > contention_interest) {
370 		log_info("lock created %s %s %d has contention %u of %u (%d%%)",
371 			e->create_func, e->create_file, e->create_line,
372 			(unsigned int)e->contention_count,
373 			(unsigned int)e->history_count,
374 			(int)(100*e->contention_count/e->history_count));
375 	}
376 
377 	/* delete it */
378 	LOCKRET(pthread_mutex_destroy(&e->lock));
379 	prot_clear(e);
380 	/* since nobody holds the lock - see check above, no need to unlink
381 	 * from the thread-held locks list. */
382 	switch(e->type) {
383 		case check_lock_mutex:
384 			LOCKRET(pthread_mutex_destroy(&e->u.mutex));
385 			break;
386 		case check_lock_spinlock:
387 			LOCKRET(pthread_spin_destroy(&e->u.spinlock));
388 			break;
389 		case check_lock_rwlock:
390 			LOCKRET(pthread_rwlock_destroy(&e->u.rwlock));
391 			break;
392 		default:
393 			log_assert(0);
394 	}
395 	memset(e, 0, sizeof(struct checked_lock));
396 	free(e);
397 }
398 
399 /** finish acquiring lock, shared between _(rd|wr||)lock() routines */
400 static void
finish_acquire_lock(struct thr_check * thr,struct checked_lock * lock,const char * func,const char * file,int line)401 finish_acquire_lock(struct thr_check* thr, struct checked_lock* lock,
402         const char* func, const char* file, int line)
403 {
404 	thr->waiting = NULL;
405 	lock->wait_count --;
406 	lock->holder = thr;
407 	lock->hold_count ++;
408 	lock->holder_func = func;
409 	lock->holder_file = file;
410 	lock->holder_line = line;
411 	ordercheck_locklock(thr, lock);
412 
413 	/* insert in thread lock list, as first */
414 	lock->prev_held_lock[thr->num] = NULL;
415 	lock->next_held_lock[thr->num] = thr->holding_first;
416 	if(thr->holding_first)
417 		/* no need to lock it, since this thread already holds the
418 		 * lock (since it is on this list) and we only edit thr->num
419 		 * member in array. So it is safe.  */
420 		thr->holding_first->prev_held_lock[thr->num] = lock;
421 	else	thr->holding_last = lock;
422 	thr->holding_first = lock;
423 }
424 
425 /**
426  * Locking routine.
427  * @param type: as passed by user.
428  * @param lock: as passed by user.
429  * @param func: caller location.
430  * @param file: caller location.
431  * @param line: caller location.
432  * @param tryfunc: the pthread_mutex_trylock or similar function.
433  * @param timedfunc: the pthread_mutex_timedlock or similar function.
434  *	Uses absolute timeout value.
435  * @param arg: what to pass to tryfunc and timedlock.
436  * @param exclusive: if lock must be exclusive (only one allowed).
437  * @param getwr: if attempts to get writelock (or readlock) for rwlocks.
438  */
439 static void
checklock_lockit(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line,int (* tryfunc)(void *),int (* timedfunc)(void *,struct timespec *),void * arg,int exclusive,int getwr)440 checklock_lockit(enum check_lock_type type, struct checked_lock* lock,
441         const char* func, const char* file, int line,
442 	int (*tryfunc)(void*), int (*timedfunc)(void*, struct timespec*),
443 	void* arg, int exclusive, int getwr)
444 {
445 	int err;
446 	int contend = 0;
447 	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
448 		thr_debug_key);
449 	checktype(type, lock, func, file, line);
450 	if(!thr) lock_error(lock, func, file, line, "no thread info");
451 
452 	acquire_locklock(lock, func, file, line);
453 	lock->wait_count ++;
454 	thr->waiting = lock;
455 	if(exclusive && lock->hold_count > 0 && lock->holder == thr)
456 		lock_error(lock, func, file, line, "thread already owns lock");
457 	if(type==check_lock_rwlock && getwr && lock->writeholder == thr)
458 		lock_error(lock, func, file, line, "thread already has wrlock");
459 	LOCKRET(pthread_mutex_unlock(&lock->lock));
460 
461 	/* first try; if busy increase contention counter */
462 	if((err=tryfunc(arg))) {
463 		struct timespec to;
464 		if(err != EBUSY) log_err("trylock: %s", strerror(err));
465 		to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
466 		to.tv_nsec = 0;
467 		if((err=timedfunc(arg, &to))) {
468 			if(err == ETIMEDOUT)
469 				lock_error(lock, func, file, line,
470 					"timeout possible deadlock");
471 			log_err("timedlock: %s", strerror(err));
472 		}
473 		contend ++;
474 	}
475 	/* got the lock */
476 
477 	acquire_locklock(lock, func, file, line);
478 	lock->contention_count += contend;
479 	lock->history_count++;
480 	if(exclusive && lock->hold_count > 0)
481 		lock_error(lock, func, file, line, "got nonexclusive lock");
482 	if(type==check_lock_rwlock && getwr && lock->writeholder)
483 		lock_error(lock, func, file, line, "got nonexclusive wrlock");
484 	if(type==check_lock_rwlock && getwr)
485 		lock->writeholder = thr;
486 	/* check the memory areas for unauthorized changes,
487 	 * between last unlock time and current lock time.
488 	 * we check while holding the lock (threadsafe).
489 	 */
490 	if(getwr || exclusive)
491 		prot_check(lock, func, file, line);
492 	finish_acquire_lock(thr, lock, func, file, line);
493 	LOCKRET(pthread_mutex_unlock(&lock->lock));
494 }
495 
496 /** helper for rdlock: try */
try_rd(void * arg)497 static int try_rd(void* arg)
498 { return pthread_rwlock_tryrdlock((pthread_rwlock_t*)arg); }
499 /** helper for rdlock: timed */
timed_rd(void * arg,struct timespec * to)500 static int timed_rd(void* arg, struct timespec* to)
501 { return pthread_rwlock_timedrdlock((pthread_rwlock_t*)arg, to); }
502 
503 /** check if OK, lock */
504 void
checklock_rdlock(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line)505 checklock_rdlock(enum check_lock_type type, struct checked_lock* lock,
506         const char* func, const char* file, int line)
507 {
508 	if(key_deleted)
509 		return;
510 
511 	log_assert(type == check_lock_rwlock);
512 	checklock_lockit(type, lock, func, file, line,
513 		try_rd, timed_rd, &lock->u.rwlock, 0, 0);
514 }
515 
516 /** helper for wrlock: try */
try_wr(void * arg)517 static int try_wr(void* arg)
518 { return pthread_rwlock_trywrlock((pthread_rwlock_t*)arg); }
519 /** helper for wrlock: timed */
timed_wr(void * arg,struct timespec * to)520 static int timed_wr(void* arg, struct timespec* to)
521 { return pthread_rwlock_timedwrlock((pthread_rwlock_t*)arg, to); }
522 
523 /** check if OK, lock */
524 void
checklock_wrlock(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line)525 checklock_wrlock(enum check_lock_type type, struct checked_lock* lock,
526         const char* func, const char* file, int line)
527 {
528 	if(key_deleted)
529 		return;
530 	log_assert(type == check_lock_rwlock);
531 	checklock_lockit(type, lock, func, file, line,
532 		try_wr, timed_wr, &lock->u.rwlock, 0, 1);
533 }
534 
535 /** helper for lock mutex: try */
try_mutex(void * arg)536 static int try_mutex(void* arg)
537 { return pthread_mutex_trylock((pthread_mutex_t*)arg); }
538 /** helper for lock mutex: timed */
timed_mutex(void * arg,struct timespec * to)539 static int timed_mutex(void* arg, struct timespec* to)
540 { return pthread_mutex_timedlock((pthread_mutex_t*)arg, to); }
541 
542 /** helper for lock spinlock: try */
try_spinlock(void * arg)543 static int try_spinlock(void* arg)
544 { return pthread_spin_trylock((pthread_spinlock_t*)arg); }
545 /** helper for lock spinlock: timed */
timed_spinlock(void * arg,struct timespec * to)546 static int timed_spinlock(void* arg, struct timespec* to)
547 {
548 	int err;
549 	/* spin for 5 seconds. (ouch for the CPU, but it beats forever) */
550 	while( (err=try_spinlock(arg)) == EBUSY) {
551 #ifndef S_SPLINT_S
552 		if(time(NULL) >= to->tv_sec)
553 			return ETIMEDOUT;
554 		usleep(1000); /* in 1/1000000s of a second */
555 #endif
556 	}
557 	return err;
558 }
559 
560 /** check if OK, lock */
561 void
checklock_lock(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line)562 checklock_lock(enum check_lock_type type, struct checked_lock* lock,
563         const char* func, const char* file, int line)
564 {
565 	if(key_deleted)
566 		return;
567 	log_assert(type != check_lock_rwlock);
568 	switch(type) {
569 		case check_lock_mutex:
570 			checklock_lockit(type, lock, func, file, line,
571 				try_mutex, timed_mutex, &lock->u.mutex, 1, 0);
572 			break;
573 		case check_lock_spinlock:
574 			/* void* cast needed because 'volatile' on some OS */
575 			checklock_lockit(type, lock, func, file, line,
576 				try_spinlock, timed_spinlock,
577 				(void*)&lock->u.spinlock, 1, 0);
578 			break;
579 		default:
580 			log_assert(0);
581 	}
582 }
583 
584 /** check if OK, unlock */
585 void
checklock_unlock(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line)586 checklock_unlock(enum check_lock_type type, struct checked_lock* lock,
587         const char* func, const char* file, int line)
588 {
589 	struct thr_check *thr;
590 	if(key_deleted)
591 		return;
592 	thr = (struct thr_check*)pthread_getspecific(thr_debug_key);
593 	checktype(type, lock, func, file, line);
594 	if(!thr) lock_error(lock, func, file, line, "no thread info");
595 
596 	acquire_locklock(lock, func, file, line);
597 	/* was this thread even holding this lock? */
598 	if(thr->holding_first != lock &&
599 		lock->prev_held_lock[thr->num] == NULL) {
600 		lock_error(lock, func, file, line, "unlock nonlocked lock");
601 	}
602 	if(lock->hold_count <= 0)
603 		lock_error(lock, func, file, line, "too many unlocks");
604 
605 	/* store this point as last touched by */
606 	lock->holder = thr;
607 	lock->hold_count --;
608 	lock->holder_func = func;
609 	lock->holder_file = file;
610 	lock->holder_line = line;
611 
612 	/* delete from thread holder list */
613 	/* no need to lock other lockstructs, because they are all on the
614 	 * held-locks list, and this thread holds their locks.
615 	 * we only touch the thr->num members, so it is safe.  */
616 	if(thr->holding_first == lock)
617 		thr->holding_first = lock->next_held_lock[thr->num];
618 	if(thr->holding_last == lock)
619 		thr->holding_last = lock->prev_held_lock[thr->num];
620 	if(lock->next_held_lock[thr->num])
621 		lock->next_held_lock[thr->num]->prev_held_lock[thr->num] =
622 			lock->prev_held_lock[thr->num];
623 	if(lock->prev_held_lock[thr->num])
624 		lock->prev_held_lock[thr->num]->next_held_lock[thr->num] =
625 			lock->next_held_lock[thr->num];
626 	lock->next_held_lock[thr->num] = NULL;
627 	lock->prev_held_lock[thr->num] = NULL;
628 
629 	if(type==check_lock_rwlock && lock->writeholder == thr) {
630 		lock->writeholder = NULL;
631 		prot_store(lock);
632 	} else if(type != check_lock_rwlock) {
633 		/* store memory areas that are protected, for later checks */
634 		prot_store(lock);
635 	}
636 	LOCKRET(pthread_mutex_unlock(&lock->lock));
637 
638 	/* unlock it */
639 	switch(type) {
640 		case check_lock_mutex:
641 			LOCKRET(pthread_mutex_unlock(&lock->u.mutex));
642 			break;
643 		case check_lock_spinlock:
644 			LOCKRET(pthread_spin_unlock(&lock->u.spinlock));
645 			break;
646 		case check_lock_rwlock:
647 			LOCKRET(pthread_rwlock_unlock(&lock->u.rwlock));
648 			break;
649 		default:
650 			log_assert(0);
651 	}
652 }
653 
654 /** open order info debug file, thr->num must be valid */
655 static void
open_lockorder(struct thr_check * thr)656 open_lockorder(struct thr_check* thr)
657 {
658 	char buf[24];
659 	time_t t;
660 	snprintf(buf, sizeof(buf), "ublocktrace.%d", thr->num);
661 	thr->order_info = fopen(buf, "w");
662 	if(!thr->order_info)
663 		fatal_exit("could not open %s: %s", buf, strerror(errno));
664 	thr->locks_created = 0;
665 	t = time(NULL);
666 	/* write: <time_stamp> <runpid> <thread_num> */
667 	if(fwrite(&t, sizeof(t), 1, thr->order_info) != 1 ||
668 		fwrite(&thr->num, sizeof(thr->num), 1, thr->order_info) != 1 ||
669 		fwrite(&check_lock_pid, sizeof(check_lock_pid), 1,
670 		thr->order_info) != 1)
671 		log_err("fwrite: %s", strerror(errno));
672 }
673 
674 /** checklock thread main, Inits thread structure */
checklock_main(void * arg)675 static void* checklock_main(void* arg)
676 {
677 	struct thr_check* thr = (struct thr_check*)arg;
678 	void* ret;
679 	thr->id = pthread_self();
680 	/* Hack to get same numbers as in log file */
681 	thr->num = *(int*)(thr->arg);
682 	log_assert(thr->num < THRDEBUG_MAX_THREADS);
683 	/* as an aside, due to this, won't work for libunbound bg thread */
684 	if(thread_infos[thr->num] != NULL)
685 		log_warn("thread warning, thr->num %d not NULL", thr->num);
686 	thread_infos[thr->num] = thr;
687 	LOCKRET(pthread_setspecific(thr_debug_key, thr));
688 	if(check_locking_order)
689 		open_lockorder(thr);
690 	ret = thr->func(thr->arg);
691 	thread_infos[thr->num] = NULL;
692 	if(check_locking_order)
693 		fclose(thr->order_info);
694 	free(thr);
695 	return ret;
696 }
697 
698 /** init the main thread */
checklock_start(void)699 void checklock_start(void)
700 {
701 	if(key_deleted)
702 		return;
703 	if(!key_created) {
704 		struct thr_check* thisthr = (struct thr_check*)calloc(1,
705 			sizeof(struct thr_check));
706 		if(!thisthr)
707 			fatal_exit("thrcreate: out of memory");
708 		key_created = 1;
709 		check_lock_pid = getpid();
710 		LOCKRET(pthread_key_create(&thr_debug_key, NULL));
711 		LOCKRET(pthread_setspecific(thr_debug_key, thisthr));
712 		thread_infos[0] = thisthr;
713 		if(check_locking_order)
714 			open_lockorder(thisthr);
715 	}
716 }
717 
718 /** stop checklocks */
checklock_stop(void)719 void checklock_stop(void)
720 {
721 	if(key_created) {
722 		int i;
723 		key_deleted = 1;
724 		if(check_locking_order)
725 			fclose(thread_infos[0]->order_info);
726 		free(thread_infos[0]);
727 		thread_infos[0] = NULL;
728 		for(i = 0; i < THRDEBUG_MAX_THREADS; i++)
729 			log_assert(thread_infos[i] == NULL);
730 			/* should have been cleaned up. */
731 		LOCKRET(pthread_key_delete(thr_debug_key));
732 		key_created = 0;
733 	}
734 }
735 
736 /** allocate debug info and create thread */
737 void
checklock_thrcreate(pthread_t * id,void * (* func)(void *),void * arg)738 checklock_thrcreate(pthread_t* id, void* (*func)(void*), void* arg)
739 {
740 	struct thr_check* thr = (struct thr_check*)calloc(1,
741 		sizeof(struct thr_check));
742 	if(!thr)
743 		fatal_exit("thrcreate: out of memory");
744 	if(!key_created) {
745 		checklock_start();
746 	}
747 	thr->func = func;
748 	thr->arg = arg;
749 	LOCKRET(pthread_create(id, NULL, checklock_main, thr));
750 }
751 
752 /** count number of thread infos */
753 static int
count_thread_infos(void)754 count_thread_infos(void)
755 {
756 	int cnt = 0;
757 	int i;
758 	for(i=0; i<THRDEBUG_MAX_THREADS; i++)
759 		if(thread_infos[i])
760 			cnt++;
761 	return cnt;
762 }
763 
764 /** print lots of info on a lock */
765 static void
lock_debug_info(struct checked_lock * lock)766 lock_debug_info(struct checked_lock* lock)
767 {
768 	if(!lock) return;
769 	log_info("+++ Lock %llx, %d %d create %s %s %d",
770 		(unsigned long long)(size_t)lock,
771 		lock->create_thread, lock->create_instance,
772 		lock->create_func, lock->create_file, lock->create_line);
773 	log_info("lock type: %s",
774 		(lock->type==check_lock_mutex)?"mutex": (
775 		(lock->type==check_lock_spinlock)?"spinlock": (
776 		(lock->type==check_lock_rwlock)?"rwlock": "badtype")));
777 	log_info("lock contention %u, history:%u, hold:%d, wait:%d",
778 		(unsigned)lock->contention_count, (unsigned)lock->history_count,
779 		lock->hold_count, lock->wait_count);
780 	log_info("last touch %s %s %d", lock->holder_func, lock->holder_file,
781 		lock->holder_line);
782 	log_info("holder thread %d, writeholder thread %d",
783 		lock->holder?lock->holder->num:-1,
784 		lock->writeholder?lock->writeholder->num:-1);
785 }
786 
787 /** print debug locks held by a thread */
788 static void
held_debug_info(struct thr_check * thr,struct checked_lock * lock)789 held_debug_info(struct thr_check* thr, struct checked_lock* lock)
790 {
791 	if(!lock) return;
792 	lock_debug_info(lock);
793 	held_debug_info(thr, lock->next_held_lock[thr->num]);
794 }
795 
796 /** print debug info for a thread */
797 static void
thread_debug_info(struct thr_check * thr)798 thread_debug_info(struct thr_check* thr)
799 {
800 	struct checked_lock* w = NULL;
801 	struct checked_lock* f = NULL;
802 	struct checked_lock* l = NULL;
803 	if(!thr) return;
804 	log_info("pthread id is %x", (int)thr->id);
805 	log_info("thread func is %llx", (unsigned long long)(size_t)thr->func);
806 	log_info("thread arg is %llx (%d)",
807 		(unsigned long long)(size_t)thr->arg,
808 		(thr->arg?*(int*)thr->arg:0));
809 	log_info("thread num is %d", thr->num);
810 	log_info("locks created %d", thr->locks_created);
811 	log_info("open file for lockinfo: %s",
812 		thr->order_info?"yes, flushing":"no");
813 	fflush(thr->order_info);
814 	w = thr->waiting;
815 	f = thr->holding_first;
816 	l = thr->holding_last;
817 	log_info("thread waiting for a lock: %s %llx", w?"yes":"no",
818 		(unsigned long long)(size_t)w);
819 	lock_debug_info(w);
820 	log_info("thread holding first: %s, last: %s", f?"yes":"no",
821 		l?"yes":"no");
822 	held_debug_info(thr, f);
823 }
824 
825 static void
total_debug_info(void)826 total_debug_info(void)
827 {
828 	int i;
829 	log_info("checklocks: supervising %d threads.",
830 		count_thread_infos());
831 	if(!key_created) {
832 		log_info("No thread debug key created yet");
833 	}
834 	for(i=0; i<THRDEBUG_MAX_THREADS; i++) {
835 		if(thread_infos[i]) {
836 			log_info("*** Thread %d information: ***", i);
837 			thread_debug_info(thread_infos[i]);
838 		}
839 	}
840 }
841 
842 /** signal handler for join timeout, Exits */
joinalarm(int ATTR_UNUSED (sig))843 static RETSIGTYPE joinalarm(int ATTR_UNUSED(sig))
844 {
845 	log_err("join thread timeout. hangup or deadlock. Info follows.");
846 	total_debug_info();
847 	fatal_exit("join thread timeout. hangup or deadlock.");
848 }
849 
850 /** wait for thread with a timeout */
851 void
checklock_thrjoin(pthread_t thread)852 checklock_thrjoin(pthread_t thread)
853 {
854 	/* wait with a timeout */
855 	if(signal(SIGALRM, joinalarm) == SIG_ERR)
856 		fatal_exit("signal(): %s", strerror(errno));
857 	(void)alarm(CHECK_JOIN_TIMEOUT);
858 	LOCKRET(pthread_join(thread, NULL));
859 	(void)alarm(0);
860 }
861 
862 #endif /* USE_THREAD_DEBUG */
863