1 /**
2  * testcode/checklocks.c - wrapper on locks that checks access.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include "config.h"
37 #include <signal.h>
38 #include "util/locks.h"   /* include before checklocks.h */
39 #include "testcode/checklocks.h"
40 
41 /**
42  * \file
43  * Locks that are checked.
44  *
45  * Ugly hack: uses the fact that workers start with an int thread_num, and
46  * are passed to thread_create to make the thread numbers here the same as
47  * those used for logging which is nice.
48  *
49  * Todo:
50  *	 - debug status print, of thread lock stacks, and current waiting.
51  */
52 #ifdef USE_THREAD_DEBUG
53 
54 /** How long to wait before lock attempt is a failure. */
55 #define CHECK_LOCK_TIMEOUT 120 /* seconds */
56 /** How long to wait before join attempt is a failure. */
57 #define CHECK_JOIN_TIMEOUT 120 /* seconds */
58 
59 /** if key has been created */
60 static int key_created = 0;
61 /** if the key was deleted, i.e. we have quit */
62 static int key_deleted = 0;
63 /** we hide the thread debug info with this key. */
64 static ub_thread_key_type thr_debug_key;
65 /** the list of threads, so all threads can be examined. NULL if unused. */
66 static struct thr_check* thread_infos[THRDEBUG_MAX_THREADS];
67 /** do we check locking order */
68 int check_locking_order = 1;
69 /** the pid of this runset, reasonably unique. */
70 static pid_t check_lock_pid;
71 /**
72  * Should checklocks print a trace of the lock and unlock calls.
73  * It uses fprintf for that because the log function uses a lock and that
74  * would loop otherwise.
75  */
76 static int verbose_locking = 0;
77 /**
78  * Assume lock 0 0 (create_thread, create_instance), is the log lock and
79  * do not print for that. Otherwise the output is full of log lock accesses.
80  */
81 static int verbose_locking_not_loglock = 1;
82 
83 /** print all possible debug info on the state of the system */
84 static void total_debug_info(void);
85 /** print pretty lock error and exit (decl for NORETURN attribute) */
86 static void lock_error(struct checked_lock* lock, const char* func,
87 	const char* file, int line, const char* err) ATTR_NORETURN;
88 
89 /** print pretty lock error and exit */
lock_error(struct checked_lock * lock,const char * func,const char * file,int line,const char * err)90 static void lock_error(struct checked_lock* lock,
91 	const char* func, const char* file, int line, const char* err)
92 {
93 	log_err("lock error (description follows)");
94 	log_err("Created at %s %s:%d", lock->create_func,
95 		lock->create_file, lock->create_line);
96 	if(lock->holder_func && lock->holder_file)
97 		log_err("Previously %s %s:%d", lock->holder_func,
98 			lock->holder_file, lock->holder_line);
99 	log_err("At %s %s:%d", func, file, line);
100 	log_err("Error for %s lock: %s",
101 		(lock->type==check_lock_mutex)?"mutex": (
102 		(lock->type==check_lock_spinlock)?"spinlock": (
103 		(lock->type==check_lock_rwlock)?"rwlock": "badtype")), err);
104 	log_err("complete status display:");
105 	total_debug_info();
106 	fatal_exit("bailing out");
107 }
108 
109 /**
110  * Obtain lock on debug lock structure. This could be a deadlock by the caller.
111  * The debug code itself does not deadlock. Anyway, check with timeouts.
112  * @param lock: on what to acquire lock.
113  * @param func: user level caller identification.
114  * @param file: user level caller identification.
115  * @param line: user level caller identification.
116  */
117 static void
acquire_locklock(struct checked_lock * lock,const char * func,const char * file,int line)118 acquire_locklock(struct checked_lock* lock,
119 	const char* func, const char* file, int line)
120 {
121 	struct timespec to;
122 	int err;
123 	int contend = 0;
124 	/* first try; inc contention counter if not immediately */
125 	if((err = pthread_mutex_trylock(&lock->lock))) {
126 		if(err==EBUSY)
127 			contend++;
128 		else fatal_exit("error in mutex_trylock: %s", strerror(err));
129 	}
130 	if(!err)
131 		return; /* immediate success */
132 	to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
133 	to.tv_nsec = 0;
134 	err = pthread_mutex_timedlock(&lock->lock, &to);
135 	if(err) {
136 		log_err("in acquiring locklock: %s", strerror(err));
137 		lock_error(lock, func, file, line, "acquire locklock");
138 	}
139 	/* since we hold the lock, we can edit the contention_count */
140 	lock->contention_count += contend;
141 }
142 
143 /** add protected region */
144 void
lock_protect(void * p,void * area,size_t size)145 lock_protect(void *p, void* area, size_t size)
146 {
147 	struct checked_lock* lock = *(struct checked_lock**)p;
148 	struct protected_area* e = (struct protected_area*)malloc(
149 		sizeof(struct protected_area));
150 	if(!e)
151 		fatal_exit("lock_protect: out of memory");
152 	e->region = area;
153 	e->size = size;
154 	e->hold = malloc(size);
155 	if(!e->hold)
156 		fatal_exit("lock_protect: out of memory");
157 	memcpy(e->hold, e->region, e->size);
158 
159 	acquire_locklock(lock, __func__, __FILE__, __LINE__);
160 	e->next = lock->prot;
161 	lock->prot = e;
162 	LOCKRET(pthread_mutex_unlock(&lock->lock));
163 }
164 
165 /** remove protected region */
166 void
lock_unprotect(void * mangled,void * area)167 lock_unprotect(void* mangled, void* area)
168 {
169 	struct checked_lock* lock = *(struct checked_lock**)mangled;
170 	struct protected_area* p, **prevp;
171 	if(!lock)
172 		return;
173 	acquire_locklock(lock, __func__, __FILE__, __LINE__);
174 	p = lock->prot;
175 	prevp = &lock->prot;
176 	while(p) {
177 		if(p->region == area) {
178 			*prevp = p->next;
179 			free(p->hold);
180 			free(p);
181 			LOCKRET(pthread_mutex_unlock(&lock->lock));
182 			return;
183 		}
184 		prevp = &p->next;
185 		p = p->next;
186 	}
187 	LOCKRET(pthread_mutex_unlock(&lock->lock));
188 }
189 
190 /**
191  * Check protected memory region. Memory compare. Exit on error.
192  * @param lock: which lock to check.
193  * @param func: location we are now (when failure is detected).
194  * @param file: location we are now (when failure is detected).
195  * @param line: location we are now (when failure is detected).
196  */
197 static void
prot_check(struct checked_lock * lock,const char * func,const char * file,int line)198 prot_check(struct checked_lock* lock,
199 	const char* func, const char* file, int line)
200 {
201 	struct protected_area* p = lock->prot;
202 	while(p) {
203 		if(memcmp(p->hold, p->region, p->size) != 0) {
204 			log_hex("memory prev", p->hold, p->size);
205 			log_hex("memory here", p->region, p->size);
206 			lock_error(lock, func, file, line,
207 				"protected area modified");
208 		}
209 		p = p->next;
210 	}
211 }
212 
213 /** Copy protected memory region */
214 static void
prot_store(struct checked_lock * lock)215 prot_store(struct checked_lock* lock)
216 {
217 	struct protected_area* p = lock->prot;
218 	while(p) {
219 		memcpy(p->hold, p->region, p->size);
220 		p = p->next;
221 	}
222 }
223 
224 /** get memory held by lock */
225 size_t
lock_get_mem(void * pp)226 lock_get_mem(void* pp)
227 {
228 	size_t s;
229 	struct checked_lock* lock = *(struct checked_lock**)pp;
230 	struct protected_area* p;
231 	s = sizeof(struct checked_lock);
232 	acquire_locklock(lock, __func__, __FILE__, __LINE__);
233 	for(p = lock->prot; p; p = p->next) {
234 		s += sizeof(struct protected_area);
235 		s += p->size;
236 	}
237 	LOCKRET(pthread_mutex_unlock(&lock->lock));
238 	return s;
239 }
240 
241 /** write lock trace info to file, while you hold those locks */
242 static void
ordercheck_locklock(struct thr_check * thr,struct checked_lock * lock)243 ordercheck_locklock(struct thr_check* thr, struct checked_lock* lock)
244 {
245 	int info[4];
246 	if(!check_locking_order) return;
247 	if(!thr->holding_first) return; /* no older lock, no info */
248 	/* write: <lock id held> <lock id new> <file> <line> */
249 	info[0] = thr->holding_first->create_thread;
250 	info[1] = thr->holding_first->create_instance;
251 	info[2] = lock->create_thread;
252 	info[3] = lock->create_instance;
253 	if(fwrite(info, 4*sizeof(int), 1, thr->order_info) != 1 ||
254 		fwrite(lock->holder_file, strlen(lock->holder_file)+1, 1,
255 		thr->order_info) != 1 ||
256 		fwrite(&lock->holder_line, sizeof(int), 1,
257 		thr->order_info) != 1)
258 		log_err("fwrite: %s", strerror(errno));
259 }
260 
261 /** write ordercheck lock creation details to file */
262 static void
ordercheck_lockcreate(struct thr_check * thr,struct checked_lock * lock)263 ordercheck_lockcreate(struct thr_check* thr, struct checked_lock* lock)
264 {
265 	/* write: <ffff = create> <lock id> <file> <line> */
266 	int cmd = -1;
267 	if(!check_locking_order) return;
268 
269 	if( fwrite(&cmd, sizeof(int), 1, thr->order_info) != 1 ||
270 		fwrite(&lock->create_thread, sizeof(int), 1,
271 			thr->order_info) != 1 ||
272 		fwrite(&lock->create_instance, sizeof(int), 1,
273 			thr->order_info) != 1 ||
274 		fwrite(lock->create_file, strlen(lock->create_file)+1, 1,
275 			thr->order_info) != 1 ||
276 		fwrite(&lock->create_line, sizeof(int), 1,
277 		thr->order_info) != 1)
278 		log_err("fwrite: %s", strerror(errno));
279 }
280 
281 /** alloc struct, init lock empty */
282 void
checklock_init(enum check_lock_type type,struct checked_lock ** lock,const char * func,const char * file,int line)283 checklock_init(enum check_lock_type type, struct checked_lock** lock,
284         const char* func, const char* file, int line)
285 {
286 	struct checked_lock* e = (struct checked_lock*)calloc(1,
287 		sizeof(struct checked_lock));
288 	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
289 		thr_debug_key);
290 	if(!e)
291 		fatal_exit("%s %s %d: out of memory", func, file, line);
292 	if(!thr) {
293 		/* this is called when log_init() calls lock_init()
294 		 * functions, and the test check code has not yet
295 		 * been initialised.  But luckily, the checklock_start()
296 		 * routine can be called multiple times without ill effect.
297 		 */
298 		checklock_start();
299 		thr = (struct thr_check*)pthread_getspecific(thr_debug_key);
300 	}
301 	if(!thr)
302 		fatal_exit("%s %s %d: lock_init no thread info", func, file,
303 			line);
304 	*lock = e;
305 	e->type = type;
306 	e->create_func = func;
307 	e->create_file = file;
308 	e->create_line = line;
309 	e->create_thread = thr->num;
310 	e->create_instance = thr->locks_created++;
311 	ordercheck_lockcreate(thr, e);
312 	LOCKRET(pthread_mutex_init(&e->lock, NULL));
313 	switch(e->type) {
314 		case check_lock_mutex:
315 			LOCKRET(pthread_mutex_init(&e->u.mutex, NULL));
316 			break;
317 		case check_lock_spinlock:
318 			LOCKRET(pthread_spin_init(&e->u.spinlock, PTHREAD_PROCESS_PRIVATE));
319 			break;
320 		case check_lock_rwlock:
321 			LOCKRET(pthread_rwlock_init(&e->u.rwlock, NULL));
322 			break;
323 		default:
324 			log_assert(0);
325 	}
326 }
327 
328 /** delete prot items */
329 static void
prot_clear(struct checked_lock * lock)330 prot_clear(struct checked_lock* lock)
331 {
332 	struct protected_area* p=lock->prot, *np;
333 	while(p) {
334 		np = p->next;
335 		free(p->hold);
336 		free(p);
337 		p = np;
338 	}
339 }
340 
341 /** check if type is OK for the lock given */
342 static void
checktype(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line)343 checktype(enum check_lock_type type, struct checked_lock* lock,
344         const char* func, const char* file, int line)
345 {
346 	if(!lock)
347 		fatal_exit("use of null/deleted lock at %s %s:%d",
348 			func, file, line);
349 	if(type != lock->type) {
350 		lock_error(lock, func, file, line, "wrong lock type");
351 	}
352 }
353 
354 /** check if OK, free struct */
355 void
checklock_destroy(enum check_lock_type type,struct checked_lock ** lock,const char * func,const char * file,int line)356 checklock_destroy(enum check_lock_type type, struct checked_lock** lock,
357         const char* func, const char* file, int line)
358 {
359 	const size_t contention_interest = 1; /* promille contented locks */
360 	struct checked_lock* e;
361 	if(!lock)
362 		return;
363 	e = *lock;
364 	if(!e)
365 		return;
366 	checktype(type, e, func, file, line);
367 
368 	/* check if delete is OK */
369 	acquire_locklock(e, func, file, line);
370 	if(e->hold_count != 0)
371 		lock_error(e, func, file, line, "delete while locked.");
372 	if(e->wait_count != 0)
373 		lock_error(e, func, file, line, "delete while waited on.");
374 	prot_check(e, func, file, line);
375 	*lock = NULL; /* use after free will fail */
376 	LOCKRET(pthread_mutex_unlock(&e->lock));
377 
378 	/* contention, look at fraction in trouble. */
379 	if(e->history_count > 1 &&
380 	   1000*e->contention_count/e->history_count > contention_interest) {
381 		log_info("lock created %s %s %d has contention %u of %u (%d%%)",
382 			e->create_func, e->create_file, e->create_line,
383 			(unsigned int)e->contention_count,
384 			(unsigned int)e->history_count,
385 			(int)(100*e->contention_count/e->history_count));
386 	}
387 
388 	/* delete it */
389 	LOCKRET(pthread_mutex_destroy(&e->lock));
390 	prot_clear(e);
391 	/* since nobody holds the lock - see check above, no need to unlink
392 	 * from the thread-held locks list. */
393 	switch(e->type) {
394 		case check_lock_mutex:
395 			LOCKRET(pthread_mutex_destroy(&e->u.mutex));
396 			break;
397 		case check_lock_spinlock:
398 			LOCKRET(pthread_spin_destroy(&e->u.spinlock));
399 			break;
400 		case check_lock_rwlock:
401 			LOCKRET(pthread_rwlock_destroy(&e->u.rwlock));
402 			break;
403 		default:
404 			log_assert(0);
405 	}
406 	memset(e, 0, sizeof(struct checked_lock));
407 	free(e);
408 }
409 
410 /** finish acquiring lock, shared between _(rd|wr||)lock() routines */
411 static void
finish_acquire_lock(struct thr_check * thr,struct checked_lock * lock,const char * func,const char * file,int line)412 finish_acquire_lock(struct thr_check* thr, struct checked_lock* lock,
413         const char* func, const char* file, int line)
414 {
415 	thr->waiting = NULL;
416 	lock->wait_count --;
417 	lock->holder = thr;
418 	lock->hold_count ++;
419 	lock->holder_func = func;
420 	lock->holder_file = file;
421 	lock->holder_line = line;
422 	ordercheck_locklock(thr, lock);
423 
424 	/* insert in thread lock list, as first */
425 	lock->prev_held_lock[thr->num] = NULL;
426 	lock->next_held_lock[thr->num] = thr->holding_first;
427 	if(thr->holding_first)
428 		/* no need to lock it, since this thread already holds the
429 		 * lock (since it is on this list) and we only edit thr->num
430 		 * member in array. So it is safe.  */
431 		thr->holding_first->prev_held_lock[thr->num] = lock;
432 	else	thr->holding_last = lock;
433 	thr->holding_first = lock;
434 }
435 
436 /**
437  * Locking routine.
438  * @param type: as passed by user.
439  * @param lock: as passed by user.
440  * @param func: caller location.
441  * @param file: caller location.
442  * @param line: caller location.
443  * @param tryfunc: the pthread_mutex_trylock or similar function.
444  * @param timedfunc: the pthread_mutex_timedlock or similar function.
445  *	Uses absolute timeout value.
446  * @param arg: what to pass to tryfunc and timedlock.
447  * @param exclusive: if lock must be exclusive (only one allowed).
448  * @param getwr: if attempts to get writelock (or readlock) for rwlocks.
449  */
450 static void
checklock_lockit(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line,int (* tryfunc)(void *),int (* timedfunc)(void *,struct timespec *),void * arg,int exclusive,int getwr)451 checklock_lockit(enum check_lock_type type, struct checked_lock* lock,
452         const char* func, const char* file, int line,
453 	int (*tryfunc)(void*), int (*timedfunc)(void*, struct timespec*),
454 	void* arg, int exclusive, int getwr)
455 {
456 	int err;
457 	int contend = 0;
458 	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
459 		thr_debug_key);
460 	checktype(type, lock, func, file, line);
461 	if(!thr) lock_error(lock, func, file, line, "no thread info");
462 
463 	acquire_locklock(lock, func, file, line);
464 	lock->wait_count ++;
465 	thr->waiting = lock;
466 	if(exclusive && lock->hold_count > 0 && lock->holder == thr)
467 		lock_error(lock, func, file, line, "thread already owns lock");
468 	if(type==check_lock_rwlock && getwr && lock->writeholder == thr)
469 		lock_error(lock, func, file, line, "thread already has wrlock");
470 	LOCKRET(pthread_mutex_unlock(&lock->lock));
471 
472 	/* first try; if busy increase contention counter */
473 	if((err=tryfunc(arg))) {
474 		struct timespec to;
475 		if(err != EBUSY) log_err("trylock: %s", strerror(err));
476 		to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
477 		to.tv_nsec = 0;
478 		if((err=timedfunc(arg, &to))) {
479 			if(err == ETIMEDOUT)
480 				lock_error(lock, func, file, line,
481 					"timeout possible deadlock");
482 			log_err("timedlock: %s", strerror(err));
483 		}
484 		contend ++;
485 	}
486 	/* got the lock */
487 
488 	acquire_locklock(lock, func, file, line);
489 	lock->contention_count += contend;
490 	lock->history_count++;
491 	if(exclusive && lock->hold_count > 0)
492 		lock_error(lock, func, file, line, "got nonexclusive lock");
493 	if(type==check_lock_rwlock && getwr && lock->writeholder)
494 		lock_error(lock, func, file, line, "got nonexclusive wrlock");
495 	if(type==check_lock_rwlock && getwr)
496 		lock->writeholder = thr;
497 	/* check the memory areas for unauthorized changes,
498 	 * between last unlock time and current lock time.
499 	 * we check while holding the lock (threadsafe).
500 	 */
501 	if(getwr || exclusive)
502 		prot_check(lock, func, file, line);
503 	finish_acquire_lock(thr, lock, func, file, line);
504 	LOCKRET(pthread_mutex_unlock(&lock->lock));
505 }
506 
507 /** helper for rdlock: try */
try_rd(void * arg)508 static int try_rd(void* arg)
509 { return pthread_rwlock_tryrdlock((pthread_rwlock_t*)arg); }
510 /** helper for rdlock: timed */
timed_rd(void * arg,struct timespec * to)511 static int timed_rd(void* arg, struct timespec* to)
512 { return pthread_rwlock_timedrdlock((pthread_rwlock_t*)arg, to); }
513 
514 /** check if OK, lock */
515 void
checklock_rdlock(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line)516 checklock_rdlock(enum check_lock_type type, struct checked_lock* lock,
517         const char* func, const char* file, int line)
518 {
519 	if(key_deleted)
520 		return;
521 
522 	if(verbose_locking && !(verbose_locking_not_loglock &&
523 		lock->create_thread == 0 && lock->create_instance == 0))
524 		fprintf(stderr, "checklock_rdlock lock %d %d %s:%d at %s:%d\n", lock->create_thread, lock->create_instance, lock->create_file, lock->create_line, file, line);
525 	log_assert(type == check_lock_rwlock);
526 	checklock_lockit(type, lock, func, file, line,
527 		try_rd, timed_rd, &lock->u.rwlock, 0, 0);
528 }
529 
530 /** helper for wrlock: try */
try_wr(void * arg)531 static int try_wr(void* arg)
532 { return pthread_rwlock_trywrlock((pthread_rwlock_t*)arg); }
533 /** helper for wrlock: timed */
timed_wr(void * arg,struct timespec * to)534 static int timed_wr(void* arg, struct timespec* to)
535 { return pthread_rwlock_timedwrlock((pthread_rwlock_t*)arg, to); }
536 
537 /** check if OK, lock */
538 void
checklock_wrlock(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line)539 checklock_wrlock(enum check_lock_type type, struct checked_lock* lock,
540         const char* func, const char* file, int line)
541 {
542 	if(key_deleted)
543 		return;
544 	log_assert(type == check_lock_rwlock);
545 	if(verbose_locking && !(verbose_locking_not_loglock &&
546 		lock->create_thread == 0 && lock->create_instance == 0))
547 		fprintf(stderr, "checklock_wrlock lock %d %d %s:%d at %s:%d\n", lock->create_thread, lock->create_instance, lock->create_file, lock->create_line, file, line);
548 	checklock_lockit(type, lock, func, file, line,
549 		try_wr, timed_wr, &lock->u.rwlock, 0, 1);
550 }
551 
552 /** helper for lock mutex: try */
try_mutex(void * arg)553 static int try_mutex(void* arg)
554 { return pthread_mutex_trylock((pthread_mutex_t*)arg); }
555 /** helper for lock mutex: timed */
timed_mutex(void * arg,struct timespec * to)556 static int timed_mutex(void* arg, struct timespec* to)
557 { return pthread_mutex_timedlock((pthread_mutex_t*)arg, to); }
558 
559 /** helper for lock spinlock: try */
try_spinlock(void * arg)560 static int try_spinlock(void* arg)
561 { return pthread_spin_trylock((pthread_spinlock_t*)arg); }
562 /** helper for lock spinlock: timed */
timed_spinlock(void * arg,struct timespec * to)563 static int timed_spinlock(void* arg, struct timespec* to)
564 {
565 	int err;
566 	/* spin for 5 seconds. (ouch for the CPU, but it beats forever) */
567 	while( (err=try_spinlock(arg)) == EBUSY) {
568 #ifndef S_SPLINT_S
569 		if(time(NULL) >= to->tv_sec)
570 			return ETIMEDOUT;
571 		usleep(1000); /* in 1/1000000s of a second */
572 #endif
573 	}
574 	return err;
575 }
576 
577 /** check if OK, lock */
578 void
checklock_lock(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line)579 checklock_lock(enum check_lock_type type, struct checked_lock* lock,
580         const char* func, const char* file, int line)
581 {
582 	if(key_deleted)
583 		return;
584 	log_assert(type != check_lock_rwlock);
585 	if(verbose_locking && !(verbose_locking_not_loglock &&
586 		lock->create_thread == 0 && lock->create_instance == 0))
587 		fprintf(stderr, "checklock_lock lock %d %d %s:%d at %s:%d\n", lock->create_thread, lock->create_instance, lock->create_file, lock->create_line, file, line);
588 	switch(type) {
589 		case check_lock_mutex:
590 			checklock_lockit(type, lock, func, file, line,
591 				try_mutex, timed_mutex, &lock->u.mutex, 1, 0);
592 			break;
593 		case check_lock_spinlock:
594 			/* void* cast needed because 'volatile' on some OS */
595 			checklock_lockit(type, lock, func, file, line,
596 				try_spinlock, timed_spinlock,
597 				(void*)&lock->u.spinlock, 1, 0);
598 			break;
599 		default:
600 			log_assert(0);
601 	}
602 }
603 
604 /** check if OK, unlock */
605 void
checklock_unlock(enum check_lock_type type,struct checked_lock * lock,const char * func,const char * file,int line)606 checklock_unlock(enum check_lock_type type, struct checked_lock* lock,
607         const char* func, const char* file, int line)
608 {
609 	struct thr_check *thr;
610 	if(key_deleted)
611 		return;
612 	thr = (struct thr_check*)pthread_getspecific(thr_debug_key);
613 	checktype(type, lock, func, file, line);
614 	if(!thr) lock_error(lock, func, file, line, "no thread info");
615 
616 	acquire_locklock(lock, func, file, line);
617 	/* was this thread even holding this lock? */
618 	if(thr->holding_first != lock &&
619 		lock->prev_held_lock[thr->num] == NULL) {
620 		lock_error(lock, func, file, line, "unlock nonlocked lock");
621 	}
622 	if(lock->hold_count <= 0)
623 		lock_error(lock, func, file, line, "too many unlocks");
624 
625 	if(verbose_locking && !(verbose_locking_not_loglock &&
626 		lock->create_thread == 0 && lock->create_instance == 0))
627 		fprintf(stderr, "checklock_unlock lock %d %d %s:%d at %s:%d\n", lock->create_thread, lock->create_instance, lock->create_file, lock->create_line, file, line);
628 
629 	/* store this point as last touched by */
630 	lock->holder = thr;
631 	lock->hold_count --;
632 	lock->holder_func = func;
633 	lock->holder_file = file;
634 	lock->holder_line = line;
635 
636 	/* delete from thread holder list */
637 	/* no need to lock other lockstructs, because they are all on the
638 	 * held-locks list, and this thread holds their locks.
639 	 * we only touch the thr->num members, so it is safe.  */
640 	if(thr->holding_first == lock)
641 		thr->holding_first = lock->next_held_lock[thr->num];
642 	if(thr->holding_last == lock)
643 		thr->holding_last = lock->prev_held_lock[thr->num];
644 	if(lock->next_held_lock[thr->num])
645 		lock->next_held_lock[thr->num]->prev_held_lock[thr->num] =
646 			lock->prev_held_lock[thr->num];
647 	if(lock->prev_held_lock[thr->num])
648 		lock->prev_held_lock[thr->num]->next_held_lock[thr->num] =
649 			lock->next_held_lock[thr->num];
650 	lock->next_held_lock[thr->num] = NULL;
651 	lock->prev_held_lock[thr->num] = NULL;
652 
653 	if(type==check_lock_rwlock && lock->writeholder == thr) {
654 		lock->writeholder = NULL;
655 		prot_store(lock);
656 	} else if(type != check_lock_rwlock) {
657 		/* store memory areas that are protected, for later checks */
658 		prot_store(lock);
659 	}
660 	LOCKRET(pthread_mutex_unlock(&lock->lock));
661 
662 	/* unlock it */
663 	switch(type) {
664 		case check_lock_mutex:
665 			LOCKRET(pthread_mutex_unlock(&lock->u.mutex));
666 			break;
667 		case check_lock_spinlock:
668 			LOCKRET(pthread_spin_unlock(&lock->u.spinlock));
669 			break;
670 		case check_lock_rwlock:
671 			LOCKRET(pthread_rwlock_unlock(&lock->u.rwlock));
672 			break;
673 		default:
674 			log_assert(0);
675 	}
676 }
677 
678 /** open order info debug file, thr->num must be valid */
679 static void
open_lockorder(struct thr_check * thr)680 open_lockorder(struct thr_check* thr)
681 {
682 	char buf[24];
683 	time_t t;
684 	snprintf(buf, sizeof(buf), "ublocktrace.%d", thr->num);
685 	thr->order_info = fopen(buf, "w");
686 	if(!thr->order_info)
687 		fatal_exit("could not open %s: %s", buf, strerror(errno));
688 	thr->locks_created = 0;
689 	t = time(NULL);
690 	/* write: <time_stamp> <runpid> <thread_num> */
691 	if(fwrite(&t, sizeof(t), 1, thr->order_info) != 1 ||
692 		fwrite(&thr->num, sizeof(thr->num), 1, thr->order_info) != 1 ||
693 		fwrite(&check_lock_pid, sizeof(check_lock_pid), 1,
694 		thr->order_info) != 1)
695 		log_err("fwrite: %s", strerror(errno));
696 }
697 
698 /** checklock thread main, Inits thread structure */
checklock_main(void * arg)699 static void* checklock_main(void* arg)
700 {
701 	struct thr_check* thr = (struct thr_check*)arg;
702 	void* ret;
703 	thr->id = pthread_self();
704 	/* Hack to get same numbers as in log file */
705 	thr->num = *(int*)(thr->arg);
706 	log_assert(thr->num < THRDEBUG_MAX_THREADS);
707 	/* as an aside, due to this, won't work for libunbound bg thread */
708 	if(thread_infos[thr->num] != NULL)
709 		log_warn("thread warning, thr->num %d not NULL", thr->num);
710 	thread_infos[thr->num] = thr;
711 	LOCKRET(pthread_setspecific(thr_debug_key, thr));
712 	if(check_locking_order)
713 		open_lockorder(thr);
714 	ret = thr->func(thr->arg);
715 	thread_infos[thr->num] = NULL;
716 	if(check_locking_order)
717 		fclose(thr->order_info);
718 	free(thr);
719 	return ret;
720 }
721 
722 /** init the main thread */
checklock_start(void)723 void checklock_start(void)
724 {
725 	if(key_deleted)
726 		return;
727 	if(!key_created) {
728 		struct thr_check* thisthr = (struct thr_check*)calloc(1,
729 			sizeof(struct thr_check));
730 		if(!thisthr)
731 			fatal_exit("thrcreate: out of memory");
732 		key_created = 1;
733 		check_lock_pid = getpid();
734 		LOCKRET(pthread_key_create(&thr_debug_key, NULL));
735 		LOCKRET(pthread_setspecific(thr_debug_key, thisthr));
736 		thread_infos[0] = thisthr;
737 		if(check_locking_order)
738 			open_lockorder(thisthr);
739 	}
740 }
741 
742 /** stop checklocks */
checklock_stop(void)743 void checklock_stop(void)
744 {
745 	if(key_created) {
746 		int i;
747 		key_deleted = 1;
748 		if(check_locking_order)
749 			fclose(thread_infos[0]->order_info);
750 		free(thread_infos[0]);
751 		thread_infos[0] = NULL;
752 		for(i = 0; i < THRDEBUG_MAX_THREADS; i++)
753 			log_assert(thread_infos[i] == NULL);
754 			/* should have been cleaned up. */
755 		LOCKRET(pthread_key_delete(thr_debug_key));
756 		key_created = 0;
757 	}
758 }
759 
760 /** allocate debug info and create thread */
761 void
checklock_thrcreate(pthread_t * id,void * (* func)(void *),void * arg)762 checklock_thrcreate(pthread_t* id, void* (*func)(void*), void* arg)
763 {
764 	struct thr_check* thr = (struct thr_check*)calloc(1,
765 		sizeof(struct thr_check));
766 	if(!thr)
767 		fatal_exit("thrcreate: out of memory");
768 	if(!key_created) {
769 		checklock_start();
770 	}
771 	thr->func = func;
772 	thr->arg = arg;
773 	LOCKRET(pthread_create(id, NULL, checklock_main, thr));
774 }
775 
776 /** count number of thread infos */
777 static int
count_thread_infos(void)778 count_thread_infos(void)
779 {
780 	int cnt = 0;
781 	int i;
782 	for(i=0; i<THRDEBUG_MAX_THREADS; i++)
783 		if(thread_infos[i])
784 			cnt++;
785 	return cnt;
786 }
787 
788 /** print lots of info on a lock */
789 static void
lock_debug_info(struct checked_lock * lock)790 lock_debug_info(struct checked_lock* lock)
791 {
792 	if(!lock) return;
793 	log_info("+++ Lock %llx, %d %d create %s %s %d",
794 		(unsigned long long)(size_t)lock,
795 		lock->create_thread, lock->create_instance,
796 		lock->create_func, lock->create_file, lock->create_line);
797 	log_info("lock type: %s",
798 		(lock->type==check_lock_mutex)?"mutex": (
799 		(lock->type==check_lock_spinlock)?"spinlock": (
800 		(lock->type==check_lock_rwlock)?"rwlock": "badtype")));
801 	log_info("lock contention %u, history:%u, hold:%d, wait:%d",
802 		(unsigned)lock->contention_count, (unsigned)lock->history_count,
803 		lock->hold_count, lock->wait_count);
804 	log_info("last touch %s %s %d", lock->holder_func, lock->holder_file,
805 		lock->holder_line);
806 	log_info("holder thread %d, writeholder thread %d",
807 		lock->holder?lock->holder->num:-1,
808 		lock->writeholder?lock->writeholder->num:-1);
809 }
810 
811 /** print debug locks held by a thread */
812 static void
held_debug_info(struct thr_check * thr,struct checked_lock * lock)813 held_debug_info(struct thr_check* thr, struct checked_lock* lock)
814 {
815 	if(!lock) return;
816 	lock_debug_info(lock);
817 	held_debug_info(thr, lock->next_held_lock[thr->num]);
818 }
819 
820 /** print debug info for a thread */
821 static void
thread_debug_info(struct thr_check * thr)822 thread_debug_info(struct thr_check* thr)
823 {
824 	struct checked_lock* w = NULL;
825 	struct checked_lock* f = NULL;
826 	struct checked_lock* l = NULL;
827 	if(!thr) return;
828 	log_info("pthread id is %x", (int)thr->id);
829 	log_info("thread func is %llx", (unsigned long long)(size_t)thr->func);
830 	log_info("thread arg is %llx (%d)",
831 		(unsigned long long)(size_t)thr->arg,
832 		(thr->arg?*(int*)thr->arg:0));
833 	log_info("thread num is %d", thr->num);
834 	log_info("locks created %d", thr->locks_created);
835 	log_info("open file for lockinfo: %s",
836 		thr->order_info?"yes, flushing":"no");
837 	fflush(thr->order_info);
838 	w = thr->waiting;
839 	f = thr->holding_first;
840 	l = thr->holding_last;
841 	log_info("thread waiting for a lock: %s %llx", w?"yes":"no",
842 		(unsigned long long)(size_t)w);
843 	lock_debug_info(w);
844 	log_info("thread holding first: %s, last: %s", f?"yes":"no",
845 		l?"yes":"no");
846 	held_debug_info(thr, f);
847 }
848 
849 static void
total_debug_info(void)850 total_debug_info(void)
851 {
852 	int i;
853 	log_info("checklocks: supervising %d threads.",
854 		count_thread_infos());
855 	if(!key_created) {
856 		log_info("No thread debug key created yet");
857 	}
858 	for(i=0; i<THRDEBUG_MAX_THREADS; i++) {
859 		if(thread_infos[i]) {
860 			log_info("*** Thread %d information: ***", i);
861 			thread_debug_info(thread_infos[i]);
862 		}
863 	}
864 }
865 
866 /** signal handler for join timeout, Exits */
joinalarm(int ATTR_UNUSED (sig))867 static RETSIGTYPE joinalarm(int ATTR_UNUSED(sig))
868 {
869 	log_err("join thread timeout. hangup or deadlock. Info follows.");
870 	total_debug_info();
871 	fatal_exit("join thread timeout. hangup or deadlock.");
872 }
873 
874 /** wait for thread with a timeout */
875 void
checklock_thrjoin(pthread_t thread)876 checklock_thrjoin(pthread_t thread)
877 {
878 	/* wait with a timeout */
879 	if(signal(SIGALRM, joinalarm) == SIG_ERR)
880 		fatal_exit("signal(): %s", strerror(errno));
881 	(void)alarm(CHECK_JOIN_TIMEOUT);
882 	LOCKRET(pthread_join(thread, NULL));
883 	(void)alarm(0);
884 }
885 
886 #endif /* USE_THREAD_DEBUG */
887