1 /*	$NetBSD: kern_condvar.c,v 1.35 2015/08/07 06:22:12 uebayasi Exp $	*/
2 
3 /*-
4  * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Kernel condition variable implementation.
34  */
35 
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: kern_condvar.c,v 1.35 2015/08/07 06:22:12 uebayasi Exp $");
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/lwp.h>
42 #include <sys/condvar.h>
43 #include <sys/sleepq.h>
44 #include <sys/lockdebug.h>
45 #include <sys/cpu.h>
46 
/*
 * Accessors for the private contents of the kcondvar_t data type.
 *
 *	cv_opaque[0]	sleepq...
 *	cv_opaque[1]	...pointers
 *	cv_opaque[2]	description for ps(1)
 *
 * cv_opaque[0..1] is protected by the interlock passed to cv_wait() (enqueue
 * only), and the sleep queue lock acquired with sleepq_hashlock() (enqueue
 * and dequeue).
 *
 * cv_opaque[2] (the wmesg) is set by cv_init() and, on DIAGNOSTIC kernels,
 * overwritten once more by cv_destroy(); nothing else in this file writes it.
 *
 *	CV_SLEEPQ(cv)		view the start of cv_opaque as a sleepq_t
 *	CV_WMESG(cv)		read the wait message
 *	CV_SET_WMESG(cv, v)	store the wait message; fully parenthesized
 *				so that it is safe inside larger expressions
 *	CV_DEBUG_P(cv)		true unless the wmesg is the "nodebug" marker
 *	CV_RA			return address of the current function
 */
#define	CV_SLEEPQ(cv)		((sleepq_t *)(cv)->cv_opaque)
#define	CV_WMESG(cv)		((const char *)(cv)->cv_opaque[2])
#define	CV_SET_WMESG(cv, v) 	((cv)->cv_opaque[2] = __UNCONST(v))

#define	CV_DEBUG_P(cv)	(CV_WMESG(cv) != nodebug)
#define	CV_RA		((uintptr_t)__builtin_return_address(0))
67 
68 static void	cv_unsleep(lwp_t *, bool);
69 static void	cv_wakeup_one(kcondvar_t *);
70 static void	cv_wakeup_all(kcondvar_t *);
71 
/*
 * Synchronisation object descriptor for condition variables.  cv_enter()
 * passes its address to sleepq_enqueue() for every LWP that goes to sleep
 * on a CV.
 *
 * The initializer is positional; the meaning of each slot is defined by
 * syncobj_t, which is declared outside this file.  cv_unsleep() is the only
 * entry implemented here, the remaining callbacks are shared helpers.
 * NOTE(review): SOBJ_SLEEPQ_SORTED presumably requests a priority-sorted
 * sleep queue -- confirm against the syncobj_t definition.
 */
static syncobj_t cv_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	cv_unsleep,
	sleepq_changepri,
	sleepq_lendpri,
	syncobj_noowner,
};
79 
/*
 * LOCKDEBUG descriptor for condition variables; cv_init() hands it to
 * LOCKDEBUG_ALLOC().  It has external linkage, so it may also be referenced
 * from outside this file.
 *
 * The initializer is positional (layout defined by lockops_t, declared
 * elsewhere): a human-readable name, the LOCKOPS_CV type tag, and a NULL
 * final slot.
 */
lockops_t cv_lockops = {
	"Condition variable",
	LOCKOPS_CV,
	NULL
};
85 
/*
 * Sentinel wait messages.  They are recognised by address, never by
 * content, so each must remain a distinct object.
 *
 *	deadcv	stored by cv_destroy() on DIAGNOSTIC kernels; cv_is_valid()
 *		rejects any CV whose wmesg points here.
 *	nodebug	stored by cv_init() when LOCKDEBUG_ALLOC() returns false;
 *		CV_DEBUG_P() is false for any CV whose wmesg points here.
 */
static const char deadcv[] = "deadcv";
#ifdef LOCKDEBUG
static const char nodebug[] = "nodebug";
#endif
90 
91 /*
92  * cv_init:
93  *
94  *	Initialize a condition variable for use.
95  */
96 void
cv_init(kcondvar_t * cv,const char * wmesg)97 cv_init(kcondvar_t *cv, const char *wmesg)
98 {
99 #ifdef LOCKDEBUG
100 	bool dodebug;
101 
102 	dodebug = LOCKDEBUG_ALLOC(cv, &cv_lockops,
103 	    (uintptr_t)__builtin_return_address(0));
104 	if (!dodebug) {
105 		/* XXX This will break vfs_lockf. */
106 		wmesg = nodebug;
107 	}
108 #endif
109 	KASSERT(wmesg != NULL);
110 	CV_SET_WMESG(cv, wmesg);
111 	sleepq_init(CV_SLEEPQ(cv));
112 }
113 
114 /*
115  * cv_destroy:
116  *
117  *	Tear down a condition variable.
118  */
119 void
cv_destroy(kcondvar_t * cv)120 cv_destroy(kcondvar_t *cv)
121 {
122 
123 	LOCKDEBUG_FREE(CV_DEBUG_P(cv), cv);
124 #ifdef DIAGNOSTIC
125 	KASSERT(cv_is_valid(cv));
126 	CV_SET_WMESG(cv, deadcv);
127 #endif
128 }
129 
130 /*
131  * cv_enter:
132  *
133  *	Look up and lock the sleep queue corresponding to the given
134  *	condition variable, and increment the number of waiters.
135  */
136 static inline void
cv_enter(kcondvar_t * cv,kmutex_t * mtx,lwp_t * l)137 cv_enter(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l)
138 {
139 	sleepq_t *sq;
140 	kmutex_t *mp;
141 
142 	KASSERT(cv_is_valid(cv));
143 	KASSERT(!cpu_intr_p());
144 	KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL);
145 
146 	LOCKDEBUG_LOCKED(CV_DEBUG_P(cv), cv, mtx, CV_RA, 0);
147 
148 	l->l_kpriority = true;
149 	mp = sleepq_hashlock(cv);
150 	sq = CV_SLEEPQ(cv);
151 	sleepq_enter(sq, l, mp);
152 	sleepq_enqueue(sq, cv, CV_WMESG(cv), &cv_syncobj);
153 	mutex_exit(mtx);
154 	KASSERT(cv_has_waiters(cv));
155 }
156 
157 /*
158  * cv_exit:
159  *
160  *	After resuming execution, check to see if we have been restarted
161  *	as a result of cv_signal().  If we have, but cannot take the
162  *	wakeup (because of eg a pending Unix signal or timeout) then try
163  *	to ensure that another LWP sees it.  This is necessary because
164  *	there may be multiple waiters, and at least one should take the
165  *	wakeup if possible.
166  */
167 static inline int
cv_exit(kcondvar_t * cv,kmutex_t * mtx,lwp_t * l,const int error)168 cv_exit(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l, const int error)
169 {
170 
171 	mutex_enter(mtx);
172 	if (__predict_false(error != 0))
173 		cv_signal(cv);
174 
175 	LOCKDEBUG_UNLOCKED(CV_DEBUG_P(cv), cv, CV_RA, 0);
176 	KASSERT(cv_is_valid(cv));
177 
178 	return error;
179 }
180 
181 /*
182  * cv_unsleep:
183  *
184  *	Remove an LWP from the condition variable and sleep queue.  This
185  *	is called when the LWP has not been awoken normally but instead
186  *	interrupted: for example, when a signal is received.  Must be
187  *	called with the LWP locked, and must return it unlocked.
188  */
189 static void
cv_unsleep(lwp_t * l,bool cleanup)190 cv_unsleep(lwp_t *l, bool cleanup)
191 {
192 	kcondvar_t *cv __diagused;
193 
194 	cv = (kcondvar_t *)(uintptr_t)l->l_wchan;
195 
196 	KASSERT(l->l_wchan == (wchan_t)cv);
197 	KASSERT(l->l_sleepq == CV_SLEEPQ(cv));
198 	KASSERT(cv_is_valid(cv));
199 	KASSERT(cv_has_waiters(cv));
200 
201 	sleepq_unsleep(l, cleanup);
202 }
203 
204 /*
205  * cv_wait:
206  *
207  *	Wait non-interruptably on a condition variable until awoken.
208  */
209 void
cv_wait(kcondvar_t * cv,kmutex_t * mtx)210 cv_wait(kcondvar_t *cv, kmutex_t *mtx)
211 {
212 	lwp_t *l = curlwp;
213 
214 	KASSERT(mutex_owned(mtx));
215 
216 	cv_enter(cv, mtx, l);
217 	(void)sleepq_block(0, false);
218 	(void)cv_exit(cv, mtx, l, 0);
219 }
220 
221 /*
222  * cv_wait_sig:
223  *
224  *	Wait on a condition variable until a awoken or a signal is received.
225  *	Will also return early if the process is exiting.  Returns zero if
226  *	awoken normally, ERESTART if a signal was received and the system
227  *	call is restartable, or EINTR otherwise.
228  */
229 int
cv_wait_sig(kcondvar_t * cv,kmutex_t * mtx)230 cv_wait_sig(kcondvar_t *cv, kmutex_t *mtx)
231 {
232 	lwp_t *l = curlwp;
233 	int error;
234 
235 	KASSERT(mutex_owned(mtx));
236 
237 	cv_enter(cv, mtx, l);
238 	error = sleepq_block(0, true);
239 	return cv_exit(cv, mtx, l, error);
240 }
241 
242 /*
243  * cv_timedwait:
244  *
245  *	Wait on a condition variable until awoken or the specified timeout
246  *	expires.  Returns zero if awoken normally or EWOULDBLOCK if the
247  *	timeout expired.
248  *
249  *	timo is a timeout in ticks.  timo = 0 specifies an infinite timeout.
250  */
251 int
cv_timedwait(kcondvar_t * cv,kmutex_t * mtx,int timo)252 cv_timedwait(kcondvar_t *cv, kmutex_t *mtx, int timo)
253 {
254 	lwp_t *l = curlwp;
255 	int error;
256 
257 	KASSERT(mutex_owned(mtx));
258 
259 	cv_enter(cv, mtx, l);
260 	error = sleepq_block(timo, false);
261 	return cv_exit(cv, mtx, l, error);
262 }
263 
264 /*
265  * cv_timedwait_sig:
266  *
267  *	Wait on a condition variable until a timeout expires, awoken or a
268  *	signal is received.  Will also return early if the process is
269  *	exiting.  Returns zero if awoken normally, EWOULDBLOCK if the
270  *	timeout expires, ERESTART if a signal was received and the system
271  *	call is restartable, or EINTR otherwise.
272  *
273  *	timo is a timeout in ticks.  timo = 0 specifies an infinite timeout.
274  */
275 int
cv_timedwait_sig(kcondvar_t * cv,kmutex_t * mtx,int timo)276 cv_timedwait_sig(kcondvar_t *cv, kmutex_t *mtx, int timo)
277 {
278 	lwp_t *l = curlwp;
279 	int error;
280 
281 	KASSERT(mutex_owned(mtx));
282 
283 	cv_enter(cv, mtx, l);
284 	error = sleepq_block(timo, true);
285 	return cv_exit(cv, mtx, l, error);
286 }
287 
288 /*
289  * cv_signal:
290  *
291  *	Wake the highest priority LWP waiting on a condition variable.
292  *	Must be called with the interlocking mutex held.
293  */
294 void
cv_signal(kcondvar_t * cv)295 cv_signal(kcondvar_t *cv)
296 {
297 
298 	/* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */
299 	KASSERT(cv_is_valid(cv));
300 
301 	if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv))))
302 		cv_wakeup_one(cv);
303 }
304 
305 static void __noinline
cv_wakeup_one(kcondvar_t * cv)306 cv_wakeup_one(kcondvar_t *cv)
307 {
308 	sleepq_t *sq;
309 	kmutex_t *mp;
310 	lwp_t *l;
311 
312 	KASSERT(cv_is_valid(cv));
313 
314 	mp = sleepq_hashlock(cv);
315 	sq = CV_SLEEPQ(cv);
316 	l = TAILQ_FIRST(sq);
317 	if (l == NULL) {
318 		mutex_spin_exit(mp);
319 		return;
320 	}
321 	KASSERT(l->l_sleepq == sq);
322 	KASSERT(l->l_mutex == mp);
323 	KASSERT(l->l_wchan == cv);
324 	sleepq_remove(sq, l);
325 	mutex_spin_exit(mp);
326 
327 	KASSERT(cv_is_valid(cv));
328 }
329 
330 /*
331  * cv_broadcast:
332  *
333  *	Wake all LWPs waiting on a condition variable.  Must be called
334  *	with the interlocking mutex held.
335  */
336 void
cv_broadcast(kcondvar_t * cv)337 cv_broadcast(kcondvar_t *cv)
338 {
339 
340 	/* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */
341 	KASSERT(cv_is_valid(cv));
342 
343 	if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv))))
344 		cv_wakeup_all(cv);
345 }
346 
347 static void __noinline
cv_wakeup_all(kcondvar_t * cv)348 cv_wakeup_all(kcondvar_t *cv)
349 {
350 	sleepq_t *sq;
351 	kmutex_t *mp;
352 	lwp_t *l, *next;
353 
354 	KASSERT(cv_is_valid(cv));
355 
356 	mp = sleepq_hashlock(cv);
357 	sq = CV_SLEEPQ(cv);
358 	for (l = TAILQ_FIRST(sq); l != NULL; l = next) {
359 		KASSERT(l->l_sleepq == sq);
360 		KASSERT(l->l_mutex == mp);
361 		KASSERT(l->l_wchan == cv);
362 		next = TAILQ_NEXT(l, l_sleepchain);
363 		sleepq_remove(sq, l);
364 	}
365 	mutex_spin_exit(mp);
366 
367 	KASSERT(cv_is_valid(cv));
368 }
369 
370 /*
371  * cv_has_waiters:
372  *
373  *	For diagnostic assertions: return non-zero if a condition
374  *	variable has waiters.
375  */
376 bool
cv_has_waiters(kcondvar_t * cv)377 cv_has_waiters(kcondvar_t *cv)
378 {
379 
380 	return !TAILQ_EMPTY(CV_SLEEPQ(cv));
381 }
382 
383 /*
384  * cv_is_valid:
385  *
386  *	For diagnostic assertions: return non-zero if a condition
387  *	variable appears to be valid.  No locks need be held.
388  */
389 bool
cv_is_valid(kcondvar_t * cv)390 cv_is_valid(kcondvar_t *cv)
391 {
392 
393 	return CV_WMESG(cv) != deadcv && CV_WMESG(cv) != NULL;
394 }
395