1 /*
2  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
3  *  Copyright (C) 2007 The Regents of the University of California.
4  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
5  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
6  *  UCRL-CODE-235197
7  *
8  *  This file is part of the SPL, Solaris Porting Layer.
9  *
10  *  The SPL is free software; you can redistribute it and/or modify it
11  *  under the terms of the GNU General Public License as published by the
12  *  Free Software Foundation; either version 2 of the License, or (at your
13  *  option) any later version.
14  *
15  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
16  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18  *  for more details.
19  *
20  *  You should have received a copy of the GNU General Public License along
21  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
22  *
23  *  Solaris Porting Layer (SPL) Condition Variables Implementation.
24  */
25 
26 #include <sys/condvar.h>
27 #include <sys/time.h>
28 #include <sys/sysmacros.h>
29 #include <linux/hrtimer.h>
30 #include <linux/compiler_compat.h>
31 #include <linux/mod_compat.h>
32 
33 #include <linux/sched.h>
34 
35 #ifdef HAVE_SCHED_SIGNAL_HEADER
36 #include <linux/sched/signal.h>
37 #endif
38 
/*
 * Upper bound, in microseconds, for the hrtimeout slack.  The module
 * parameter setter rejects larger values and __cv_timedwait_hires() clamps
 * the slack it hands to schedule_hrtimeout_range() to this limit.
 */
#define	MAX_HRTIMEOUT_SLACK_US	1000
/*
 * Tunable slack, in microseconds.  __cv_timedwait_hires() uses it as the
 * minimum slack when the caller supplied resolution is smaller.
 */
static unsigned int spl_schedule_hrtimeout_slack_us = 0;
41 
42 static int
param_set_hrtimeout_slack(const char * buf,zfs_kernel_param_t * kp)43 param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
44 {
45 	unsigned long val;
46 	int error;
47 
48 	error = kstrtoul(buf, 0, &val);
49 	if (error)
50 		return (error);
51 
52 	if (val > MAX_HRTIMEOUT_SLACK_US)
53 		return (-EINVAL);
54 
55 	error = param_set_uint(buf, kp);
56 	if (error < 0)
57 		return (error);
58 
59 	return (0);
60 }
61 
/*
 * Expose spl_schedule_hrtimeout_slack_us as a module parameter (mode 0644).
 * Writes go through param_set_hrtimeout_slack(); reads use param_get_uint().
 */
module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");
66 
67 void
__cv_init(kcondvar_t * cvp,char * name,kcv_type_t type,void * arg)68 __cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
69 {
70 	ASSERT(cvp);
71 	ASSERT(name == NULL);
72 	ASSERT(type == CV_DEFAULT);
73 	ASSERT(arg == NULL);
74 
75 	cvp->cv_magic = CV_MAGIC;
76 	init_waitqueue_head(&cvp->cv_event);
77 	init_waitqueue_head(&cvp->cv_destroy);
78 	atomic_set(&cvp->cv_waiters, 0);
79 	atomic_set(&cvp->cv_refs, 1);
80 	cvp->cv_mutex = NULL;
81 }
82 EXPORT_SYMBOL(__cv_init);
83 
84 static int
cv_destroy_wakeup(kcondvar_t * cvp)85 cv_destroy_wakeup(kcondvar_t *cvp)
86 {
87 	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
88 		ASSERT(cvp->cv_mutex == NULL);
89 		ASSERT(!waitqueue_active(&cvp->cv_event));
90 		return (1);
91 	}
92 
93 	return (0);
94 }
95 
96 void
__cv_destroy(kcondvar_t * cvp)97 __cv_destroy(kcondvar_t *cvp)
98 {
99 	ASSERT(cvp);
100 	ASSERT(cvp->cv_magic == CV_MAGIC);
101 
102 	cvp->cv_magic = CV_DESTROY;
103 	atomic_dec(&cvp->cv_refs);
104 
105 	/* Block until all waiters are woken and references dropped. */
106 	while (cv_destroy_wakeup(cvp) == 0)
107 		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);
108 
109 	ASSERT3P(cvp->cv_mutex, ==, NULL);
110 	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
111 	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
112 	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
113 }
114 EXPORT_SYMBOL(__cv_destroy);
115 
/*
 * Common implementation behind the untimed cv_wait variants.
 *
 *   cvp   - condition variable to sleep on
 *   mp    - mutex owned by the caller (asserted); it is released while the
 *           task sleeps and re-acquired before returning
 *   state - task state passed to prepare_to_wait_exclusive()
 *   io    - non-zero to sleep with io_schedule() rather than schedule()
 *
 * A reference on cv_refs is held for the duration of the call and
 * cv_waiters is raised while the task is queued on cv_event.
 */
static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	/* Record the mutex in the cv if none is currently recorded. */
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * Mutex should be dropped after prepare_to_wait() this
	 * ensures we're linked in to the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters a different mutex could be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debug anyway, so make it best-effort
		 */
		cvp->cv_mutex = NULL;
		/* Let a thread blocked in __cv_destroy() re-check its state. */
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Hold mutex after we release the cvp, otherwise we could dead lock
	 * with a thread holding the mutex and call cv_destroy.
	 */
	mutex_enter(mp);
}
167 
/*
 * Wait on 'cvp' in TASK_UNINTERRUPTIBLE state using schedule().
 * The caller must own 'mp'; see cv_wait_common().
 */
void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);
174 
/*
 * Wait on 'cvp' in TASK_UNINTERRUPTIBLE state, sleeping via io_schedule().
 * The caller must own 'mp'; see cv_wait_common().
 */
void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);
181 
182 int
__cv_wait_io_sig(kcondvar_t * cvp,kmutex_t * mp)183 __cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
184 {
185 	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);
186 
187 	return (signal_pending(current) ? 0 : 1);
188 }
189 EXPORT_SYMBOL(__cv_wait_io_sig);
190 
191 int
__cv_wait_sig(kcondvar_t * cvp,kmutex_t * mp)192 __cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
193 {
194 	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
195 
196 	return (signal_pending(current) ? 0 : 1);
197 }
198 EXPORT_SYMBOL(__cv_wait_sig);
199 
200 void
__cv_wait_idle(kcondvar_t * cvp,kmutex_t * mp)201 __cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
202 {
203 	sigset_t blocked, saved;
204 
205 	sigfillset(&blocked);
206 	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
207 	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
208 	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
209 }
210 EXPORT_SYMBOL(__cv_wait_idle);
211 
/*
 * spl_io_schedule_timeout() maps straight onto io_schedule_timeout() when
 * HAVE_IO_SCHEDULE_TIMEOUT is defined; otherwise the fallback below, built
 * from a timer plus io_schedule(), is used.
 */
#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
#define	spl_io_schedule_timeout(t)	io_schedule_timeout(t)
#else

/* Pairs a timer with the task its callback should wake. */
struct spl_task_timer {
	struct timer_list timer;
	struct task_struct *task;
};

/*
 * Timer callback: recover the enclosing spl_task_timer from the timer and
 * wake the task recorded in it.
 */
static void
__cv_wakeup(spl_timer_list_t t)
{
	struct timer_list *tmr = (struct timer_list *)t;
	struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);

	wake_up_process(task_timer->task);
}

/*
 * Sleep with io_schedule() for at most 'time_left' jiffies.
 *
 * An on-stack timer is armed to wake the current task at the deadline.  The
 * task state is not changed here.
 * NOTE(review): presumably this relies on the caller having set it, as
 * __cv_timedwait_common() does through prepare_to_wait_exclusive() - confirm.
 *
 * Returns the number of jiffies remaining until the deadline, or 0 if the
 * deadline has passed.
 */
static long
spl_io_schedule_timeout(long time_left)
{
	long expire_time = jiffies + time_left;
	struct spl_task_timer task_timer;
	struct timer_list *timer = &task_timer.timer;

	task_timer.task = current;

	timer_setup(timer, __cv_wakeup, 0);

	timer->expires = expire_time;
	add_timer(timer);

	io_schedule();

	/* The timer lives on this stack frame; remove it before returning. */
	del_timer_sync(timer);

	time_left = expire_time - jiffies;

	return (time_left < 0 ? 0 : time_left);
}
#endif
253 
/*
 * Common implementation behind the jiffies based cv_timedwait variants.
 *
 * 'expire_time' is an absolute time in jiffies; it is compared directly
 * against the current value of 'jiffies'.  'state' is the task state passed
 * to prepare_to_wait_exclusive(), and a non-zero 'io' selects
 * spl_io_schedule_timeout() instead of schedule_timeout() for the sleep.
 *
 * The caller must own 'mp' (asserted).  It is released while sleeping and
 * re-acquired before returning, except when 'expire_time' has already
 * passed, in which case the function returns at once without dropping it.
 *
 * Returns 1 if the sleep ended with time remaining, or -1 if the timeout
 * expired (including the case where it had already expired on entry).  The
 * amount of time left is not reported to the caller.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffie wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	/* Record the mutex in the cv if none is currently recorded. */
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * Mutex should be dropped after prepare_to_wait() this
	 * ensures we're linked in to the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = spl_io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* No more waiters a different mutex could be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debug anyway, so make it best-effort
		 */
		cvp->cv_mutex = NULL;
		/* Let a thread blocked in __cv_destroy() re-check its state. */
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Hold mutex after we release the cvp, otherwise we could dead lock
	 * with a thread holding the mutex and call cv_destroy.
	 */
	mutex_enter(mp);
	/* Only "time remained" vs. "timed out" is reported, not the amount. */
	return (time_left > 0 ? 1 : -1);
}
317 
/*
 * Timed wait in TASK_UNINTERRUPTIBLE state; 'exp_time' is an absolute time
 * in jiffies.  Returns the result of __cv_timedwait_common().
 */
int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);
325 
/*
 * Timed wait in TASK_UNINTERRUPTIBLE state using the I/O flavoured sleep;
 * 'exp_time' is an absolute time in jiffies.  Returns the result of
 * __cv_timedwait_common().
 */
int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);
333 
334 int
__cv_timedwait_sig(kcondvar_t * cvp,kmutex_t * mp,clock_t exp_time)335 __cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
336 {
337 	int rc;
338 
339 	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
340 	return (signal_pending(current) ? 0 : rc);
341 }
342 EXPORT_SYMBOL(__cv_timedwait_sig);
343 
344 int
__cv_timedwait_idle(kcondvar_t * cvp,kmutex_t * mp,clock_t exp_time)345 __cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
346 {
347 	sigset_t blocked, saved;
348 	int rc;
349 
350 	sigfillset(&blocked);
351 	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
352 	rc = __cv_timedwait_common(cvp, mp, exp_time,
353 	    TASK_INTERRUPTIBLE, 0);
354 	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
355 
356 	return (rc);
357 }
358 EXPORT_SYMBOL(__cv_timedwait_idle);
/*
 * High resolution timed wait.
 *
 * 'expire_time' is an absolute time in nanoseconds on the gethrtime()
 * clock.  'res' is the caller's requested resolution in nanoseconds and
 * 'state' is the task state passed to prepare_to_wait_exclusive().
 *
 * The caller must own 'mp' (asserted).  It is released while sleeping and
 * re-acquired before returning, except when 'expire_time' has already
 * passed, in which case the function returns at once without dropping it.
 *
 * Returns 1 when schedule_hrtimeout_range() returns -EINTR and -1
 * otherwise, including when the deadline had already passed on entry.  The
 * amount of time left is not reported to the caller.
 * NOTE(review): -EINTR presumably means the task was woken before the timer
 * expired - confirm against the kernel documentation.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	/* Record the mutex in the cv if none is currently recorded. */
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * Mutex should be dropped after prepare_to_wait() this
	 * ensures we're linked in to the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

	/*
	 * NOTE(review): the whole interval is passed as the nanosecond
	 * argument of ktime_set(); if that parameter is narrower than
	 * hrtime_t on some platforms (e.g. unsigned long on 32-bit) long
	 * waits could be truncated - confirm.
	 */
	ktime_left = ktime_set(0, time_left);
	/*
	 * Slack is the larger of the requested resolution and the tunable
	 * (converted to ns), capped at MAX_HRTIMEOUT_SLACK_US (in ns).
	 */
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* No more waiters a different mutex could be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debug anyway, so make it best-effort
		 */
		cvp->cv_mutex = NULL;
		/* Let a thread blocked in __cv_destroy() re-check its state. */
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * As in cv_wait_common(), the mutex is re-taken only after the cv
	 * reference has been dropped.
	 */
	mutex_enter(mp);
	return (rc == -EINTR ? 1 : -1);
}
421 
422 /*
423  * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
424  */
425 static int
cv_timedwait_hires_common(kcondvar_t * cvp,kmutex_t * mp,hrtime_t tim,hrtime_t res,int flag,int state)426 cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
427     hrtime_t res, int flag, int state)
428 {
429 	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
430 		tim += gethrtime();
431 
432 	return (__cv_timedwait_hires(cvp, mp, tim, res, state));
433 }
434 
/*
 * High resolution timed wait in TASK_UNINTERRUPTIBLE state.  'tim', 'res'
 * and 'flag' are interpreted by cv_timedwait_hires_common(), whose result is
 * returned.
 */
int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);
443 
444 int
cv_timedwait_sig_hires(kcondvar_t * cvp,kmutex_t * mp,hrtime_t tim,hrtime_t res,int flag)445 cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
446     hrtime_t res, int flag)
447 {
448 	int rc;
449 
450 	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
451 	    TASK_INTERRUPTIBLE);
452 	return (signal_pending(current) ? 0 : rc);
453 }
454 EXPORT_SYMBOL(cv_timedwait_sig_hires);
455 
456 int
cv_timedwait_idle_hires(kcondvar_t * cvp,kmutex_t * mp,hrtime_t tim,hrtime_t res,int flag)457 cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
458     hrtime_t res, int flag)
459 {
460 	sigset_t blocked, saved;
461 	int rc;
462 
463 	sigfillset(&blocked);
464 	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
465 	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
466 	    TASK_INTERRUPTIBLE);
467 	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
468 
469 	return (rc);
470 }
471 EXPORT_SYMBOL(cv_timedwait_idle_hires);
472 
/*
 * Wake a single waiter on 'cvp', if there is one.
 *
 * A reference on cv_refs is held across the wake-up and released before
 * returning.
 */
void
__cv_signal(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * All waiters are added with WQ_FLAG_EXCLUSIVE so only one
	 * waiter will be set runnable with each call to wake_up().
	 * Additionally wake_up() holds a spin_lock associated with
	 * the wait queue to ensure we don't race waking up processes.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);
492 
/*
 * Wake every waiter on 'cvp', if there are any.
 *
 * A reference on cv_refs is held across the wake-up and released before
 * returning.
 */
void
__cv_broadcast(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * Wake_up_all() will wake up all waiters even those which
	 * have the WQ_FLAG_EXCLUSIVE flag set.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up_all(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);
510