/*
 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 * Copyright (C) 2007 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 * UCRL-CODE-235197
 *
 * This file is part of the SPL, Solaris Porting Layer.
 *
 * The SPL is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * The SPL is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>

#ifdef HAVE_SCHED_SIGNAL_HEADER
#include <linux/sched/signal.h>
#endif

#define	MAX_HRTIMEOUT_SLACK_US	1000
static unsigned int spl_schedule_hrtimeout_slack_us = 0;

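/*
 * Bounded setter for the spl_schedule_hrtimeout_slack_us module
 * parameter: reject values above MAX_HRTIMEOUT_SLACK_US before
 * delegating to the generic param_set_uint() handler.
 */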
static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);

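/*
 * Wakeup condition for __cv_destroy(): returns nonzero once the last
 * waiter and the last reference are gone, at which point no thread
 * can still be touching the condition variable.
 */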
static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

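/*
 * Common wait path: take a reference, record the associated mutex,
 * join the wait queue, drop the mutex while sleeping, then retake
 * it before returning to the caller.
 */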
static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this
	 * ensures we're linked onto the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* With no waiters left, a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after releasing our cvp reference;
	 * otherwise we could deadlock with a thread that holds the
	 * mutex and calls cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

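/* Interruptible I/O wait; returns 0 if a signal is pending, else 1. */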
int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

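/* Interruptible wait; returns 0 if a signal is pending, else 1. */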
int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);

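/*
 * An 'idle' wait sleeps interruptibly (so it does not inflate the
 * load average) but blocks all signals first, so it cannot actually
 * be interrupted by them.
 */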
void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
#define	spl_io_schedule_timeout(t)	io_schedule_timeout(t)
#else

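/*
 * For kernels without io_schedule_timeout(), approximate it by arming
 * a timer to wake this task and then calling plain io_schedule().
 */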
struct spl_task_timer {
	struct timer_list timer;
	struct task_struct *task;
};

static void
__cv_wakeup(spl_timer_list_t t)
{
	struct timer_list *tmr = (struct timer_list *)t;
	struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);

	wake_up_process(task_timer->task);
}

static long
spl_io_schedule_timeout(long time_left)
{
	long expire_time = jiffies + time_left;
	struct spl_task_timer task_timer;
	struct timer_list *timer = &task_timer.timer;

	task_timer.task = current;

	timer_setup(timer, __cv_wakeup, 0);

	timer->expires = expire_time;
	add_timer(timer);

	io_schedule();

	del_timer_sync(timer);

	time_left = expire_time - jiffies;

	return (time_left < 0 ? 0 : time_left);
}
#endif

/*
 * 'expire_time' argument is an absolute wall clock time in jiffies.
 * Return value is 1 if woken before 'expire_time', or -1 if the
 * timeout occurred.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffies wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this
	 * ensures we're linked onto the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = spl_io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* With no waiters left, a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after releasing our cvp reference;
	 * otherwise we could deadlock with a thread that holds the
	 * mutex and calls cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? 1 : -1);
}

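/*
 * Timed wait wrappers; these select the task sleep state and whether
 * the wait is charged as I/O, then defer to __cv_timedwait_common().
 */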
int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

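/* As __cv_timedwait(), but interruptible; returns 0 if a signal is pending. */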
int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	int rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = __cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * 'expire_time' argument is an absolute clock time in nanoseconds.
 * Return value is 1 if woken before 'expire_time', or -1 if the
 * timeout occurred.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this
	 * ensures we're linked onto the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

	ktime_left = ktime_set(0, time_left);
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* With no waiters left, a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
	/* -EINTR means we were woken before the timeout expired. */
	return (rc == -EINTR ? 1 : -1);
}

/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 */
static int
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
		tim += gethrtime();

	return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);

int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	int rc;

	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);

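/*
 * As cv_timedwait_hires(), but with all signals blocked for the
 * duration of the interruptible sleep (idle semantics).
 */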
int
cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(cv_timedwait_idle_hires);

void
__cv_signal(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * All waiters are added with WQ_FLAG_EXCLUSIVE, so only one
	 * waiter will be set runnable with each call to wake_up().
	 * Additionally wake_up() holds a spin_lock associated with
	 * the wait queue to ensure we don't race waking up processes.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);

void
__cv_broadcast(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * wake_up_all() will wake up all waiters, even those which
	 * have the WQ_FLAG_EXCLUSIVE flag set.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up_all(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);