1 /*-
2  * Copyright (c) 2017 Hans Petter Selasky
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <linux/workqueue.h>
31 #include <linux/wait.h>
32 #include <linux/compat.h>
33 #include <linux/spinlock.h>
34 
35 #include <sys/kernel.h>
36 
/*
 * States a work structure can be in. The values are used as indices
 * into the per-function transition tables passed to
 * linux_update_state(), so they are kept dense and zero-based.
 * WORK_ST_MAX is the number of states and the size of those tables.
 */
enum {
	WORK_ST_IDLE = 0,		/* idle - not started */
	WORK_ST_TIMER = 1,		/* timer is being started */
	WORK_ST_TASK = 2,		/* taskqueue is being queued */
	WORK_ST_EXEC = 3,		/* callback is being called */
	WORK_ST_CANCEL = 4,		/* cancel is being requested */
	WORK_ST_MAX = 5,
};
48 
/*
 * Workqueues exported to LinuxKPI consumers. All of them are
 * populated by linux_work_init() and cleared by linux_work_uninit().
 */
struct workqueue_struct *system_wq;
struct workqueue_struct *system_long_wq;
struct workqueue_struct *system_unbound_wq;
struct workqueue_struct *system_highpri_wq;
struct workqueue_struct *system_power_efficient_wq;

/*
 * The two workqueues actually backing the exported pointers above.
 */
static struct workqueue_struct *linux_system_short_wq;
static struct workqueue_struct *linux_system_long_wq;

/*
 * Number of threads used when a workqueue is created with "cpus"
 * set to zero. Overwritten by linux_work_init().
 */
static int linux_default_wq_cpus = 4;

/* callout handler for delayed work, defined further down */
static void linux_delayed_work_timer_fn(void *);
64 
65 /*
66  * This function atomically updates the work state and returns the
67  * previous state at the time of update.
68  */
69 static uint8_t
70 linux_update_state(atomic_t *v, const uint8_t *pstate)
71 {
72 	int c, old;
73 
74 	c = v->counter;
75 
76 	while ((old = atomic_cmpxchg(v, c, pstate[c])) != c)
77 		c = old;
78 
79 	return (c);
80 }
81 
82 /*
83  * A LinuxKPI task is allowed to free itself inside the callback function
84  * and cannot safely be referred after the callback function has
85  * completed. This function gives the linux_work_fn() function a hint,
86  * that the task is not going away and can have its state checked
87  * again. Without this extra hint LinuxKPI tasks cannot be serialized
88  * accross multiple worker threads.
89  */
90 static bool
91 linux_work_exec_unblock(struct work_struct *work)
92 {
93 	struct workqueue_struct *wq;
94 	struct work_exec *exec;
95 	bool retval = 0;
96 
97 	wq = work->work_queue;
98 	if (unlikely(wq == NULL))
99 		goto done;
100 
101 	WQ_EXEC_LOCK(wq);
102 	TAILQ_FOREACH(exec, &wq->exec_head, entry) {
103 		if (exec->target == work) {
104 			exec->target = NULL;
105 			retval = 1;
106 			break;
107 		}
108 	}
109 	WQ_EXEC_UNLOCK(wq);
110 done:
111 	return (retval);
112 }
113 
114 static void
115 linux_delayed_work_enqueue(struct delayed_work *dwork)
116 {
117 	struct taskqueue *tq;
118 
119 	tq = dwork->work.work_queue->taskqueue;
120 	taskqueue_enqueue(tq, &dwork->work.work_task);
121 }
122 
123 /*
124  * This function queues the given work structure on the given
125  * workqueue. It returns non-zero if the work was successfully
126  * [re-]queued. Else the work is already pending for completion.
127  */
128 bool
129 linux_queue_work_on(int cpu __unused, struct workqueue_struct *wq,
130     struct work_struct *work)
131 {
132 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
133 		[WORK_ST_IDLE] = WORK_ST_TASK,		/* start queuing task */
134 		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* NOP */
135 		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
136 		[WORK_ST_EXEC] = WORK_ST_TASK,		/* queue task another time */
137 		[WORK_ST_CANCEL] = WORK_ST_TASK,	/* start queuing task again */
138 	};
139 
140 	if (atomic_read(&wq->draining) != 0)
141 		return (!work_pending(work));
142 
143 	switch (linux_update_state(&work->state, states)) {
144 	case WORK_ST_EXEC:
145 	case WORK_ST_CANCEL:
146 		if (linux_work_exec_unblock(work) != 0)
147 			return (1);
148 		/* FALLTHROUGH */
149 	case WORK_ST_IDLE:
150 		work->work_queue = wq;
151 		taskqueue_enqueue(wq->taskqueue, &work->work_task);
152 		return (1);
153 	default:
154 		return (0);		/* already on a queue */
155 	}
156 }
157 
158 /*
159  * This function queues the given work structure on the given
160  * workqueue after a given delay in ticks. It returns non-zero if the
161  * work was successfully [re-]queued. Else the work is already pending
162  * for completion.
163  */
164 bool
165 linux_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
166     struct delayed_work *dwork, unsigned delay)
167 {
168 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
169 		[WORK_ST_IDLE] = WORK_ST_TIMER,		/* start timeout */
170 		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* NOP */
171 		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
172 		[WORK_ST_EXEC] = WORK_ST_TIMER,		/* start timeout */
173 		[WORK_ST_CANCEL] = WORK_ST_TIMER,	/* start timeout */
174 	};
175 
176 	if (atomic_read(&wq->draining) != 0)
177 		return (!work_pending(&dwork->work));
178 
179 	switch (linux_update_state(&dwork->work.state, states)) {
180 	case WORK_ST_EXEC:
181 	case WORK_ST_CANCEL:
182 		if (delay == 0 && linux_work_exec_unblock(&dwork->work) != 0) {
183 			dwork->timer.expires = jiffies;
184 			return (1);
185 		}
186 		/* FALLTHROUGH */
187 	case WORK_ST_IDLE:
188 		dwork->work.work_queue = wq;
189 		dwork->timer.expires = jiffies + delay;
190 
191 		if (delay == 0) {
192 			linux_delayed_work_enqueue(dwork);
193 		} else if (unlikely(cpu != WORK_CPU_UNBOUND)) {
194 			mtx_lock(&dwork->timer.mtx);
195 			callout_reset_on(&dwork->timer.callout, delay,
196 			    &linux_delayed_work_timer_fn, dwork, cpu);
197 			mtx_unlock(&dwork->timer.mtx);
198 		} else {
199 			mtx_lock(&dwork->timer.mtx);
200 			callout_reset(&dwork->timer.callout, delay,
201 			    &linux_delayed_work_timer_fn, dwork);
202 			mtx_unlock(&dwork->timer.mtx);
203 		}
204 		return (1);
205 	default:
206 		return (0);		/* already on a queue */
207 	}
208 }
209 
210 void
211 linux_work_fn(void *context, int pending)
212 {
213 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
214 		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
215 		[WORK_ST_TIMER] = WORK_ST_EXEC,		/* delayed work w/o timeout */
216 		[WORK_ST_TASK] = WORK_ST_EXEC,		/* call callback */
217 		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* complete callback */
218 		[WORK_ST_CANCEL] = WORK_ST_EXEC,	/* failed to cancel */
219 	};
220 	struct work_struct *work;
221 	struct workqueue_struct *wq;
222 	struct work_exec exec;
223 	struct task_struct *task;
224 
225 	task = current;
226 
227 	/* setup local variables */
228 	work = context;
229 	wq = work->work_queue;
230 
231 	/* store target pointer */
232 	exec.target = work;
233 
234 	/* insert executor into list */
235 	WQ_EXEC_LOCK(wq);
236 	TAILQ_INSERT_TAIL(&wq->exec_head, &exec, entry);
237 	while (1) {
238 		switch (linux_update_state(&work->state, states)) {
239 		case WORK_ST_TIMER:
240 		case WORK_ST_TASK:
241 		case WORK_ST_CANCEL:
242 			WQ_EXEC_UNLOCK(wq);
243 
244 			/* set current work structure */
245 			task->work = work;
246 
247 			/* call work function */
248 			work->func(work);
249 
250 			/* set current work structure */
251 			task->work = NULL;
252 
253 			WQ_EXEC_LOCK(wq);
254 			/* check if unblocked */
255 			if (exec.target != work) {
256 				/* reapply block */
257 				exec.target = work;
258 				break;
259 			}
260 			/* FALLTHROUGH */
261 		default:
262 			goto done;
263 		}
264 	}
265 done:
266 	/* remove executor from list */
267 	TAILQ_REMOVE(&wq->exec_head, &exec, entry);
268 	WQ_EXEC_UNLOCK(wq);
269 }
270 
271 void
272 linux_delayed_work_fn(void *context, int pending)
273 {
274 	struct delayed_work *dwork = context;
275 
276 	/*
277 	 * Make sure the timer belonging to the delayed work gets
278 	 * drained before invoking the work function. Else the timer
279 	 * mutex may still be in use which can lead to use-after-free
280 	 * situations, because the work function might free the work
281 	 * structure before returning.
282 	 */
283 	callout_drain(&dwork->timer.callout);
284 
285 	linux_work_fn(&dwork->work, pending);
286 }
287 
288 static void
289 linux_delayed_work_timer_fn(void *arg)
290 {
291 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
292 		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
293 		[WORK_ST_TIMER] = WORK_ST_TASK,		/* start queueing task */
294 		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
295 		[WORK_ST_EXEC] = WORK_ST_EXEC,		/* NOP */
296 		[WORK_ST_CANCEL] = WORK_ST_TASK,	/* failed to cancel */
297 	};
298 	struct delayed_work *dwork = arg;
299 
300 	switch (linux_update_state(&dwork->work.state, states)) {
301 	case WORK_ST_TIMER:
302 	case WORK_ST_CANCEL:
303 		linux_delayed_work_enqueue(dwork);
304 		break;
305 	default:
306 		break;
307 	}
308 }
309 
310 /*
311  * This function cancels the given work structure in a synchronous
312  * fashion. It returns non-zero if the work was successfully
313  * cancelled. Else the work was already cancelled.
314  */
315 bool
316 linux_cancel_work_sync(struct work_struct *work)
317 {
318 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
319 		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
320 		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* can't happen */
321 		[WORK_ST_TASK] = WORK_ST_IDLE,		/* cancel and drain */
322 		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* too late, drain */
323 		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* cancel and drain */
324 	};
325 	struct taskqueue *tq;
326 
327 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
328 	    "linux_cancel_work_sync() might sleep");
329 
330 	switch (linux_update_state(&work->state, states)) {
331 	case WORK_ST_IDLE:
332 	case WORK_ST_TIMER:
333 		return (0);
334 	case WORK_ST_EXEC:
335 		tq = work->work_queue->taskqueue;
336 		if (taskqueue_cancel(tq, &work->work_task, NULL) != 0)
337 			taskqueue_drain(tq, &work->work_task);
338 		return (0);
339 	default:
340 		tq = work->work_queue->taskqueue;
341 		if (taskqueue_cancel(tq, &work->work_task, NULL) != 0)
342 			taskqueue_drain(tq, &work->work_task);
343 		return (1);
344 	}
345 }
346 
347 /*
348  * This function atomically stops the timer and callback. The timer
349  * callback will not be called after this function returns. This
350  * functions returns true when the timeout was cancelled. Else the
351  * timeout was not started or has already been called.
352  */
353 static inline bool
354 linux_cancel_timer(struct delayed_work *dwork, bool drain)
355 {
356 	bool cancelled;
357 
358 	mtx_lock(&dwork->timer.mtx);
359 	cancelled = (callout_stop(&dwork->timer.callout) == 1);
360 	mtx_unlock(&dwork->timer.mtx);
361 
362 	/* check if we should drain */
363 	if (drain)
364 		callout_drain(&dwork->timer.callout);
365 	return (cancelled);
366 }
367 
368 /*
369  * This function cancels the given delayed work structure in a
370  * non-blocking fashion. It returns non-zero if the work was
371  * successfully cancelled. Else the work may still be busy or already
372  * cancelled.
373  */
374 bool
375 linux_cancel_delayed_work(struct delayed_work *dwork)
376 {
377 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
378 		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
379 		[WORK_ST_TIMER] = WORK_ST_CANCEL,	/* try to cancel */
380 		[WORK_ST_TASK] = WORK_ST_CANCEL,	/* try to cancel */
381 		[WORK_ST_EXEC] = WORK_ST_EXEC,		/* NOP */
382 		[WORK_ST_CANCEL] = WORK_ST_CANCEL,	/* NOP */
383 	};
384 	struct taskqueue *tq;
385 
386 	switch (linux_update_state(&dwork->work.state, states)) {
387 	case WORK_ST_TIMER:
388 	case WORK_ST_CANCEL:
389 		if (linux_cancel_timer(dwork, 0)) {
390 			atomic_cmpxchg(&dwork->work.state,
391 			    WORK_ST_CANCEL, WORK_ST_IDLE);
392 			return (1);
393 		}
394 		/* FALLTHROUGH */
395 	case WORK_ST_TASK:
396 		tq = dwork->work.work_queue->taskqueue;
397 		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) == 0) {
398 			atomic_cmpxchg(&dwork->work.state,
399 			    WORK_ST_CANCEL, WORK_ST_IDLE);
400 			return (1);
401 		}
402 		/* FALLTHROUGH */
403 	default:
404 		return (0);
405 	}
406 }
407 
408 /*
409  * This function cancels the given work structure in a synchronous
410  * fashion. It returns non-zero if the work was successfully
411  * cancelled. Else the work was already cancelled.
412  */
413 bool
414 linux_cancel_delayed_work_sync(struct delayed_work *dwork)
415 {
416 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
417 		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
418 		[WORK_ST_TIMER] = WORK_ST_IDLE,		/* cancel and drain */
419 		[WORK_ST_TASK] = WORK_ST_IDLE,		/* cancel and drain */
420 		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* too late, drain */
421 		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* cancel and drain */
422 	};
423 	struct taskqueue *tq;
424 
425 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
426 	    "linux_cancel_delayed_work_sync() might sleep");
427 
428 	switch (linux_update_state(&dwork->work.state, states)) {
429 	case WORK_ST_IDLE:
430 		return (0);
431 	case WORK_ST_EXEC:
432 		tq = dwork->work.work_queue->taskqueue;
433 		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) != 0)
434 			taskqueue_drain(tq, &dwork->work.work_task);
435 		return (0);
436 	case WORK_ST_TIMER:
437 	case WORK_ST_CANCEL:
438 		if (linux_cancel_timer(dwork, 1)) {
439 			/*
440 			 * Make sure taskqueue is also drained before
441 			 * returning:
442 			 */
443 			tq = dwork->work.work_queue->taskqueue;
444 			taskqueue_drain(tq, &dwork->work.work_task);
445 			return (1);
446 		}
447 		/* FALLTHROUGH */
448 	default:
449 		tq = dwork->work.work_queue->taskqueue;
450 		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) != 0)
451 			taskqueue_drain(tq, &dwork->work.work_task);
452 		return (1);
453 	}
454 }
455 
456 /*
457  * This function waits until the given work structure is completed.
458  * It returns non-zero if the work was successfully
459  * waited for. Else the work was not waited for.
460  */
461 bool
462 linux_flush_work(struct work_struct *work)
463 {
464 	struct taskqueue *tq;
465 	int retval;
466 
467 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
468 	    "linux_flush_work() might sleep");
469 
470 	switch (atomic_read(&work->state)) {
471 	case WORK_ST_IDLE:
472 		return (0);
473 	default:
474 		tq = work->work_queue->taskqueue;
475 		retval = taskqueue_poll_is_busy(tq, &work->work_task);
476 		taskqueue_drain(tq, &work->work_task);
477 		return (retval);
478 	}
479 }
480 
481 /*
482  * This function waits until the given delayed work structure is
483  * completed. It returns non-zero if the work was successfully waited
484  * for. Else the work was not waited for.
485  */
486 bool
487 linux_flush_delayed_work(struct delayed_work *dwork)
488 {
489 	struct taskqueue *tq;
490 	int retval;
491 
492 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
493 	    "linux_flush_delayed_work() might sleep");
494 
495 	switch (atomic_read(&dwork->work.state)) {
496 	case WORK_ST_IDLE:
497 		return (0);
498 	case WORK_ST_TIMER:
499 		if (linux_cancel_timer(dwork, 1))
500 			linux_delayed_work_enqueue(dwork);
501 		/* FALLTHROUGH */
502 	default:
503 		tq = dwork->work.work_queue->taskqueue;
504 		retval = taskqueue_poll_is_busy(tq, &dwork->work.work_task);
505 		taskqueue_drain(tq, &dwork->work.work_task);
506 		return (retval);
507 	}
508 }
509 
510 /*
511  * This function returns true if the given work is pending, and not
512  * yet executing:
513  */
514 bool
515 linux_work_pending(struct work_struct *work)
516 {
517 	switch (atomic_read(&work->state)) {
518 	case WORK_ST_TIMER:
519 	case WORK_ST_TASK:
520 	case WORK_ST_CANCEL:
521 		return (1);
522 	default:
523 		return (0);
524 	}
525 }
526 
527 /*
528  * This function returns true if the given work is busy.
529  */
530 bool
531 linux_work_busy(struct work_struct *work)
532 {
533 	struct taskqueue *tq;
534 
535 	switch (atomic_read(&work->state)) {
536 	case WORK_ST_IDLE:
537 		return (0);
538 	case WORK_ST_EXEC:
539 		tq = work->work_queue->taskqueue;
540 		return (taskqueue_poll_is_busy(tq, &work->work_task));
541 	default:
542 		return (1);
543 	}
544 }
545 
546 struct workqueue_struct *
547 linux_create_workqueue_common(const char *name, int cpus)
548 {
549 	struct workqueue_struct *wq;
550 
551 	/*
552 	 * If zero CPUs are specified use the default number of CPUs:
553 	 */
554 	if (cpus == 0)
555 		cpus = linux_default_wq_cpus;
556 
557 	wq = kmalloc(sizeof(*wq), M_WAITOK | M_ZERO);
558 	wq->taskqueue = taskqueue_create(name, M_WAITOK,
559 	    taskqueue_thread_enqueue, &wq->taskqueue);
560 	atomic_set(&wq->draining, 0);
561 	taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);
562 	TAILQ_INIT(&wq->exec_head);
563 	mtx_init(&wq->exec_mtx, "linux_wq_exec", NULL, MTX_DEF);
564 
565 	return (wq);
566 }
567 
568 void
569 linux_destroy_workqueue(struct workqueue_struct *wq)
570 {
571 	atomic_inc(&wq->draining);
572 	drain_workqueue(wq);
573 	taskqueue_free(wq->taskqueue);
574 	mtx_destroy(&wq->exec_mtx);
575 	kfree(wq);
576 }
577 
578 void
579 linux_init_delayed_work(struct delayed_work *dwork, work_func_t func)
580 {
581 	memset(dwork, 0, sizeof(*dwork));
582 	dwork->work.func = func;
583 	TASK_INIT(&dwork->work.work_task, 0, linux_delayed_work_fn, dwork);
584 	mtx_init(&dwork->timer.mtx, spin_lock_name("lkpi-dwork"), NULL,
585 	    MTX_DEF | MTX_NOWITNESS);
586 	callout_init_mtx(&dwork->timer.callout, &dwork->timer.mtx, 0);
587 }
588 
589 struct work_struct *
590 linux_current_work(void)
591 {
592 	return (current->work);
593 }
594 
595 static void
596 linux_work_init(void *arg)
597 {
598 	int max_wq_cpus = mp_ncpus + 1;
599 
600 	/* avoid deadlock when there are too few threads */
601 	if (max_wq_cpus < 4)
602 		max_wq_cpus = 4;
603 
604 	/* set default number of CPUs */
605 	linux_default_wq_cpus = max_wq_cpus;
606 
607 	linux_system_short_wq = alloc_workqueue("linuxkpi_short_wq", 0, max_wq_cpus);
608 	linux_system_long_wq = alloc_workqueue("linuxkpi_long_wq", 0, max_wq_cpus);
609 
610 	/* populate the workqueue pointers */
611 	system_long_wq = linux_system_long_wq;
612 	system_wq = linux_system_short_wq;
613 	system_power_efficient_wq = linux_system_short_wq;
614 	system_unbound_wq = linux_system_short_wq;
615 	system_highpri_wq = linux_system_short_wq;
616 }
617 SYSINIT(linux_work_init, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_init, NULL);
618 
619 static void
620 linux_work_uninit(void *arg)
621 {
622 	destroy_workqueue(linux_system_short_wq);
623 	destroy_workqueue(linux_system_long_wq);
624 
625 	/* clear workqueue pointers */
626 	system_long_wq = NULL;
627 	system_wq = NULL;
628 	system_power_efficient_wq = NULL;
629 	system_unbound_wq = NULL;
630 	system_highpri_wq = NULL;
631 }
632 SYSUNINIT(linux_work_uninit, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_uninit, NULL);
633