xref: /openbsd/sys/kern/sys_futex.c (revision d415bd75)
1 /*	$OpenBSD: sys_futex.c,v 1.22 2023/08/14 07:42:34 miod Exp $ */
2 
3 /*
4  * Copyright (c) 2016-2017 Martin Pieuchot
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/proc.h>
22 #include <sys/mount.h>
23 #include <sys/syscallargs.h>
24 #include <sys/pool.h>
25 #include <sys/time.h>
26 #include <sys/rwlock.h>
27 #include <sys/futex.h>
28 
29 #ifdef KTRACE
30 #include <sys/ktrace.h>
31 #endif
32 
33 #include <uvm/uvm.h>
34 
35 /*
36  * Kernel representation of a futex.
37  */
38 struct futex {
39 	LIST_ENTRY(futex)	 ft_list;	/* list of all futexes */
40 	TAILQ_HEAD(, proc)	 ft_threads;	/* sleeping queue */
41 	struct uvm_object	*ft_obj;	/* UVM object */
42 	struct vm_amap		*ft_amap;	/* UVM amap */
43 	voff_t			 ft_off;	/* UVM offset */
44 	unsigned int		 ft_refcnt;	/* # of references */
45 };
46 
47 /* Syscall helpers. */
48 int	 futex_wait(uint32_t *, uint32_t, const struct timespec *, int);
49 int	 futex_wake(uint32_t *, uint32_t, int);
50 int	 futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t, int);
51 
52 /* Flags for futex_get(). */
53 #define FT_CREATE	0x1	/* Create a futex if it doesn't exist. */
54 #define FT_PRIVATE	0x2	/* Futex is process-private. */
55 
56 struct futex *futex_get(uint32_t *, int);
57 void	 futex_put(struct futex *);
58 
59 /*
60  * The global futex lock serializes futex(2) calls so that no wakeup
61  * event is lost, and protects all futex lists and futex states.
62  */
63 struct rwlock			ftlock = RWLOCK_INITIALIZER("futex");
64 static struct futex_list	ftlist_shared =
65 				    LIST_HEAD_INITIALIZER(ftlist_shared);
66 struct pool			ftpool;
67 
68 
69 void
70 futex_init(void)
71 {
72 	pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE,
73 	    PR_WAITOK | PR_RWLOCK, "futexpl", NULL);
74 }
75 
76 int
77 sys_futex(struct proc *p, void *v, register_t *retval)
78 {
79 	struct sys_futex_args /* {
80 		syscallarg(uint32_t *) f;
81 		syscallarg(int) op;
82 		syscallarg(inr) val;
83 		syscallarg(const struct timespec *) timeout;
84 		syscallarg(uint32_t *) g;
85 	} */ *uap = v;
86 	uint32_t *uaddr = SCARG(uap, f);
87 	int op = SCARG(uap, op);
88 	uint32_t val = SCARG(uap, val);
89 	const struct timespec *timeout = SCARG(uap, timeout);
90 	void *g = SCARG(uap, g);
91 	int flags = 0;
92 	int error = 0;
93 
94 	if (op & FUTEX_PRIVATE_FLAG)
95 		flags |= FT_PRIVATE;
96 
97 	rw_enter_write(&ftlock);
98 	switch (op) {
99 	case FUTEX_WAIT:
100 	case FUTEX_WAIT_PRIVATE:
101 		error = futex_wait(uaddr, val, timeout, flags);
102 		break;
103 	case FUTEX_WAKE:
104 	case FUTEX_WAKE_PRIVATE:
105 		*retval = futex_wake(uaddr, val, flags);
106 		break;
107 	case FUTEX_REQUEUE:
108 	case FUTEX_REQUEUE_PRIVATE:
109 		*retval = futex_requeue(uaddr, val, g, (u_long)timeout, flags);
110 		break;
111 	default:
112 		error = ENOSYS;
113 		break;
114 	}
115 	rw_exit_write(&ftlock);
116 
117 	return error;
118 }
119 
120 /*
121  * Return an existing futex matching userspace address ``uaddr''.
122  *
123  * If such futex does not exist and FT_CREATE is given, create it.
124  */
125 struct futex *
126 futex_get(uint32_t *uaddr, int flags)
127 {
128 	struct proc *p = curproc;
129 	vm_map_t map = &p->p_vmspace->vm_map;
130 	vm_map_entry_t entry;
131 	struct uvm_object *obj = NULL;
132 	struct vm_amap *amap = NULL;
133 	voff_t off = (vaddr_t)uaddr;
134 	struct futex *f;
135 	struct futex_list *ftlist = &p->p_p->ps_ftlist;
136 
137 	rw_assert_wrlock(&ftlock);
138 
139 	if (!(flags & FT_PRIVATE)) {
140 		vm_map_lock_read(map);
141 		if (uvm_map_lookup_entry(map, (vaddr_t)uaddr, &entry) &&
142 		    entry->inheritance == MAP_INHERIT_SHARE) {
143 			if (UVM_ET_ISOBJ(entry)) {
144 				ftlist = &ftlist_shared;
145 				obj = entry->object.uvm_obj;
146 				off = entry->offset +
147 				    ((vaddr_t)uaddr - entry->start);
148 			} else if (entry->aref.ar_amap) {
149 				ftlist = &ftlist_shared;
150 				amap = entry->aref.ar_amap;
151 				off = ptoa(entry->aref.ar_pageoff) +
152 				    ((vaddr_t)uaddr - entry->start);
153 			}
154 		}
155 		vm_map_unlock_read(map);
156 	}
157 
158 	LIST_FOREACH(f, ftlist, ft_list) {
159 		if (f->ft_obj == obj && f->ft_amap == amap &&
160 		    f->ft_off == off) {
161 			f->ft_refcnt++;
162 			break;
163 		}
164 	}
165 
166 	if ((f == NULL) && (flags & FT_CREATE)) {
167 		/*
168 		 * We rely on the rwlock to ensure that no other thread
169 		 * create the same futex.
170 		 */
171 		f = pool_get(&ftpool, PR_WAITOK);
172 		TAILQ_INIT(&f->ft_threads);
173 		f->ft_obj = obj;
174 		f->ft_amap = amap;
175 		f->ft_off = off;
176 		f->ft_refcnt = 1;
177 		LIST_INSERT_HEAD(ftlist, f, ft_list);
178 	}
179 
180 	return f;
181 }
182 
183 /*
184  * Release a given futex.
185  */
186 void
187 futex_put(struct futex *f)
188 {
189 	rw_assert_wrlock(&ftlock);
190 
191 	KASSERT(f->ft_refcnt > 0);
192 
193 	--f->ft_refcnt;
194 	if (f->ft_refcnt == 0) {
195 		KASSERT(TAILQ_EMPTY(&f->ft_threads));
196 		LIST_REMOVE(f, ft_list);
197 		pool_put(&ftpool, f);
198 	}
199 }
200 
201 /*
202  * Put the current thread on the sleep queue of the futex at address
203  * ``uaddr''.  Let it sleep for the specified ``timeout'' time, or
204  * indefinitely if the argument is NULL.
205  */
206 int
207 futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout,
208     int flags)
209 {
210 	struct proc *p = curproc;
211 	struct futex *f;
212 	uint64_t nsecs = INFSLP;
213 	uint32_t cval;
214 	int error;
215 
216 	/*
217 	 * After reading the value a race is still possible but
218 	 * we deal with it by serializing all futex syscalls.
219 	 */
220 	rw_assert_wrlock(&ftlock);
221 
222 	/*
223 	 * Read user space futex value
224 	 */
225 	if ((error = copyin32(uaddr, &cval)))
226 		return error;
227 
228 	/* If the value changed, stop here. */
229 	if (cval != val)
230 		return EAGAIN;
231 
232 	if (timeout != NULL) {
233 		struct timespec ts;
234 
235 		if ((error = copyin(timeout, &ts, sizeof(ts))))
236 			return error;
237 #ifdef KTRACE
238 		if (KTRPOINT(p, KTR_STRUCT))
239 			ktrreltimespec(p, &ts);
240 #endif
241 		if (ts.tv_sec < 0 || !timespecisvalid(&ts))
242 			return EINVAL;
243 		nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(&ts), MAXTSLP));
244 	}
245 
246 	f = futex_get(uaddr, flags | FT_CREATE);
247 	TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
248 	p->p_futex = f;
249 
250 	error = rwsleep_nsec(p, &ftlock, PWAIT|PCATCH, "fsleep", nsecs);
251 	if (error == ERESTART)
252 		error = ECANCELED;
253 	else if (error == EWOULDBLOCK) {
254 		/* A race occurred between a wakeup and a timeout. */
255 		if (p->p_futex == NULL)
256 			error = 0;
257 		else
258 			error = ETIMEDOUT;
259 	}
260 
261 	/* Remove ourself if we haven't been awaken. */
262 	if ((f = p->p_futex) != NULL) {
263 		p->p_futex = NULL;
264 		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
265 		futex_put(f);
266 	}
267 
268 	return error;
269 }
270 
271 /*
272  * Wakeup at most ``n'' sibling threads sleeping on a futex at address
273  * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at
274  * address ``uaddr2''.
275  */
276 int
277 futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m,
278     int flags)
279 {
280 	struct futex *f, *g;
281 	struct proc *p;
282 	uint32_t count = 0;
283 
284 	rw_assert_wrlock(&ftlock);
285 
286 	f = futex_get(uaddr, flags);
287 	if (f == NULL)
288 		return 0;
289 
290 	while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) {
291 		p->p_futex = NULL;
292 		TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
293 		futex_put(f);
294 
295 		if (count < n) {
296 			wakeup_one(p);
297 		} else if (uaddr2 != NULL) {
298 			g = futex_get(uaddr2, FT_CREATE);
299 			TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link);
300 			p->p_futex = g;
301 		}
302 		count++;
303 	}
304 
305 	futex_put(f);
306 
307 	return count;
308 }
309 
310 /*
311  * Wakeup at most ``n'' sibling threads sleeping on a futex at address
312  * ``uaddr''.
313  */
314 int
315 futex_wake(uint32_t *uaddr, uint32_t n, int flags)
316 {
317 	return futex_requeue(uaddr, n, NULL, 0, flags);
318 }
319