1 /* $OpenBSD: sys_futex.c,v 1.22 2023/08/14 07:42:34 miod Exp $ */ 2 3 /* 4 * Copyright (c) 2016-2017 Martin Pieuchot 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/proc.h> 22 #include <sys/mount.h> 23 #include <sys/syscallargs.h> 24 #include <sys/pool.h> 25 #include <sys/time.h> 26 #include <sys/rwlock.h> 27 #include <sys/futex.h> 28 29 #ifdef KTRACE 30 #include <sys/ktrace.h> 31 #endif 32 33 #include <uvm/uvm.h> 34 35 /* 36 * Kernel representation of a futex. 37 */ 38 struct futex { 39 LIST_ENTRY(futex) ft_list; /* list of all futexes */ 40 TAILQ_HEAD(, proc) ft_threads; /* sleeping queue */ 41 struct uvm_object *ft_obj; /* UVM object */ 42 struct vm_amap *ft_amap; /* UVM amap */ 43 voff_t ft_off; /* UVM offset */ 44 unsigned int ft_refcnt; /* # of references */ 45 }; 46 47 /* Syscall helpers. */ 48 int futex_wait(uint32_t *, uint32_t, const struct timespec *, int); 49 int futex_wake(uint32_t *, uint32_t, int); 50 int futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t, int); 51 52 /* Flags for futex_get(). */ 53 #define FT_CREATE 0x1 /* Create a futex if it doesn't exist. */ 54 #define FT_PRIVATE 0x2 /* Futex is process-private. */ 55 56 struct futex *futex_get(uint32_t *, int); 57 void futex_put(struct futex *); 58 59 /* 60 * The global futex lock serializes futex(2) calls so that no wakeup 61 * event is lost, and protects all futex lists and futex states. 62 */ 63 struct rwlock ftlock = RWLOCK_INITIALIZER("futex"); 64 static struct futex_list ftlist_shared = 65 LIST_HEAD_INITIALIZER(ftlist_shared); 66 struct pool ftpool; 67 68 69 void 70 futex_init(void) 71 { 72 pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE, 73 PR_WAITOK | PR_RWLOCK, "futexpl", NULL); 74 } 75 76 int 77 sys_futex(struct proc *p, void *v, register_t *retval) 78 { 79 struct sys_futex_args /* { 80 syscallarg(uint32_t *) f; 81 syscallarg(int) op; 82 syscallarg(inr) val; 83 syscallarg(const struct timespec *) timeout; 84 syscallarg(uint32_t *) g; 85 } */ *uap = v; 86 uint32_t *uaddr = SCARG(uap, f); 87 int op = SCARG(uap, op); 88 uint32_t val = SCARG(uap, val); 89 const struct timespec *timeout = SCARG(uap, timeout); 90 void *g = SCARG(uap, g); 91 int flags = 0; 92 int error = 0; 93 94 if (op & FUTEX_PRIVATE_FLAG) 95 flags |= FT_PRIVATE; 96 97 rw_enter_write(&ftlock); 98 switch (op) { 99 case FUTEX_WAIT: 100 case FUTEX_WAIT_PRIVATE: 101 error = futex_wait(uaddr, val, timeout, flags); 102 break; 103 case FUTEX_WAKE: 104 case FUTEX_WAKE_PRIVATE: 105 *retval = futex_wake(uaddr, val, flags); 106 break; 107 case FUTEX_REQUEUE: 108 case FUTEX_REQUEUE_PRIVATE: 109 *retval = futex_requeue(uaddr, val, g, (u_long)timeout, flags); 110 break; 111 default: 112 error = ENOSYS; 113 break; 114 } 115 rw_exit_write(&ftlock); 116 117 return error; 118 } 119 120 /* 121 * Return an existing futex matching userspace address ``uaddr''. 122 * 123 * If such futex does not exist and FT_CREATE is given, create it. 124 */ 125 struct futex * 126 futex_get(uint32_t *uaddr, int flags) 127 { 128 struct proc *p = curproc; 129 vm_map_t map = &p->p_vmspace->vm_map; 130 vm_map_entry_t entry; 131 struct uvm_object *obj = NULL; 132 struct vm_amap *amap = NULL; 133 voff_t off = (vaddr_t)uaddr; 134 struct futex *f; 135 struct futex_list *ftlist = &p->p_p->ps_ftlist; 136 137 rw_assert_wrlock(&ftlock); 138 139 if (!(flags & FT_PRIVATE)) { 140 vm_map_lock_read(map); 141 if (uvm_map_lookup_entry(map, (vaddr_t)uaddr, &entry) && 142 entry->inheritance == MAP_INHERIT_SHARE) { 143 if (UVM_ET_ISOBJ(entry)) { 144 ftlist = &ftlist_shared; 145 obj = entry->object.uvm_obj; 146 off = entry->offset + 147 ((vaddr_t)uaddr - entry->start); 148 } else if (entry->aref.ar_amap) { 149 ftlist = &ftlist_shared; 150 amap = entry->aref.ar_amap; 151 off = ptoa(entry->aref.ar_pageoff) + 152 ((vaddr_t)uaddr - entry->start); 153 } 154 } 155 vm_map_unlock_read(map); 156 } 157 158 LIST_FOREACH(f, ftlist, ft_list) { 159 if (f->ft_obj == obj && f->ft_amap == amap && 160 f->ft_off == off) { 161 f->ft_refcnt++; 162 break; 163 } 164 } 165 166 if ((f == NULL) && (flags & FT_CREATE)) { 167 /* 168 * We rely on the rwlock to ensure that no other thread 169 * create the same futex. 170 */ 171 f = pool_get(&ftpool, PR_WAITOK); 172 TAILQ_INIT(&f->ft_threads); 173 f->ft_obj = obj; 174 f->ft_amap = amap; 175 f->ft_off = off; 176 f->ft_refcnt = 1; 177 LIST_INSERT_HEAD(ftlist, f, ft_list); 178 } 179 180 return f; 181 } 182 183 /* 184 * Release a given futex. 185 */ 186 void 187 futex_put(struct futex *f) 188 { 189 rw_assert_wrlock(&ftlock); 190 191 KASSERT(f->ft_refcnt > 0); 192 193 --f->ft_refcnt; 194 if (f->ft_refcnt == 0) { 195 KASSERT(TAILQ_EMPTY(&f->ft_threads)); 196 LIST_REMOVE(f, ft_list); 197 pool_put(&ftpool, f); 198 } 199 } 200 201 /* 202 * Put the current thread on the sleep queue of the futex at address 203 * ``uaddr''. Let it sleep for the specified ``timeout'' time, or 204 * indefinitely if the argument is NULL. 205 */ 206 int 207 futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout, 208 int flags) 209 { 210 struct proc *p = curproc; 211 struct futex *f; 212 uint64_t nsecs = INFSLP; 213 uint32_t cval; 214 int error; 215 216 /* 217 * After reading the value a race is still possible but 218 * we deal with it by serializing all futex syscalls. 219 */ 220 rw_assert_wrlock(&ftlock); 221 222 /* 223 * Read user space futex value 224 */ 225 if ((error = copyin32(uaddr, &cval))) 226 return error; 227 228 /* If the value changed, stop here. */ 229 if (cval != val) 230 return EAGAIN; 231 232 if (timeout != NULL) { 233 struct timespec ts; 234 235 if ((error = copyin(timeout, &ts, sizeof(ts)))) 236 return error; 237 #ifdef KTRACE 238 if (KTRPOINT(p, KTR_STRUCT)) 239 ktrreltimespec(p, &ts); 240 #endif 241 if (ts.tv_sec < 0 || !timespecisvalid(&ts)) 242 return EINVAL; 243 nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(&ts), MAXTSLP)); 244 } 245 246 f = futex_get(uaddr, flags | FT_CREATE); 247 TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link); 248 p->p_futex = f; 249 250 error = rwsleep_nsec(p, &ftlock, PWAIT|PCATCH, "fsleep", nsecs); 251 if (error == ERESTART) 252 error = ECANCELED; 253 else if (error == EWOULDBLOCK) { 254 /* A race occurred between a wakeup and a timeout. */ 255 if (p->p_futex == NULL) 256 error = 0; 257 else 258 error = ETIMEDOUT; 259 } 260 261 /* Remove ourself if we haven't been awaken. */ 262 if ((f = p->p_futex) != NULL) { 263 p->p_futex = NULL; 264 TAILQ_REMOVE(&f->ft_threads, p, p_fut_link); 265 futex_put(f); 266 } 267 268 return error; 269 } 270 271 /* 272 * Wakeup at most ``n'' sibling threads sleeping on a futex at address 273 * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at 274 * address ``uaddr2''. 275 */ 276 int 277 futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m, 278 int flags) 279 { 280 struct futex *f, *g; 281 struct proc *p; 282 uint32_t count = 0; 283 284 rw_assert_wrlock(&ftlock); 285 286 f = futex_get(uaddr, flags); 287 if (f == NULL) 288 return 0; 289 290 while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) { 291 p->p_futex = NULL; 292 TAILQ_REMOVE(&f->ft_threads, p, p_fut_link); 293 futex_put(f); 294 295 if (count < n) { 296 wakeup_one(p); 297 } else if (uaddr2 != NULL) { 298 g = futex_get(uaddr2, FT_CREATE); 299 TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link); 300 p->p_futex = g; 301 } 302 count++; 303 } 304 305 futex_put(f); 306 307 return count; 308 } 309 310 /* 311 * Wakeup at most ``n'' sibling threads sleeping on a futex at address 312 * ``uaddr''. 313 */ 314 int 315 futex_wake(uint32_t *uaddr, uint32_t n, int flags) 316 { 317 return futex_requeue(uaddr, n, NULL, 0, flags); 318 } 319