1 /* $NetBSD: linux_futex.c,v 1.34 2016/05/20 13:54:34 chs Exp $ */
2
3 /*-
4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by Emmanuel Dreyfus
17 * 4. The name of the author may not be used to endorse or promote
18 * products derived from this software without specific prior written
19 * permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.34 2016/05/20 13:54:34 chs Exp $");
36
37 #include <sys/param.h>
38 #include <sys/time.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/lwp.h>
42 #include <sys/queue.h>
43 #include <sys/condvar.h>
44 #include <sys/mutex.h>
45 #include <sys/kmem.h>
46 #include <sys/kernel.h>
47 #include <sys/atomic.h>
48
49 #include <compat/linux/common/linux_types.h>
50 #include <compat/linux/common/linux_emuldata.h>
51 #include <compat/linux/common/linux_exec.h>
52 #include <compat/linux/common/linux_signal.h>
53 #include <compat/linux/common/linux_futex.h>
54 #include <compat/linux/common/linux_sched.h>
55 #include <compat/linux/common/linux_machdep.h>
56 #include <compat/linux/linux_syscallargs.h>
57
/* Forward declaration: struct waiting_proc points back at its futex. */
struct futex;

/*
 * One waiting_proc per sleeping LWP.  wp_futex tracks the futex the
 * waiter currently belongs to (it can change if the waiter is requeued
 * by futex_wake()), and wp_onlist records whether the waiter is still
 * on that futex's queue.
 */
struct waiting_proc {
	struct futex *wp_futex;
	kcondvar_t wp_futex_cv;
	TAILQ_ENTRY(waiting_proc) wp_list;
	bool wp_onlist;
};
/*
 * In-kernel futex object, keyed by user-space address (one object per
 * uaddr; see futex_get()).  Reference counted: freed in futex_put()
 * when the last reference is dropped.  All fields are protected by
 * futex_lock.
 */
struct futex {
	void *f_uaddr;			/* user-space address identifying this futex */
	int f_refcount;
	uint32_t f_bitset;		/* bitset supplied at creation; not used for wake matching (see comment in linux_do_futex()) */
	LIST_ENTRY(futex) f_list;
	TAILQ_HEAD(, waiting_proc) f_waiting_proc;	/* FIFO of sleeping waiters */
};

/* Global list of all live futexes; protected by futex_lock. */
static LIST_HEAD(futex_list, futex) futex_list;
static kmutex_t futex_lock;

#define FUTEX_LOCK	mutex_enter(&futex_lock)
#define FUTEX_UNLOCK	mutex_exit(&futex_lock)
#define FUTEX_LOCKASSERT	KASSERT(mutex_owned(&futex_lock))

/* Coarse serialization of whole futex operations via the big kernel lock. */
#define FUTEX_SYSTEM_LOCK	KERNEL_LOCK(1, NULL)
#define FUTEX_SYSTEM_UNLOCK	KERNEL_UNLOCK_ONE(0)

#ifdef DEBUG_LINUX_FUTEX
int debug_futex = 1;
#define FUTEXPRINTF(a) do { if (debug_futex) printf a; } while (0)
#else
#define FUTEXPRINTF(a)
#endif
90
/*
 * Module initialization: set up the global futex mutex.
 * Called once when the Linux emulation is attached.
 */
void
linux_futex_init(void)
{
	FUTEXPRINTF(("%s: initializing futex\n", __func__));
	mutex_init(&futex_lock, MUTEX_DEFAULT, IPL_NONE);
}
97
/*
 * Module teardown: destroy the global futex mutex.
 * Counterpart of linux_futex_init().
 */
void
linux_futex_fini(void)
{
	FUTEXPRINTF(("%s: destroying futex\n", __func__));
	mutex_destroy(&futex_lock);
}
104
/*
 * Internal helpers.  futex_get/ref/put/sleep/wake must be called with
 * futex_lock held; futex_wp_alloc/free and futex_atomic_op must not.
 */
static struct waiting_proc *futex_wp_alloc(void);
static void futex_wp_free(struct waiting_proc *);
static struct futex *futex_get(void *, uint32_t);
static void futex_ref(struct futex *);
static void futex_put(struct futex *);
static int futex_sleep(struct futex **, lwp_t *, int, struct waiting_proc *);
static int futex_wake(struct futex *, int, struct futex *, int);
static int futex_atomic_op(lwp_t *, int, void *);
113
114 int
linux_sys_futex(struct lwp * l,const struct linux_sys_futex_args * uap,register_t * retval)115 linux_sys_futex(struct lwp *l, const struct linux_sys_futex_args *uap, register_t *retval)
116 {
117 /* {
118 syscallarg(int *) uaddr;
119 syscallarg(int) op;
120 syscallarg(int) val;
121 syscallarg(const struct linux_timespec *) timeout;
122 syscallarg(int *) uaddr2;
123 syscallarg(int) val3;
124 } */
125 struct linux_timespec lts;
126 struct timespec ts = { 0, 0 };
127 int error;
128
129 if ((SCARG(uap, op) & LINUX_FUTEX_CMD_MASK) == LINUX_FUTEX_WAIT &&
130 SCARG(uap, timeout) != NULL) {
131 if ((error = copyin(SCARG(uap, timeout),
132 <s, sizeof(lts))) != 0) {
133 return error;
134 }
135 linux_to_native_timespec(&ts, <s);
136 }
137 return linux_do_futex(l, uap, retval, &ts);
138 }
139
/*
 * Guts of the futex(2) emulation.  *ts holds the (already converted)
 * native timeout; it is {0,0} when the user passed none.
 *
 * Each operation is bracketed by FUTEX_SYSTEM_LOCK (big kernel lock)
 * for coarse serialization, with futex_lock taken inside for the
 * futex list and wait queues.
 */
int
linux_do_futex(struct lwp *l, const struct linux_sys_futex_args *uap,
    register_t *retval, struct timespec *ts)
{
	/* {
		syscallarg(int *) uaddr;
		syscallarg(int) op;
		syscallarg(int) val;
		syscallarg(const struct linux_timespec *) timeout;
		syscallarg(int *) uaddr2;
		syscallarg(int) val3;
	} */
	int val, val3;
	int ret;
	int error = 0;
	struct futex *f;
	struct futex *newf;
	int tout;
	struct futex *f2;
	struct waiting_proc *wp;
	int op_ret, cmd;
	clockid_t clk;

	cmd = SCARG(uap, op) & LINUX_FUTEX_CMD_MASK;
	val3 = SCARG(uap, val3);

	/*
	 * LINUX_FUTEX_CLOCK_REALTIME is only accepted for the WAIT
	 * flavours; everything else defaults to the monotonic clock.
	 */
	if (SCARG(uap, op) & LINUX_FUTEX_CLOCK_REALTIME) {
		switch (cmd) {
		case LINUX_FUTEX_WAIT_BITSET:
		case LINUX_FUTEX_WAIT:
			clk = CLOCK_REALTIME;
			break;
		default:
			return ENOSYS;
		}
	} else
		clk = CLOCK_MONOTONIC;

	/*
	 * Our implementation provides only private futexes. Most of the apps
	 * should use private futexes but don't claim so. Therefore we treat
	 * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works
	 * in most cases (ie. when futexes are not shared on file descriptor
	 * or between different processes).
	 *
	 * Note that we don't handle bitsets at all at the moment. We need
	 * to move from refcounting uaddr's to handling multiple futex entries
	 * pointing to the same uaddr, but having possibly different bitmask.
	 * Perhaps move to an implementation where each uaddr has a list of
	 * futexes.
	 */
	switch (cmd) {
	case LINUX_FUTEX_WAIT:
		/* Plain WAIT is WAIT_BITSET with an all-ones bitset. */
		val3 = FUTEX_BITSET_MATCH_ANY;
		/*FALLTHROUGH*/
	case LINUX_FUTEX_WAIT_BITSET:
		/* Convert the timespec into a tick count for cv_timedwait. */
		if ((error = ts2timo(clk, 0, ts, &tout, NULL)) != 0) {
			if (error != ETIMEDOUT)
				return error;
			/*
			 * If the user process requests a non null timeout,
			 * make sure we do not turn it into an infinite
			 * timeout because tout is 0.
			 *
			 * We use a minimal timeout of 1/hz. Maybe it would make
			 * sense to just return ETIMEDOUT without sleeping.
			 */
			if (SCARG(uap, timeout) != NULL)
				tout = 1;
			else
				tout = 0;	/* 0 == sleep forever */
		}
		FUTEX_SYSTEM_LOCK;
		/* Re-read the futex word and compare against the expected value. */
		if ((error = copyin(SCARG(uap, uaddr),
		    &val, sizeof(val))) != 0) {
			FUTEX_SYSTEM_UNLOCK;
			return error;
		}

		if (val != SCARG(uap, val)) {
			/* Value already changed: the wake happened first. */
			FUTEX_SYSTEM_UNLOCK;
			return EWOULDBLOCK;
		}

		FUTEXPRINTF(("FUTEX_WAIT %d.%d: val = %d, uaddr = %p, "
		    "*uaddr = %d, timeout = %lld.%09ld\n",
		    l->l_proc->p_pid, l->l_lid, SCARG(uap, val),
		    SCARG(uap, uaddr), val, (long long)ts->tv_sec,
		    ts->tv_nsec));


		/* Allocate the waiter record before taking futex_lock. */
		wp = futex_wp_alloc();
		FUTEX_LOCK;
		f = futex_get(SCARG(uap, uaddr), val3);
		/* futex_sleep() may requeue us; it returns the futex to put. */
		ret = futex_sleep(&f, l, tout, wp);
		futex_put(f);
		FUTEX_UNLOCK;
		futex_wp_free(wp);

		FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, "
		    "ret = %d\n", l->l_proc->p_pid, l->l_lid,
		    SCARG(uap, uaddr), ret));

		FUTEX_SYSTEM_UNLOCK;
		/* Map cv_timedwait_sig() results onto Linux errnos. */
		switch (ret) {
		case EWOULDBLOCK:	/* timeout */
			return ETIMEDOUT;
			break;
		case EINTR:		/* signal */
			return EINTR;
			break;
		case 0:			/* FUTEX_WAKE received */
			FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, got it\n",
			    l->l_proc->p_pid, l->l_lid, SCARG(uap, uaddr)));
			return 0;
			break;
		default:
			FUTEXPRINTF(("FUTEX_WAIT: unexpected ret = %d\n", ret));
			break;
		}

		/* NOTREACHED */
		break;

	case LINUX_FUTEX_WAKE:
		val = FUTEX_BITSET_MATCH_ANY;
		/*FALLTHROUGH*/
	case LINUX_FUTEX_WAKE_BITSET:
		/*
		 * XXX: Linux is able cope with different addresses
		 * corresponding to the same mapped memory in the sleeping
		 * and the waker process(es).
		 */
		FUTEXPRINTF(("FUTEX_WAKE %d.%d: uaddr = %p, val = %d\n",
		    l->l_proc->p_pid, l->l_lid,
		    SCARG(uap, uaddr), SCARG(uap, val)));

		FUTEX_SYSTEM_LOCK;
		FUTEX_LOCK;
		f = futex_get(SCARG(uap, uaddr), val3);
		/* Wake up to 'val' waiters; return how many were woken. */
		*retval = futex_wake(f, SCARG(uap, val), NULL, 0);
		futex_put(f);
		FUTEX_UNLOCK;
		FUTEX_SYSTEM_UNLOCK;

		break;

	case LINUX_FUTEX_CMP_REQUEUE:
		FUTEX_SYSTEM_LOCK;

		/* Only requeue if *uaddr still holds the expected val3. */
		if ((error = copyin(SCARG(uap, uaddr),
		    &val, sizeof(val))) != 0) {
			FUTEX_SYSTEM_UNLOCK;
			return error;
		}

		if (val != val3) {
			FUTEX_SYSTEM_UNLOCK;
			return EAGAIN;
		}

		FUTEXPRINTF(("FUTEX_CMP_REQUEUE %d.%d: uaddr = %p, val = %d, "
		    "uaddr2 = %p, val2 = %d\n",
		    l->l_proc->p_pid, l->l_lid,
		    SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2),
		    (int)(unsigned long)SCARG(uap, timeout)));

		/*
		 * Wake up to 'val' waiters on uaddr, requeue up to 'val2'
		 * more onto uaddr2.  Linux passes val2 in the timeout slot.
		 */
		FUTEX_LOCK;
		f = futex_get(SCARG(uap, uaddr), val3);
		newf = futex_get(SCARG(uap, uaddr2), val3);
		*retval = futex_wake(f, SCARG(uap, val), newf,
		    (int)(unsigned long)SCARG(uap, timeout));
		futex_put(f);
		futex_put(newf);
		FUTEX_UNLOCK;

		FUTEX_SYSTEM_UNLOCK;
		break;

	case LINUX_FUTEX_REQUEUE:
		/* Like CMP_REQUEUE, but without the value check. */
		FUTEX_SYSTEM_LOCK;

		FUTEXPRINTF(("FUTEX_REQUEUE %d.%d: uaddr = %p, val = %d, "
		    "uaddr2 = %p, val2 = %d\n",
		    l->l_proc->p_pid, l->l_lid,
		    SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2),
		    (int)(unsigned long)SCARG(uap, timeout)));

		FUTEX_LOCK;
		f = futex_get(SCARG(uap, uaddr), val3);
		newf = futex_get(SCARG(uap, uaddr2), val3);
		*retval = futex_wake(f, SCARG(uap, val), newf,
		    (int)(unsigned long)SCARG(uap, timeout));
		futex_put(f);
		futex_put(newf);
		FUTEX_UNLOCK;

		FUTEX_SYSTEM_UNLOCK;
		break;

	case LINUX_FUTEX_FD:
		/* Deprecated in Linux itself; never implemented here. */
		FUTEXPRINTF(("%s: unimplemented op %d\n", __func__, cmd));
		return ENOSYS;
	case LINUX_FUTEX_WAKE_OP:
		FUTEX_SYSTEM_LOCK;

		FUTEXPRINTF(("FUTEX_WAKE_OP %d.%d: uaddr = %p, op = %d, "
		    "val = %d, uaddr2 = %p, val2 = %d\n",
		    l->l_proc->p_pid, l->l_lid,
		    SCARG(uap, uaddr), cmd, SCARG(uap, val),
		    SCARG(uap, uaddr2),
		    (int)(unsigned long)SCARG(uap, timeout)));

		FUTEX_LOCK;
		f = futex_get(SCARG(uap, uaddr), val3);
		f2 = futex_get(SCARG(uap, uaddr2), val3);
		/* Drop futex_lock: futex_atomic_op() touches user memory. */
		FUTEX_UNLOCK;

		/*
		 * This function returns positive number as results and
		 * negative as errors
		 */
		op_ret = futex_atomic_op(l, val3, SCARG(uap, uaddr2));
		FUTEX_LOCK;
		if (op_ret < 0) {
			futex_put(f);
			futex_put(f2);
			FUTEX_UNLOCK;
			FUTEX_SYSTEM_UNLOCK;
			return -op_ret;
		}

		/* Always wake uaddr; wake uaddr2 only if the compare held. */
		ret = futex_wake(f, SCARG(uap, val), NULL, 0);
		futex_put(f);
		if (op_ret > 0) {
			op_ret = 0;
			/*
			 * Linux abuses the address of the timespec parameter
			 * as the number of retries
			 */
			op_ret += futex_wake(f2,
			    (int)(unsigned long)SCARG(uap, timeout), NULL, 0);
			ret += op_ret;
		}
		futex_put(f2);
		FUTEX_UNLOCK;
		FUTEX_SYSTEM_UNLOCK;
		*retval = ret;
		break;
	default:
		FUTEXPRINTF(("%s: unknown op %d\n", __func__, cmd));
		return ENOSYS;
	}
	return 0;
}
394
395 static struct waiting_proc *
futex_wp_alloc(void)396 futex_wp_alloc(void)
397 {
398 struct waiting_proc *wp;
399
400 wp = kmem_zalloc(sizeof(*wp), KM_SLEEP);
401 cv_init(&wp->wp_futex_cv, "futex");
402 return wp;
403 }
404
405 static void
futex_wp_free(struct waiting_proc * wp)406 futex_wp_free(struct waiting_proc *wp)
407 {
408
409 cv_destroy(&wp->wp_futex_cv);
410 kmem_free(wp, sizeof(*wp));
411 }
412
413 static struct futex *
futex_get(void * uaddr,uint32_t bitset)414 futex_get(void *uaddr, uint32_t bitset)
415 {
416 struct futex *f;
417
418 FUTEX_LOCKASSERT;
419
420 LIST_FOREACH(f, &futex_list, f_list) {
421 if (f->f_uaddr == uaddr) {
422 f->f_refcount++;
423 return f;
424 }
425 }
426
427 /* Not found, create it */
428 f = kmem_zalloc(sizeof(*f), KM_SLEEP);
429 f->f_uaddr = uaddr;
430 f->f_bitset = bitset;
431 f->f_refcount = 1;
432 TAILQ_INIT(&f->f_waiting_proc);
433 LIST_INSERT_HEAD(&futex_list, f, f_list);
434
435 return f;
436 }
437
438 static void
futex_ref(struct futex * f)439 futex_ref(struct futex *f)
440 {
441
442 FUTEX_LOCKASSERT;
443
444 f->f_refcount++;
445 }
446
447 static void
futex_put(struct futex * f)448 futex_put(struct futex *f)
449 {
450
451 FUTEX_LOCKASSERT;
452
453 f->f_refcount--;
454 if (f->f_refcount == 0) {
455 KASSERT(TAILQ_EMPTY(&f->f_waiting_proc));
456 LIST_REMOVE(f, f_list);
457 kmem_free(f, sizeof(*f));
458 }
459 }
460
/*
 * Put the calling LWP to sleep on futex *fp.  Called and returns with
 * futex_lock held (cv_timedwait_sig drops it while asleep).
 *
 * Returns 0 when woken by futex_wake(), EWOULDBLOCK on timeout, or
 * EINTR/ERESTART when interrupted by a signal.  *fp is updated to the
 * futex the waiter last belonged to — it may differ from the original
 * if the waiter was requeued — and the caller must futex_put() that
 * one.  'timeout' is in ticks; 0 means sleep without limit.
 */
static int
futex_sleep(struct futex **fp, lwp_t *l, int timeout, struct waiting_proc *wp)
{
	struct futex *f;
	int ret;

	FUTEX_LOCKASSERT;

	f = *fp;
	wp->wp_futex = f;
	TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list);
	wp->wp_onlist = true;
	ret = cv_timedwait_sig(&wp->wp_futex_cv, &futex_lock, timeout);

	/*
	 * we may have been requeued to a different futex before we were
	 * woken up, so let the caller know which futex to put. if we were
	 * woken by futex_wake() then it took us off the waiting list,
	 * but if our sleep was interrupted or timed out then we might
	 * need to take ourselves off the waiting list.
	 */

	f = wp->wp_futex;
	if (wp->wp_onlist) {
		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
	}
	*fp = f;
	return ret;
}
490
/*
 * Wake up to n waiters on f, then (if newf != NULL) requeue up to n2
 * additional waiters from f onto newf without waking them.  Returns
 * the total number of waiters woken plus requeued.  Caller holds
 * futex_lock and its own references on f and newf.
 */
static int
futex_wake(struct futex *f, int n, struct futex *newf, int n2)
{
	struct waiting_proc *wp;
	int count = 0;

	FUTEX_LOCKASSERT;

	/*
	 * wake up up to n threads waiting on this futex.
	 */

	while (n--) {
		wp = TAILQ_FIRST(&f->f_waiting_proc);
		if (wp == NULL)
			return count;

		KASSERT(f == wp->wp_futex);
		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
		/* Mark off-list before signalling; see futex_sleep(). */
		wp->wp_onlist = false;
		cv_signal(&wp->wp_futex_cv);
		count++;
	}
	if (newf == NULL)
		return count;

	/*
	 * then requeue up to n2 additional threads to newf
	 * (without waking them up).
	 */

	while (n2--) {
		wp = TAILQ_FIRST(&f->f_waiting_proc);
		if (wp == NULL)
			return count;

		KASSERT(f == wp->wp_futex);
		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
		/*
		 * Transfer the sleeper's reference from f to newf.  The
		 * caller still holds its own reference on f, so this
		 * futex_put() cannot free f mid-loop.
		 */
		futex_put(f);

		wp->wp_futex = newf;
		futex_ref(newf);
		TAILQ_INSERT_TAIL(&newf->f_waiting_proc, wp, wp_list);
		count++;
	}
	return count;
}
538
/*
 * Perform the encoded read-modify-write of FUTEX_WAKE_OP on *uaddr
 * (a user-space int) and evaluate the encoded comparison against the
 * old value.  Follows the Linux convention: returns the comparison
 * result (0 or 1) on success, or a negated errno on failure.
 *
 * encoded_op layout (Linux ABI): op in bits 28-31, cmp in bits 24-27,
 * oparg in bits 12-23, cmparg in bits 0-11.
 */
static int
futex_atomic_op(lwp_t *l, int encoded_op, void *uaddr)
{
	const int op = (encoded_op >> 28) & 7;
	const int cmp = (encoded_op >> 24) & 15;
	/*
	 * The shift pairs sign-extend the 12-bit argument fields.
	 * NOTE(review): this relies on arithmetic right-shift of signed
	 * ints, which is implementation-defined in C — confirm it holds
	 * on all supported compilers.
	 */
	const int cmparg = (encoded_op << 20) >> 20;
	int oparg = (encoded_op << 8) >> 20;
	int error, oldval, cval;

	/* FUTEX_OP_OPARG_SHIFT: interpret oparg as a shift count. */
	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
		oparg = 1 << oparg;

	/* XXX: linux verifies access here and returns EFAULT */

	if (copyin(uaddr, &cval, sizeof(int)) != 0)
		return -EFAULT;

	/* CAS loop: retry until the user word is unchanged under us. */
	for (;;) {
		int nval;

		switch (op) {
		case FUTEX_OP_SET:
			nval = oparg;
			break;
		case FUTEX_OP_ADD:
			nval = cval + oparg;
			break;
		case FUTEX_OP_OR:
			nval = cval | oparg;
			break;
		case FUTEX_OP_ANDN:
			nval = cval & ~oparg;
			break;
		case FUTEX_OP_XOR:
			nval = cval ^ oparg;
			break;
		default:
			return -ENOSYS;
		}

		error = ucas_int(uaddr, cval, nval, &oldval);
		if (error || oldval == cval) {
			break;
		}
		cval = oldval;
	}

	if (error)
		return -EFAULT;

	/* Compare the pre-operation value against cmparg. */
	switch (cmp) {
	case FUTEX_OP_CMP_EQ:
		return (oldval == cmparg);
	case FUTEX_OP_CMP_NE:
		return (oldval != cmparg);
	case FUTEX_OP_CMP_LT:
		return (oldval < cmparg);
	case FUTEX_OP_CMP_GE:
		return (oldval >= cmparg);
	case FUTEX_OP_CMP_LE:
		return (oldval <= cmparg);
	case FUTEX_OP_CMP_GT:
		return (oldval > cmparg);
	default:
		return -ENOSYS;
	}
}
606
607 int
linux_sys_set_robust_list(struct lwp * l,const struct linux_sys_set_robust_list_args * uap,register_t * retval)608 linux_sys_set_robust_list(struct lwp *l,
609 const struct linux_sys_set_robust_list_args *uap, register_t *retval)
610 {
611 /* {
612 syscallarg(struct linux_robust_list_head *) head;
613 syscallarg(size_t) len;
614 } */
615 struct linux_emuldata *led;
616
617 if (SCARG(uap, len) != sizeof(struct linux_robust_list_head))
618 return EINVAL;
619 led = l->l_emuldata;
620 led->led_robust_head = SCARG(uap, head);
621 *retval = 0;
622 return 0;
623 }
624
/*
 * get_robust_list(2): return the robust list head pointer and its
 * size for the given LWP (pid == 0 means the calling LWP; otherwise
 * the LWP id is looked up within the calling process).
 */
int
linux_sys_get_robust_list(struct lwp *l,
    const struct linux_sys_get_robust_list_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) pid;
		syscallarg(struct linux_robust_list_head **) head;
		syscallarg(size_t *) len;
	} */
	struct proc *p;
	struct linux_emuldata *led;
	struct linux_robust_list_head *head;
	size_t len;
	int error = 0;

	p = l->l_proc;
	if (!SCARG(uap, pid)) {
		led = l->l_emuldata;
		head = led->led_robust_head;
	} else {
		/* lwp_find() requires p_lock; copy out the head under it. */
		mutex_enter(p->p_lock);
		l = lwp_find(p, SCARG(uap, pid));
		if (l != NULL) {
			led = l->l_emuldata;
			head = led->led_robust_head;
		}
		mutex_exit(p->p_lock);
		if (l == NULL) {
			return ESRCH;
		}
	}
#ifdef __arch64__
	/*
	 * 32-bit process on a 64-bit kernel: report the 32-bit layout
	 * (12 == three 32-bit fields) and truncate the pointer.
	 */
	if (p->p_flag & PK_32) {
		uint32_t u32;

		u32 = 12;
		error = copyout(&u32, SCARG(uap, len), sizeof(u32));
		if (error)
			return error;
		u32 = (uint32_t)(uintptr_t)head;
		return copyout(&u32, SCARG(uap, head), sizeof(u32));
	}
#endif

	len = sizeof(*head);
	error = copyout(&len, SCARG(uap, len), sizeof(len));
	if (error)
		return error;
	return copyout(&head, SCARG(uap, head), sizeof(head));
}
675
676 static int
handle_futex_death(void * uaddr,pid_t pid,int pi)677 handle_futex_death(void *uaddr, pid_t pid, int pi)
678 {
679 int uval, nval, mval;
680 struct futex *f;
681
682 retry:
683 if (copyin(uaddr, &uval, sizeof(uval)))
684 return EFAULT;
685
686 if ((uval & FUTEX_TID_MASK) == pid) {
687 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
688 nval = atomic_cas_32(uaddr, uval, mval);
689
690 if (nval == -1)
691 return EFAULT;
692
693 if (nval != uval)
694 goto retry;
695
696 if (!pi && (uval & FUTEX_WAITERS)) {
697 FUTEX_LOCK;
698 f = futex_get(uaddr, FUTEX_BITSET_MATCH_ANY);
699 futex_wake(f, 1, NULL, 0);
700 FUTEX_UNLOCK;
701 }
702 }
703
704 return 0;
705 }
706
/*
 * Fetch one robust-list entry pointer from user space at 'head',
 * handling the 32-bit layout for 32-bit processes on 64-bit kernels.
 * The low bit of the stored pointer flags a PI futex: it is returned
 * separately in *pi and masked out of *entry.  Returns 0 or EFAULT.
 */
static int
fetch_robust_entry(struct lwp *l, struct linux_robust_list **entry,
    struct linux_robust_list **head, int *pi)
{
	unsigned long uentry;

#ifdef __arch64__
	if (l->l_proc->p_flag & PK_32) {
		uint32_t u32;

		if (copyin(head, &u32, sizeof(u32)))
			return EFAULT;
		uentry = (unsigned long)u32;
	} else
#endif
	if (copyin(head, &uentry, sizeof(uentry)))
		return EFAULT;

	*entry = (void *)(uentry & ~1UL);
	*pi = uentry & 1;

	return 0;
}
730
/*
 * This walks the list of robust futexes, releasing them.
 * Called at LWP exit; mirrors Linux's exit_robust_list().  All user
 * memory accesses are best-effort: any fault simply stops the walk.
 */
void
release_futexes(struct lwp *l)
{
	struct linux_robust_list_head head;
	struct linux_robust_list *entry, *next_entry = NULL, *pending;
	/* limit == 2048 bounds the walk against cyclic/corrupt user lists. */
	unsigned int limit = 2048, pi, next_pi, pip;
	struct linux_emuldata *led;
	unsigned long futex_offset;
	int rc;

	led = l->l_emuldata;
	if (led->led_robust_head == NULL)
		return;

#ifdef __arch64__
	/* 32-bit process: translate the packed 32-bit head layout. */
	if (l->l_proc->p_flag & PK_32) {
		uint32_t u32s[3];

		if (copyin(led->led_robust_head, u32s, sizeof(u32s)))
			return;

		head.list.next = (void *)(uintptr_t)u32s[0];
		head.futex_offset = (unsigned long)u32s[1];
		head.pending_list = (void *)(uintptr_t)u32s[2];
	} else
#endif
	if (copyin(led->led_robust_head, &head, sizeof(head)))
		return;

	if (fetch_robust_entry(l, &entry, &head.list.next, &pi))
		return;

#ifdef __arch64__
	/*
	 * NOTE(review): this re-reads the first word of the user head as
	 * futex_offset for PK_32, while the copyin above already filled
	 * head.futex_offset from u32s[1] — verify which is intended.
	 */
	if (l->l_proc->p_flag & PK_32) {
		uint32_t u32;

		if (copyin(led->led_robust_head, &u32, sizeof(u32)))
			return;

		head.futex_offset = (unsigned long)u32;
	} else
#endif
	if (copyin(&head.futex_offset, &futex_offset, sizeof(unsigned long)))
		return;

	if (fetch_robust_entry(l, &pending, &head.pending_list, &pip))
		return;

	/* Walk the circular list until it wraps back to the head. */
	while (entry != &head.list) {
		/* Fetch the next entry before releasing the current one. */
		rc = fetch_robust_entry(l, &next_entry, &entry->next, &next_pi);

		/* The pending entry is handled separately below. */
		if (entry != pending)
			if (handle_futex_death((char *)entry + futex_offset,
			    l->l_lid, pi))
				return;

		if (rc)
			return;

		entry = next_entry;
		pi = next_pi;

		if (!--limit)
			break;

		yield();	/* XXX why? */
	}

	if (pending)
		handle_futex_death((char *)pending + futex_offset,
		    l->l_lid, pip);
}
804