xref: /freebsd/sys/fs/cuse/cuse.c (revision 780fb4a2)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2017 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/stdint.h>
28 #include <sys/stddef.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/linker_set.h>
36 #include <sys/module.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/condvar.h>
40 #include <sys/sysctl.h>
41 #include <sys/unistd.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/uio.h>
45 #include <sys/poll.h>
46 #include <sys/sx.h>
47 #include <sys/rwlock.h>
48 #include <sys/queue.h>
49 #include <sys/fcntl.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/selinfo.h>
53 #include <sys/ptrace.h>
54 
55 #include <machine/bus.h>
56 
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_object.h>
60 #include <vm/vm_page.h>
61 #include <vm/vm_pager.h>
62 
63 #include <fs/cuse/cuse_defs.h>
64 #include <fs/cuse/cuse_ioctl.h>
65 
/* Declare version 1 of the "cuse" module interface. */
MODULE_VERSION(cuse, 1);

/*
 * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
 * declaring support for the cuse4bsd interface in cuse.ko:
 */
MODULE_VERSION(cuse4bsd, 1);

/* Advertise the feature only when the FEATURE() macro is available. */
#ifdef FEATURE
FEATURE(cuse, "Userspace character devices");
#endif

/*
 * Forward declarations. "struct cuse_server" and "struct cuse_client"
 * are defined further down in this file. "struct cuse_command" is
 * embedded by value below, so its full definition presumably comes
 * from the included cuse headers.
 */
struct cuse_command;
struct cuse_server;
struct cuse_client;
81 
/*
 * One outstanding request slot of a client. Each client owns
 * CUSE_CMD_MAX of these (see "struct cuse_client").
 */
struct cuse_client_command {
	/* linkage on the server's "head" queue of pending commands */
	TAILQ_ENTRY(cuse_client_command) entry;
	/* command data copied out to the server by CUSE_IOCTL_GET_COMMAND */
	struct cuse_command sub;
	/* per-command lock, see cuse_cmd_lock() and cuse_cmd_unlock() */
	struct sx sx;
	/* signalled on command completion and when "proc_refs" drops */
	struct cv cv;
	/* server thread which fetched this command, NULL after sync */
	struct thread *entered;
	/* owning client */
	struct cuse_client *client;
	/* process of the client waiting for completion, else NULL */
	struct proc *proc_curr;
	/* number of server copy operations currently using "proc_curr" */
	int	proc_refs;
	/* set when the waiting client got a signal, see CUSE_IOCTL_GET_SIG */
	int	got_signal;
	/* result code stored by CUSE_IOCTL_SYNC_COMMAND */
	int	error;
	/* CUSE_CMD_NONE until the server completes the command */
	int	command;
};
95 
/* One block of memory allocated through CUSE_IOCTL_ALLOC_MEMORY. */
struct cuse_memory {
	/* linkage on the server's "hmem" list */
	TAILQ_ENTRY(cuse_memory) entry;
	/* backing VM object returned by vm_pager_allocate() */
	vm_object_t object;
	/* size of the allocation in pages */
	uint32_t page_count;
	/* allocation number, unique within one server */
	uint32_t alloc_nr;
};
102 
/* One character device created through CUSE_IOCTL_CREATE_DEV. */
struct cuse_server_dev {
	/* linkage on the server's "hdev" list */
	TAILQ_ENTRY(cuse_server_dev) entry;
	/* server which created this device */
	struct cuse_server *server;
	/* kernel character device, as returned by make_dev_credf() */
	struct cdev *kern_dev;
	/* handle supplied by the server, matched by CUSE_IOCTL_DESTROY_DEV */
	struct cuse_dev *user_dev;
};
109 
/* State of one open instance of the CUSE server device. */
struct cuse_server {
	/* linkage on the global "cuse_server_head" list */
	TAILQ_ENTRY(cuse_server) entry;
	/* queue of client commands waiting to be fetched by the server */
	TAILQ_HEAD(, cuse_client_command) head;
	/* devices created by this server */
	TAILQ_HEAD(, cuse_server_dev) hdev;
	/* clients attached to this server */
	TAILQ_HEAD(, cuse_client) hcli;
	/* memory blocks allocated by this server */
	TAILQ_HEAD(, cuse_memory) hmem;
	/* signalled when a command is queued on "head" */
	struct cv cv;
	/* select and kqueue bookkeeping, see cuse_server_wakeup_locked() */
	struct selinfo selinfo;
	/* process ID of the process which opened the server */
	pid_t	pid;
	/* non-zero once the server is shutting down */
	int	is_closing;
	/* reference count, the server is freed when it reaches zero */
	int	refs;
};
122 
/* State of one open instance of a device created by a CUSE server. */
struct cuse_client {
	/* linkage on the server's "hcli" list */
	TAILQ_ENTRY(cuse_client) entry;
	/* NOTE(review): not referenced in the visible part of this file */
	TAILQ_ENTRY(cuse_client) entry_ref;
	/* one request slot per command type */
	struct cuse_client_command cmds[CUSE_CMD_MAX];
	/* server this client belongs to */
	struct cuse_server *server;
	/* device the client opened, cleared when the client is closing */
	struct cuse_server_dev *server_dev;

	/* staging buffer accessed by cuse_server_ioctl_copy_locked() */
	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);

	int	fflags;			/* file flags */
	int	cflags;			/* client flags */
#define	CUSE_CLI_IS_CLOSING 0x01
#define	CUSE_CLI_KNOTE_NEED_READ 0x02
#define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
#define	CUSE_CLI_KNOTE_HAS_READ 0x08
#define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
};
140 
/* Evaluates to non-zero when the given client is marked as closing. */
#define	CUSE_CLIENT_CLOSING(pcc) \
    ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
143 
/* malloc(9) type used for all CUSE allocations in this file */
static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");

/* list of all open servers, protected by "cuse_mtx" */
static TAILQ_HEAD(, cuse_server) cuse_server_head;
/* global mutex taken by cuse_lock() and released by cuse_unlock() */
static struct mtx cuse_mtx;
/* the "cuse" control device created by cuse_kern_init() */
static struct cdev *cuse_dev;
/* owner of each allocated unit slot, NULL when the slot is free */
static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
/* identifier stored for each unit slot, parallel to "cuse_alloc_unit" */
static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
151 
/*
 * Prototypes for functions which are referenced before they are
 * defined: the client wakeup helper and the kqueue filter callbacks
 * used by the filter operation tables below.
 */
static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
static void cuse_client_kqfilter_read_detach(struct knote *kn);
static void cuse_client_kqfilter_write_detach(struct knote *kn);
static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
157 
158 static struct filterops cuse_client_kqfilter_read_ops = {
159 	.f_isfd = 1,
160 	.f_detach = cuse_client_kqfilter_read_detach,
161 	.f_event = cuse_client_kqfilter_read_event,
162 };
163 
164 static struct filterops cuse_client_kqfilter_write_ops = {
165 	.f_isfd = 1,
166 	.f_detach = cuse_client_kqfilter_write_detach,
167 	.f_event = cuse_client_kqfilter_write_event,
168 };
169 
170 static d_open_t cuse_client_open;
171 static d_close_t cuse_client_close;
172 static d_ioctl_t cuse_client_ioctl;
173 static d_read_t cuse_client_read;
174 static d_write_t cuse_client_write;
175 static d_poll_t cuse_client_poll;
176 static d_mmap_single_t cuse_client_mmap_single;
177 static d_kqfilter_t cuse_client_kqfilter;
178 
179 static struct cdevsw cuse_client_devsw = {
180 	.d_version = D_VERSION,
181 	.d_open = cuse_client_open,
182 	.d_close = cuse_client_close,
183 	.d_ioctl = cuse_client_ioctl,
184 	.d_name = "cuse_client",
185 	.d_flags = D_TRACKCLOSE,
186 	.d_read = cuse_client_read,
187 	.d_write = cuse_client_write,
188 	.d_poll = cuse_client_poll,
189 	.d_mmap_single = cuse_client_mmap_single,
190 	.d_kqfilter = cuse_client_kqfilter,
191 };
192 
193 static d_open_t cuse_server_open;
194 static d_close_t cuse_server_close;
195 static d_ioctl_t cuse_server_ioctl;
196 static d_read_t cuse_server_read;
197 static d_write_t cuse_server_write;
198 static d_poll_t cuse_server_poll;
199 static d_mmap_single_t cuse_server_mmap_single;
200 
201 static struct cdevsw cuse_server_devsw = {
202 	.d_version = D_VERSION,
203 	.d_open = cuse_server_open,
204 	.d_close = cuse_server_close,
205 	.d_ioctl = cuse_server_ioctl,
206 	.d_name = "cuse_server",
207 	.d_flags = D_TRACKCLOSE,
208 	.d_read = cuse_server_read,
209 	.d_write = cuse_server_write,
210 	.d_poll = cuse_server_poll,
211 	.d_mmap_single = cuse_server_mmap_single,
212 };
213 
/* Prototypes for helpers which are used before their definition. */
static void cuse_client_is_closing(struct cuse_client *);
static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
216 
/* Acquire the global CUSE mutex, "cuse_mtx". */
static void
cuse_lock(void)
{
	mtx_lock(&cuse_mtx);
}
222 
/* Release the global CUSE mutex, "cuse_mtx". */
static void
cuse_unlock(void)
{
	mtx_unlock(&cuse_mtx);
}
228 
/* Exclusively lock the sx lock of the given client command. */
static void
cuse_cmd_lock(struct cuse_client_command *pccmd)
{
	sx_xlock(&pccmd->sx);
}
234 
/* Release the exclusive sx lock of the given client command. */
static void
cuse_cmd_unlock(struct cuse_client_command *pccmd)
{
	sx_xunlock(&pccmd->sx);
}
240 
241 static void
242 cuse_kern_init(void *arg)
243 {
244 	TAILQ_INIT(&cuse_server_head);
245 
246 	mtx_init(&cuse_mtx, "cuse-mtx", NULL, MTX_DEF);
247 
248 	cuse_dev = make_dev(&cuse_server_devsw, 0,
249 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
250 
251 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
252 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
253 	    (CUSE_VERSION >> 0) & 0xFF);
254 }
255 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
256 
257 static void
258 cuse_kern_uninit(void *arg)
259 {
260 	void *ptr;
261 
262 	while (1) {
263 
264 		printf("Cuse: Please exit all /dev/cuse instances "
265 		    "and processes which have used this device.\n");
266 
267 		pause("DRAIN", 2 * hz);
268 
269 		cuse_lock();
270 		ptr = TAILQ_FIRST(&cuse_server_head);
271 		cuse_unlock();
272 
273 		if (ptr == NULL)
274 			break;
275 	}
276 
277 	if (cuse_dev != NULL)
278 		destroy_dev(cuse_dev);
279 
280 	mtx_destroy(&cuse_mtx);
281 }
282 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
283 
284 static int
285 cuse_server_get(struct cuse_server **ppcs)
286 {
287 	struct cuse_server *pcs;
288 	int error;
289 
290 	error = devfs_get_cdevpriv((void **)&pcs);
291 	if (error != 0) {
292 		*ppcs = NULL;
293 		return (error);
294 	}
295 	/* check if closing */
296 	cuse_lock();
297 	if (pcs->is_closing) {
298 		cuse_unlock();
299 		*ppcs = NULL;
300 		return (EINVAL);
301 	}
302 	cuse_unlock();
303 	*ppcs = pcs;
304 	return (0);
305 }
306 
307 static void
308 cuse_server_is_closing(struct cuse_server *pcs)
309 {
310 	struct cuse_client *pcc;
311 
312 	if (pcs->is_closing)
313 		return;
314 
315 	pcs->is_closing = 1;
316 
317 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
318 		cuse_client_is_closing(pcc);
319 	}
320 }
321 
322 static struct cuse_client_command *
323 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
324 {
325 	struct cuse_client *pcc;
326 	int n;
327 
328 	if (pcs->is_closing)
329 		goto done;
330 
331 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
332 		if (CUSE_CLIENT_CLOSING(pcc))
333 			continue;
334 		for (n = 0; n != CUSE_CMD_MAX; n++) {
335 			if (pcc->cmds[n].entered == td)
336 				return (&pcc->cmds[n]);
337 		}
338 	}
339 done:
340 	return (NULL);
341 }
342 
/*
 * Sanitise a NUL-terminated string in place: every character which is
 * not an ASCII letter, an ASCII digit, '.', '_' or '/' is replaced by
 * an underscore.
 */
static void
cuse_str_filter(char *ptr)
{
	for (; *ptr != 0; ptr++) {
		int ch = *ptr;
		int allowed;

		allowed = (ch >= 'a' && ch <= 'z') ||
		    (ch >= 'A' && ch <= 'Z') ||
		    (ch >= '0' && ch <= '9') ||
		    ch == '.' || ch == '_' || ch == '/';

		if (!allowed)
			*ptr = '_';
	}
}
371 
372 static int
373 cuse_convert_error(int error)
374 {
375 	;				/* indent fix */
376 	switch (error) {
377 	case CUSE_ERR_NONE:
378 		return (0);
379 	case CUSE_ERR_BUSY:
380 		return (EBUSY);
381 	case CUSE_ERR_WOULDBLOCK:
382 		return (EWOULDBLOCK);
383 	case CUSE_ERR_INVALID:
384 		return (EINVAL);
385 	case CUSE_ERR_NO_MEMORY:
386 		return (ENOMEM);
387 	case CUSE_ERR_FAULT:
388 		return (EFAULT);
389 	case CUSE_ERR_SIGNAL:
390 		return (EINTR);
391 	case CUSE_ERR_NO_DEVICE:
392 		return (ENODEV);
393 	default:
394 		return (ENXIO);
395 	}
396 }
397 
/*
 * Release a memory block which has already been unlinked from its
 * server: drop the reference on the VM object and free the tracking
 * structure. Both visible callers invoke this with the global lock
 * released.
 */
static void
cuse_vm_memory_free(struct cuse_memory *mem)
{
	/* last user is gone - free */
	vm_object_deallocate(mem->object);

	/* free CUSE memory */
	free(mem, M_CUSE);
}
407 
408 static int
409 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
410     uint32_t page_count)
411 {
412 	struct cuse_memory *temp;
413 	struct cuse_memory *mem;
414 	vm_object_t object;
415 	int error;
416 
417 	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
418 	if (mem == NULL)
419 		return (ENOMEM);
420 
421 	object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
422 	    VM_PROT_DEFAULT, 0, curthread->td_ucred);
423 	if (object == NULL) {
424 		error = ENOMEM;
425 		goto error_0;
426 	}
427 
428 	cuse_lock();
429 	/* check if allocation number already exists */
430 	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
431 		if (temp->alloc_nr == alloc_nr)
432 			break;
433 	}
434 	if (temp != NULL) {
435 		cuse_unlock();
436 		error = EBUSY;
437 		goto error_1;
438 	}
439 	mem->object = object;
440 	mem->page_count = page_count;
441 	mem->alloc_nr = alloc_nr;
442 	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
443 	cuse_unlock();
444 
445 	return (0);
446 
447 error_1:
448 	vm_object_deallocate(object);
449 error_0:
450 	free(mem, M_CUSE);
451 	return (error);
452 }
453 
454 static int
455 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
456 {
457 	struct cuse_memory *mem;
458 
459 	cuse_lock();
460 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
461 		if (mem->alloc_nr == alloc_nr)
462 			break;
463 	}
464 	if (mem == NULL) {
465 		cuse_unlock();
466 		return (EINVAL);
467 	}
468 	TAILQ_REMOVE(&pcs->hmem, mem, entry);
469 	cuse_unlock();
470 
471 	cuse_vm_memory_free(mem);
472 
473 	return (0);
474 }
475 
476 static int
477 cuse_client_get(struct cuse_client **ppcc)
478 {
479 	struct cuse_client *pcc;
480 	int error;
481 
482 	/* try to get private data */
483 	error = devfs_get_cdevpriv((void **)&pcc);
484 	if (error != 0) {
485 		*ppcc = NULL;
486 		return (error);
487 	}
488 	/* check if closing */
489 	cuse_lock();
490 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
491 		cuse_unlock();
492 		*ppcc = NULL;
493 		return (EINVAL);
494 	}
495 	cuse_unlock();
496 	*ppcc = pcc;
497 	return (0);
498 }
499 
500 static void
501 cuse_client_is_closing(struct cuse_client *pcc)
502 {
503 	struct cuse_client_command *pccmd;
504 	uint32_t n;
505 
506 	if (CUSE_CLIENT_CLOSING(pcc))
507 		return;
508 
509 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
510 	pcc->server_dev = NULL;
511 
512 	for (n = 0; n != CUSE_CMD_MAX; n++) {
513 
514 		pccmd = &pcc->cmds[n];
515 
516 		if (pccmd->entry.tqe_prev != NULL) {
517 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
518 			pccmd->entry.tqe_prev = NULL;
519 		}
520 		cv_broadcast(&pccmd->cv);
521 	}
522 }
523 
524 static void
525 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
526     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
527 {
528 	unsigned long cuse_fflags = 0;
529 	struct cuse_server *pcs;
530 
531 	if (fflags & FREAD)
532 		cuse_fflags |= CUSE_FFLAG_READ;
533 
534 	if (fflags & FWRITE)
535 		cuse_fflags |= CUSE_FFLAG_WRITE;
536 
537 	if (ioflag & IO_NDELAY)
538 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
539 
540 	pccmd->sub.fflags = cuse_fflags;
541 	pccmd->sub.data_pointer = data_ptr;
542 	pccmd->sub.argument = arg;
543 
544 	pcs = pccmd->client->server;
545 
546 	if ((pccmd->entry.tqe_prev == NULL) &&
547 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
548 	    (pcs->is_closing == 0)) {
549 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
550 		cv_signal(&pcs->cv);
551 	}
552 }
553 
554 static void
555 cuse_client_got_signal(struct cuse_client_command *pccmd)
556 {
557 	struct cuse_server *pcs;
558 
559 	pccmd->got_signal = 1;
560 
561 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
562 
563 	pcs = pccmd->client->server;
564 
565 	if ((pccmd->entry.tqe_prev == NULL) &&
566 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
567 	    (pcs->is_closing == 0)) {
568 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
569 		cv_signal(&pcs->cv);
570 	}
571 }
572 
/*
 * Wait, with the global lock held, until the server has completed the
 * given command and return the CUSE error code stored by the server.
 * CUSE_ERR_OTHER is returned when the client or server is closing.
 *
 * While waiting, "proc_curr" publishes the calling process so that
 * the server's copy helpers can access it. Before returning, the
 * function waits until all such accesses ("proc_refs") are done.
 *
 * NOTE(review): "arg_ptr" and "arg_len" are not used by this function.
 */
static int
cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
    uint8_t *arg_ptr, uint32_t arg_len)
{
	int error;

	error = 0;

	/* publish the waiting process for the server's copy helpers */
	pccmd->proc_curr = curthread->td_proc;

	if (CUSE_CLIENT_CLOSING(pccmd->client) ||
	    pccmd->client->server->is_closing) {
		error = CUSE_ERR_OTHER;
		goto done;
	}
	while (pccmd->command == CUSE_CMD_NONE) {
		if (error != 0) {
			/*
			 * A signal was already reported to the server;
			 * keep waiting without being interrupted again.
			 */
			cv_wait(&pccmd->cv, &cuse_mtx);
		} else {
			error = cv_wait_sig(&pccmd->cv, &cuse_mtx);

			/* first interruption: tell the server about it */
			if (error != 0)
				cuse_client_got_signal(pccmd);
		}
		if (CUSE_CLIENT_CLOSING(pccmd->client) ||
		    pccmd->client->server->is_closing) {
			error = CUSE_ERR_OTHER;
			goto done;
		}
	}

	/* consume the completion and pass the wakeup on */
	error = pccmd->error;
	pccmd->command = CUSE_CMD_NONE;
	cv_signal(&pccmd->cv);

done:

	/* wait until all process references are gone */

	pccmd->proc_curr = NULL;

	while (pccmd->proc_refs != 0)
		cv_wait(&pccmd->cv, &cuse_mtx);

	return (error);
}
619 
620 /*------------------------------------------------------------------------*
621  *	CUSE SERVER PART
622  *------------------------------------------------------------------------*/
623 
624 static void
625 cuse_server_free_dev(struct cuse_server_dev *pcsd)
626 {
627 	struct cuse_server *pcs;
628 	struct cuse_client *pcc;
629 
630 	/* get server pointer */
631 	pcs = pcsd->server;
632 
633 	/* prevent creation of more devices */
634 	cuse_lock();
635 	if (pcsd->kern_dev != NULL)
636 		pcsd->kern_dev->si_drv1 = NULL;
637 
638 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
639 		if (pcc->server_dev == pcsd)
640 			cuse_client_is_closing(pcc);
641 	}
642 	cuse_unlock();
643 
644 	/* destroy device, if any */
645 	if (pcsd->kern_dev != NULL) {
646 		/* destroy device synchronously */
647 		destroy_dev(pcsd->kern_dev);
648 	}
649 	free(pcsd, M_CUSE);
650 }
651 
/*
 * Drop one reference on the server. When the last reference goes
 * away the server is marked as closing, unlinked from the global
 * list, and all of its units, devices and memory blocks are released
 * before the server itself is freed.
 */
static void
cuse_server_unref(struct cuse_server *pcs)
{
	struct cuse_server_dev *pcsd;
	struct cuse_memory *mem;

	cuse_lock();
	pcs->refs--;
	if (pcs->refs != 0) {
		/* other references remain */
		cuse_unlock();
		return;
	}
	cuse_server_is_closing(pcs);
	/* final client wakeup, if any */
	cuse_server_wakeup_all_client_locked(pcs);

	TAILQ_REMOVE(&cuse_server_head, pcs, entry);

	/* an ID of -1 releases every unit owned by this server */
	cuse_free_unit_by_id_locked(pcs, -1);

	/*
	 * Each device is unlinked under the lock, but freed with the
	 * lock dropped, because cuse_server_free_dev() takes the lock
	 * itself.
	 */
	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
		cuse_unlock();
		cuse_server_free_dev(pcsd);
		cuse_lock();
	}

	/* same unlink-then-free pattern for the memory blocks */
	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
		TAILQ_REMOVE(&pcs->hmem, mem, entry);
		cuse_unlock();
		cuse_vm_memory_free(mem);
		cuse_lock();
	}

	knlist_clear(&pcs->selinfo.si_note, 1);
	knlist_destroy(&pcs->selinfo.si_note);

	cuse_unlock();

	seldrain(&pcs->selinfo);

	cv_destroy(&pcs->cv);

	free(pcs, M_CUSE);
}
697 
/*
 * cdevpriv destructor registered by cuse_server_open(): drops the
 * reference held on behalf of the open server file.
 */
static void
cuse_server_free(void *arg)
{
	struct cuse_server *pcs = arg;

	/* drop refcount */
	cuse_server_unref(pcs);
}
706 
707 static int
708 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
709 {
710 	struct cuse_server *pcs;
711 
712 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
713 	if (pcs == NULL)
714 		return (ENOMEM);
715 
716 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
717 		printf("Cuse: Cannot set cdevpriv.\n");
718 		free(pcs, M_CUSE);
719 		return (ENOMEM);
720 	}
721 	/* store current process ID */
722 	pcs->pid = curproc->p_pid;
723 
724 	TAILQ_INIT(&pcs->head);
725 	TAILQ_INIT(&pcs->hdev);
726 	TAILQ_INIT(&pcs->hcli);
727 	TAILQ_INIT(&pcs->hmem);
728 
729 	cv_init(&pcs->cv, "cuse-server-cv");
730 
731 	knlist_init_mtx(&pcs->selinfo.si_note, &cuse_mtx);
732 
733 	cuse_lock();
734 	pcs->refs++;
735 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
736 	cuse_unlock();
737 
738 	return (0);
739 }
740 
741 static int
742 cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
743 {
744 	struct cuse_server *pcs;
745 	int error;
746 
747 	error = cuse_server_get(&pcs);
748 	if (error != 0)
749 		goto done;
750 
751 	cuse_lock();
752 	cuse_server_is_closing(pcs);
753 	/* final client wakeup, if any */
754 	cuse_server_wakeup_all_client_locked(pcs);
755 
756 	knlist_clear(&pcs->selinfo.si_note, 1);
757 	cuse_unlock();
758 
759 done:
760 	return (0);
761 }
762 
/* read(2) is not supported on the control device: always ENXIO. */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	return (ENXIO);
}
768 
/* write(2) is not supported on the control device: always ENXIO. */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	return (ENXIO);
}
774 
/*
 * Copy data between the calling server process and the client's
 * "ioctl_buffer". The peer pointer in "pchk" is an offset into that
 * buffer, biased by CUSE_BUF_MIN_PTR.
 *
 * "isread" equal to zero copies from the server's "local_ptr" into the
 * buffer; any other value copies from the buffer to "local_ptr".
 *
 * Called with the global lock held. The lock is dropped around the
 * actual copy and re-acquired before returning. Returns EFAULT for an
 * out-of-range chunk, ENXIO when no client is waiting on the command,
 * ENOMEM when "proc_refs" is negative, else the copyin()/copyout()
 * result.
 */
static int
cuse_server_ioctl_copy_locked(struct cuse_client_command *pccmd,
    struct cuse_data_chunk *pchk, int isread)
{
	struct proc *p_proc;
	uint32_t offset;
	int error;

	/* convert the peer pointer into a buffer offset */
	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;

	/* the chunk must lie completely inside the buffer */
	if (pchk->length > CUSE_BUFFER_MAX)
		return (EFAULT);

	if (offset >= CUSE_BUFFER_MAX)
		return (EFAULT);

	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
		return (EFAULT);

	/* a client must be waiting for this command */
	p_proc = pccmd->proc_curr;
	if (p_proc == NULL)
		return (ENXIO);

	if (pccmd->proc_refs < 0)
		return (ENOMEM);

	/* keep the client from returning while the copy is in progress */
	pccmd->proc_refs++;

	cuse_unlock();

	if (isread == 0) {
		error = copyin(
		    (void *)pchk->local_ptr,
		    pccmd->client->ioctl_buffer + offset,
		    pchk->length);
	} else {
		error = copyout(
		    pccmd->client->ioctl_buffer + offset,
		    (void *)pchk->local_ptr,
		    pchk->length);
	}

	cuse_lock();

	pccmd->proc_refs--;

	/* the client stopped waiting meanwhile; let it re-check the count */
	if (pccmd->proc_curr == NULL)
		cv_signal(&pccmd->cv);

	return (error);
}
826 
827 static int
828 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
829     struct proc *proc_d, vm_offset_t data_d, size_t len)
830 {
831 	struct thread *td;
832 	struct proc *proc_cur;
833 	int error;
834 
835 	td = curthread;
836 	proc_cur = td->td_proc;
837 
838 	if (proc_cur == proc_d) {
839 		struct iovec iov = {
840 			.iov_base = (caddr_t)data_d,
841 			.iov_len = len,
842 		};
843 		struct uio uio = {
844 			.uio_iov = &iov,
845 			.uio_iovcnt = 1,
846 			.uio_offset = (off_t)data_s,
847 			.uio_resid = len,
848 			.uio_segflg = UIO_USERSPACE,
849 			.uio_rw = UIO_READ,
850 			.uio_td = td,
851 		};
852 
853 		PHOLD(proc_s);
854 		error = proc_rwmem(proc_s, &uio);
855 		PRELE(proc_s);
856 
857 	} else if (proc_cur == proc_s) {
858 		struct iovec iov = {
859 			.iov_base = (caddr_t)data_s,
860 			.iov_len = len,
861 		};
862 		struct uio uio = {
863 			.uio_iov = &iov,
864 			.uio_iovcnt = 1,
865 			.uio_offset = (off_t)data_d,
866 			.uio_resid = len,
867 			.uio_segflg = UIO_USERSPACE,
868 			.uio_rw = UIO_WRITE,
869 			.uio_td = td,
870 		};
871 
872 		PHOLD(proc_d);
873 		error = proc_rwmem(proc_d, &uio);
874 		PRELE(proc_d);
875 	} else {
876 		error = EINVAL;
877 	}
878 	return (error);
879 }
880 
/*
 * Copy data directly between the calling server process and the
 * client process which is waiting on the given command.
 *
 * "isread" equal to zero copies from the server's "local_ptr" to the
 * client's "peer_ptr"; any other value copies in the other direction.
 *
 * Called with the global lock held. The lock is dropped around the
 * actual copy and re-acquired before returning. Returns ENXIO when no
 * client is waiting on the command, ENOMEM when "proc_refs" is
 * negative, else the cuse_proc2proc_copy() result.
 */
static int
cuse_server_data_copy_locked(struct cuse_client_command *pccmd,
    struct cuse_data_chunk *pchk, int isread)
{
	struct proc *p_proc;
	int error;

	/* a client must be waiting for this command */
	p_proc = pccmd->proc_curr;
	if (p_proc == NULL)
		return (ENXIO);

	if (pccmd->proc_refs < 0)
		return (ENOMEM);

	/* keep the client from returning while the copy is in progress */
	pccmd->proc_refs++;

	cuse_unlock();

	if (isread == 0) {
		error = cuse_proc2proc_copy(
		    curthread->td_proc, pchk->local_ptr,
		    p_proc, pchk->peer_ptr,
		    pchk->length);
	} else {
		error = cuse_proc2proc_copy(
		    p_proc, pchk->peer_ptr,
		    curthread->td_proc, pchk->local_ptr,
		    pchk->length);
	}

	cuse_lock();

	pccmd->proc_refs--;

	/* the client stopped waiting meanwhile; let it re-check the count */
	if (pccmd->proc_curr == NULL)
		cv_signal(&pccmd->cv);

	return (error);
}
920 
921 static int
922 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
923 {
924 	int n;
925 	int x = 0;
926 	int match;
927 
928 	do {
929 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
930 			if (cuse_alloc_unit[n] != NULL) {
931 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
932 					continue;
933 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
934 					x++;
935 					match = 1;
936 				}
937 			}
938 		}
939 	} while (match);
940 
941 	if (x < 256) {
942 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
943 			if (cuse_alloc_unit[n] == NULL) {
944 				cuse_alloc_unit[n] = pcs;
945 				cuse_alloc_unit_id[n] = id | x;
946 				return (x);
947 			}
948 		}
949 	}
950 	return (-1);
951 }
952 
/*
 * Notify select/poll waiters and kqueue listeners registered on the
 * server's selinfo.
 */
static void
cuse_server_wakeup_locked(struct cuse_server *pcs)
{
	selwakeup(&pcs->selinfo);
	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
}
959 
960 static void
961 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
962 {
963 	struct cuse_client *pcc;
964 
965 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
966 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
967 		    CUSE_CLI_KNOTE_NEED_WRITE);
968 	}
969 	cuse_server_wakeup_locked(pcs);
970 }
971 
972 static int
973 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
974 {
975 	int n;
976 	int found = 0;
977 
978 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
979 		if (cuse_alloc_unit[n] == pcs) {
980 			if (cuse_alloc_unit_id[n] == id || id == -1) {
981 				cuse_alloc_unit[n] = NULL;
982 				cuse_alloc_unit_id[n] = 0;
983 				found = 1;
984 			}
985 		}
986 	}
987 
988 	return (found ? 0 : EINVAL);
989 }
990 
/*
 * ioctl handler of the control device. This is the interface used by
 * the userspace server to fetch and complete client commands, manage
 * unit numbers, shared memory and devices, and to move data to and
 * from the client.
 *
 * "data" points to the kernel copy of the ioctl argument. Returns
 * zero on success or an errno value; unknown commands yield ENXIO.
 */
static int
cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
    caddr_t data, int fflag, struct thread *td)
{
	struct cuse_server *pcs;
	int error;

	/* fails when the server is closing */
	error = cuse_server_get(&pcs);
	if (error != 0)
		return (error);

	switch (cmd) {
		struct cuse_client_command *pccmd;
		struct cuse_client *pcc;
		struct cuse_command *pcmd;
		struct cuse_alloc_info *pai;
		struct cuse_create_dev *pcd;
		struct cuse_server_dev *pcsd;
		struct cuse_data_chunk *pchk;
		int n;

	/* wait for the next queued client command and hand it out */
	case CUSE_IOCTL_GET_COMMAND:
		pcmd = (void *)data;

		cuse_lock();

		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
			error = cv_wait_sig(&pcs->cv, &cuse_mtx);

			if (pcs->is_closing)
				error = ENXIO;

			if (error) {
				cuse_unlock();
				return (error);
			}
		}

		/* a NULL "tqe_prev" marks the command as not queued */
		TAILQ_REMOVE(&pcs->head, pccmd, entry);
		pccmd->entry.tqe_prev = NULL;

		/* remember which server thread is processing the command */
		pccmd->entered = curthread;

		*pcmd = pccmd->sub;

		cuse_unlock();

		break;

	/* complete every command the calling thread has fetched */
	case CUSE_IOCTL_SYNC_COMMAND:

		cuse_lock();
		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {

			/* send sync command */
			pccmd->entered = NULL;
			pccmd->error = *(int *)data;
			pccmd->command = CUSE_CMD_SYNC;

			/* signal peer, if any */
			cv_signal(&pccmd->cv);
		}
		cuse_unlock();

		break;

	/* allocate a unit number in the default ID class */
	case CUSE_IOCTL_ALLOC_UNIT:

		cuse_lock();
		n = cuse_alloc_unit_by_id_locked(pcs,
		    CUSE_ID_DEFAULT(0));
		cuse_unlock();

		if (n < 0)
			error = ENOMEM;
		else
			*(int *)data = n;
		break;

	/* allocate a unit number in the ID class given by the caller */
	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:

		n = *(int *)data;

		/* keep only the ID class bits */
		n = (n & CUSE_ID_MASK);

		cuse_lock();
		n = cuse_alloc_unit_by_id_locked(pcs, n);
		cuse_unlock();

		if (n < 0)
			error = ENOMEM;
		else
			*(int *)data = n;
		break;

	/* free a unit number of the default ID class */
	case CUSE_IOCTL_FREE_UNIT:

		n = *(int *)data;

		n = CUSE_ID_DEFAULT(n);

		cuse_lock();
		error = cuse_free_unit_by_id_locked(pcs, n);
		cuse_unlock();
		break;

	/* free a unit by its full identifier */
	case CUSE_IOCTL_FREE_UNIT_BY_ID:

		n = *(int *)data;

		cuse_lock();
		error = cuse_free_unit_by_id_locked(pcs, n);
		cuse_unlock();
		break;

	/* allocate a memory block after range checking the request */
	case CUSE_IOCTL_ALLOC_MEMORY:

		pai = (void *)data;

		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
			error = ENOMEM;
			break;
		}
		if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
			error = ENOMEM;
			break;
		}
		error = cuse_server_alloc_memory(pcs,
		    pai->alloc_nr, pai->page_count);
		break;

	/* free a memory block by allocation number */
	case CUSE_IOCTL_FREE_MEMORY:
		pai = (void *)data;

		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
			error = ENOMEM;
			break;
		}
		error = cuse_server_free_memory(pcs, pai->alloc_nr);
		break;

	/* fetch and clear the signal flag of the current command */
	case CUSE_IOCTL_GET_SIG:

		cuse_lock();
		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd != NULL) {
			n = pccmd->got_signal;
			pccmd->got_signal = 0;
		} else {
			n = 0;
		}
		cuse_unlock();

		*(int *)data = n;

		break;

	/* set the per file handle on all commands of the current client */
	case CUSE_IOCTL_SET_PFH:

		cuse_lock();
		pccmd = cuse_server_find_command(pcs, curthread);

		if (pccmd != NULL) {
			pcc = pccmd->client;
			for (n = 0; n != CUSE_CMD_MAX; n++) {
				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
			}
		} else {
			error = ENXIO;
		}
		cuse_unlock();
		break;

	/* create a new character device served by this server */
	case CUSE_IOCTL_CREATE_DEV:

		error = priv_check(curthread, PRIV_DRIVER);
		if (error)
			break;

		pcd = (void *)data;

		/* filter input */

		/* force NUL termination of the user supplied name */
		pcd->devname[sizeof(pcd->devname) - 1] = 0;

		if (pcd->devname[0] == 0) {
			error = EINVAL;
			break;
		}
		cuse_str_filter(pcd->devname);

		pcd->permissions &= 0777;

		/* try to allocate a character device */

		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);

		if (pcsd == NULL) {
			error = ENOMEM;
			break;
		}
		pcsd->server = pcs;

		pcsd->user_dev = pcd->dev;

		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
		    pcd->permissions, "%s", pcd->devname);

		if (pcsd->kern_dev == NULL) {
			free(pcsd, M_CUSE);
			error = ENOMEM;
			break;
		}
		/* link the kernel device back to the server device */
		pcsd->kern_dev->si_drv1 = pcsd;

		cuse_lock();
		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
		cuse_unlock();

		break;

	/* destroy all devices matching the given user handle */
	case CUSE_IOCTL_DESTROY_DEV:

		error = priv_check(curthread, PRIV_DRIVER);
		if (error)
			break;

		cuse_lock();

		/* EINVAL unless at least one device matches */
		error = EINVAL;

		pcsd = TAILQ_FIRST(&pcs->hdev);
		while (pcsd != NULL) {
			if (pcsd->user_dev == *(struct cuse_dev **)data) {
				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
				cuse_unlock();
				cuse_server_free_dev(pcsd);
				cuse_lock();
				error = 0;
				/* the lock was dropped; restart the scan */
				pcsd = TAILQ_FIRST(&pcs->hdev);
			} else {
				pcsd = TAILQ_NEXT(pcsd, entry);
			}
		}

		cuse_unlock();
		break;

	/* move data between the server and the current client */
	case CUSE_IOCTL_WRITE_DATA:
	case CUSE_IOCTL_READ_DATA:

		cuse_lock();
		pchk = (struct cuse_data_chunk *)data;

		pccmd = cuse_server_find_command(pcs, curthread);

		/*
		 * Peer pointers in [CUSE_BUF_MIN_PTR, CUSE_BUF_MAX_PTR)
		 * address the client's ioctl buffer; pointers at or above
		 * CUSE_BUF_MAX_PTR address the client process directly.
		 */
		if (pccmd == NULL) {
			error = ENXIO;	/* invalid request */
		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
			error = EFAULT;	/* NULL pointer */
		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
			error = cuse_server_ioctl_copy_locked(pccmd,
			    pchk, cmd == CUSE_IOCTL_READ_DATA);
		} else {
			error = cuse_server_data_copy_locked(pccmd,
			    pchk, cmd == CUSE_IOCTL_READ_DATA);
		}
		cuse_unlock();
		break;

	case CUSE_IOCTL_SELWAKEUP:
		cuse_lock();
		/*
		 * We don't know which direction caused the event.
		 * Wakeup both!
		 */
		cuse_server_wakeup_all_client_locked(pcs);
		cuse_unlock();
		break;

	default:
		error = ENXIO;
		break;
	}
	return (error);
}
1279 
1280 static int
1281 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1282 {
1283 	return (events & (POLLHUP | POLLPRI | POLLIN |
1284 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1285 }
1286 
1287 static int
1288 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1289     vm_size_t size, struct vm_object **object, int nprot)
1290 {
1291 	uint32_t page_nr = *offset / PAGE_SIZE;
1292 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1293 	struct cuse_memory *mem;
1294 	struct cuse_server *pcs;
1295 	int error;
1296 
1297 	error = cuse_server_get(&pcs);
1298 	if (error != 0)
1299 		return (error);
1300 
1301 	cuse_lock();
1302 	/* lookup memory structure */
1303 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1304 		if (mem->alloc_nr == alloc_nr)
1305 			break;
1306 	}
1307 	if (mem == NULL) {
1308 		cuse_unlock();
1309 		return (ENOMEM);
1310 	}
1311 	/* verify page offset */
1312 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1313 	if (page_nr >= mem->page_count) {
1314 		cuse_unlock();
1315 		return (ENXIO);
1316 	}
1317 	/* verify mmap size */
1318 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1319 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1320 		cuse_unlock();
1321 		return (EINVAL);
1322 	}
1323 	vm_object_reference(mem->object);
1324 	*object = mem->object;
1325 	cuse_unlock();
1326 
1327 	/* set new VM object offset to use */
1328 	*offset = page_nr * PAGE_SIZE;
1329 
1330 	/* success */
1331 	return (0);
1332 }
1333 
1334 /*------------------------------------------------------------------------*
1335  *	CUSE CLIENT PART
1336  *------------------------------------------------------------------------*/
1337 static void
1338 cuse_client_free(void *arg)
1339 {
1340 	struct cuse_client *pcc = arg;
1341 	struct cuse_client_command *pccmd;
1342 	struct cuse_server *pcs;
1343 	int n;
1344 
1345 	cuse_lock();
1346 	cuse_client_is_closing(pcc);
1347 	TAILQ_REMOVE(&pcc->server->hcli, pcc, entry);
1348 	cuse_unlock();
1349 
1350 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1351 
1352 		pccmd = &pcc->cmds[n];
1353 
1354 		sx_destroy(&pccmd->sx);
1355 		cv_destroy(&pccmd->cv);
1356 	}
1357 
1358 	pcs = pcc->server;
1359 
1360 	free(pcc, M_CUSE);
1361 
1362 	/* drop reference on server */
1363 	cuse_server_unref(pcs);
1364 }
1365 
1366 static int
1367 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1368 {
1369 	struct cuse_client_command *pccmd;
1370 	struct cuse_server_dev *pcsd;
1371 	struct cuse_client *pcc;
1372 	struct cuse_server *pcs;
1373 	struct cuse_dev *pcd;
1374 	int error;
1375 	int n;
1376 
1377 	cuse_lock();
1378 	pcsd = dev->si_drv1;
1379 	if (pcsd != NULL) {
1380 		pcs = pcsd->server;
1381 		pcd = pcsd->user_dev;
1382 		/*
1383 		 * Check that the refcount didn't wrap and that the
1384 		 * same process is not both client and server. This
1385 		 * can easily lead to deadlocks when destroying the
1386 		 * CUSE character device nodes:
1387 		 */
1388 		pcs->refs++;
1389 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1390 			/* overflow or wrong PID */
1391 			pcs->refs--;
1392 			pcsd = NULL;
1393 		}
1394 	} else {
1395 		pcs = NULL;
1396 		pcd = NULL;
1397 	}
1398 	cuse_unlock();
1399 
1400 	if (pcsd == NULL)
1401 		return (EINVAL);
1402 
1403 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1404 	if (pcc == NULL) {
1405 		/* drop reference on server */
1406 		cuse_server_unref(pcs);
1407 		return (ENOMEM);
1408 	}
1409 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1410 		printf("Cuse: Cannot set cdevpriv.\n");
1411 		/* drop reference on server */
1412 		cuse_server_unref(pcs);
1413 		free(pcc, M_CUSE);
1414 		return (ENOMEM);
1415 	}
1416 	pcc->fflags = fflags;
1417 	pcc->server_dev = pcsd;
1418 	pcc->server = pcs;
1419 
1420 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1421 
1422 		pccmd = &pcc->cmds[n];
1423 
1424 		pccmd->sub.dev = pcd;
1425 		pccmd->sub.command = n;
1426 		pccmd->client = pcc;
1427 
1428 		sx_init(&pccmd->sx, "cuse-client-sx");
1429 		cv_init(&pccmd->cv, "cuse-client-cv");
1430 	}
1431 
1432 	cuse_lock();
1433 
1434 	/* cuse_client_free() assumes that the client is listed somewhere! */
1435 	/* always enqueue */
1436 
1437 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1438 
1439 	/* check if server is closing */
1440 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1441 		error = EINVAL;
1442 	} else {
1443 		error = 0;
1444 	}
1445 	cuse_unlock();
1446 
1447 	if (error) {
1448 		devfs_clear_cdevpriv();	/* XXX bugfix */
1449 		return (error);
1450 	}
1451 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1452 
1453 	cuse_cmd_lock(pccmd);
1454 
1455 	cuse_lock();
1456 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1457 
1458 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1459 	cuse_unlock();
1460 
1461 	if (error < 0) {
1462 		error = cuse_convert_error(error);
1463 	} else {
1464 		error = 0;
1465 	}
1466 
1467 	cuse_cmd_unlock(pccmd);
1468 
1469 	if (error)
1470 		devfs_clear_cdevpriv();	/* XXX bugfix */
1471 
1472 	return (error);
1473 }
1474 
1475 static int
1476 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1477 {
1478 	struct cuse_client_command *pccmd;
1479 	struct cuse_client *pcc;
1480 	int error;
1481 
1482 	error = cuse_client_get(&pcc);
1483 	if (error != 0)
1484 		return (0);
1485 
1486 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1487 
1488 	cuse_cmd_lock(pccmd);
1489 
1490 	cuse_lock();
1491 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1492 
1493 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1494 	cuse_unlock();
1495 
1496 	cuse_cmd_unlock(pccmd);
1497 
1498 	cuse_lock();
1499 	cuse_client_is_closing(pcc);
1500 	cuse_unlock();
1501 
1502 	return (0);
1503 }
1504 
1505 static void
1506 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1507 {
1508 	int temp;
1509 
1510 	cuse_lock();
1511 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1512 	    CUSE_CLI_KNOTE_HAS_WRITE));
1513 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1514 	    CUSE_CLI_KNOTE_NEED_WRITE);
1515 	cuse_unlock();
1516 
1517 	if (temp != 0) {
1518 		/* get the latest polling state from the server */
1519 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1520 
1521 		if (temp & (POLLIN | POLLOUT)) {
1522 			cuse_lock();
1523 			if (temp & POLLIN)
1524 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1525 			if (temp & POLLOUT)
1526 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1527 
1528 			/* make sure the "knote" gets woken up */
1529 			cuse_server_wakeup_locked(pcc->server);
1530 			cuse_unlock();
1531 		}
1532 	}
1533 }
1534 
1535 static int
1536 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1537 {
1538 	struct cuse_client_command *pccmd;
1539 	struct cuse_client *pcc;
1540 	int error;
1541 	int len;
1542 
1543 	error = cuse_client_get(&pcc);
1544 	if (error != 0)
1545 		return (error);
1546 
1547 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1548 
1549 	if (uio->uio_segflg != UIO_USERSPACE) {
1550 		return (EINVAL);
1551 	}
1552 	uio->uio_segflg = UIO_NOCOPY;
1553 
1554 	cuse_cmd_lock(pccmd);
1555 
1556 	while (uio->uio_resid != 0) {
1557 
1558 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1559 			error = ENOMEM;
1560 			break;
1561 		}
1562 		len = uio->uio_iov->iov_len;
1563 
1564 		cuse_lock();
1565 		cuse_client_send_command_locked(pccmd,
1566 		    (uintptr_t)uio->uio_iov->iov_base,
1567 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1568 
1569 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1570 		cuse_unlock();
1571 
1572 		if (error < 0) {
1573 			error = cuse_convert_error(error);
1574 			break;
1575 		} else if (error == len) {
1576 			error = uiomove(NULL, error, uio);
1577 			if (error)
1578 				break;
1579 		} else {
1580 			error = uiomove(NULL, error, uio);
1581 			break;
1582 		}
1583 	}
1584 	cuse_cmd_unlock(pccmd);
1585 
1586 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1587 
1588 	if (error == EWOULDBLOCK)
1589 		cuse_client_kqfilter_poll(dev, pcc);
1590 
1591 	return (error);
1592 }
1593 
1594 static int
1595 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1596 {
1597 	struct cuse_client_command *pccmd;
1598 	struct cuse_client *pcc;
1599 	int error;
1600 	int len;
1601 
1602 	error = cuse_client_get(&pcc);
1603 	if (error != 0)
1604 		return (error);
1605 
1606 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1607 
1608 	if (uio->uio_segflg != UIO_USERSPACE) {
1609 		return (EINVAL);
1610 	}
1611 	uio->uio_segflg = UIO_NOCOPY;
1612 
1613 	cuse_cmd_lock(pccmd);
1614 
1615 	while (uio->uio_resid != 0) {
1616 
1617 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1618 			error = ENOMEM;
1619 			break;
1620 		}
1621 		len = uio->uio_iov->iov_len;
1622 
1623 		cuse_lock();
1624 		cuse_client_send_command_locked(pccmd,
1625 		    (uintptr_t)uio->uio_iov->iov_base,
1626 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1627 
1628 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1629 		cuse_unlock();
1630 
1631 		if (error < 0) {
1632 			error = cuse_convert_error(error);
1633 			break;
1634 		} else if (error == len) {
1635 			error = uiomove(NULL, error, uio);
1636 			if (error)
1637 				break;
1638 		} else {
1639 			error = uiomove(NULL, error, uio);
1640 			break;
1641 		}
1642 	}
1643 	cuse_cmd_unlock(pccmd);
1644 
1645 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1646 
1647 	if (error == EWOULDBLOCK)
1648 		cuse_client_kqfilter_poll(dev, pcc);
1649 
1650 	return (error);
1651 }
1652 
1653 int
1654 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1655     caddr_t data, int fflag, struct thread *td)
1656 {
1657 	struct cuse_client_command *pccmd;
1658 	struct cuse_client *pcc;
1659 	int error;
1660 	int len;
1661 
1662 	error = cuse_client_get(&pcc);
1663 	if (error != 0)
1664 		return (error);
1665 
1666 	len = IOCPARM_LEN(cmd);
1667 	if (len > CUSE_BUFFER_MAX)
1668 		return (ENOMEM);
1669 
1670 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1671 
1672 	cuse_cmd_lock(pccmd);
1673 
1674 	if (cmd & (IOC_IN | IOC_VOID))
1675 		memcpy(pcc->ioctl_buffer, data, len);
1676 
1677 	/*
1678 	 * When the ioctl-length is zero drivers can pass information
1679 	 * through the data pointer of the ioctl. Make sure this information
1680 	 * is forwarded to the driver.
1681 	 */
1682 
1683 	cuse_lock();
1684 	cuse_client_send_command_locked(pccmd,
1685 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1686 	    (unsigned long)cmd, pcc->fflags,
1687 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1688 
1689 	error = cuse_client_receive_command_locked(pccmd, data, len);
1690 	cuse_unlock();
1691 
1692 	if (error < 0) {
1693 		error = cuse_convert_error(error);
1694 	} else {
1695 		error = 0;
1696 	}
1697 
1698 	if (cmd & IOC_OUT)
1699 		memcpy(data, pcc->ioctl_buffer, len);
1700 
1701 	cuse_cmd_unlock(pccmd);
1702 
1703 	if (error == EWOULDBLOCK)
1704 		cuse_client_kqfilter_poll(dev, pcc);
1705 
1706 	return (error);
1707 }
1708 
1709 static int
1710 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1711 {
1712 	struct cuse_client_command *pccmd;
1713 	struct cuse_client *pcc;
1714 	unsigned long temp;
1715 	int error;
1716 	int revents;
1717 
1718 	error = cuse_client_get(&pcc);
1719 	if (error != 0)
1720 		goto pollnval;
1721 
1722 	temp = 0;
1723 
1724 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1725 		temp |= CUSE_POLL_READ;
1726 
1727 	if (events & (POLLOUT | POLLWRNORM))
1728 		temp |= CUSE_POLL_WRITE;
1729 
1730 	if (events & POLLHUP)
1731 		temp |= CUSE_POLL_ERROR;
1732 
1733 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1734 
1735 	cuse_cmd_lock(pccmd);
1736 
1737 	/* Need to selrecord() first to not loose any events. */
1738 	if (temp != 0 && td != NULL)
1739 		selrecord(td, &pcc->server->selinfo);
1740 
1741 	cuse_lock();
1742 	cuse_client_send_command_locked(pccmd,
1743 	    0, temp, pcc->fflags, IO_NDELAY);
1744 
1745 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1746 	cuse_unlock();
1747 
1748 	cuse_cmd_unlock(pccmd);
1749 
1750 	if (error < 0) {
1751 		goto pollnval;
1752 	} else {
1753 		revents = 0;
1754 		if (error & CUSE_POLL_READ)
1755 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1756 		if (error & CUSE_POLL_WRITE)
1757 			revents |= (events & (POLLOUT | POLLWRNORM));
1758 		if (error & CUSE_POLL_ERROR)
1759 			revents |= (events & POLLHUP);
1760 	}
1761 	return (revents);
1762 
1763 pollnval:
1764 	/* XXX many clients don't understand POLLNVAL */
1765 	return (events & (POLLHUP | POLLPRI | POLLIN |
1766 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1767 }
1768 
1769 static int
1770 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1771     vm_size_t size, struct vm_object **object, int nprot)
1772 {
1773 	uint32_t page_nr = *offset / PAGE_SIZE;
1774 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1775 	struct cuse_memory *mem;
1776 	struct cuse_client *pcc;
1777 	int error;
1778 
1779 	error = cuse_client_get(&pcc);
1780 	if (error != 0)
1781 		return (error);
1782 
1783 	cuse_lock();
1784 	/* lookup memory structure */
1785 	TAILQ_FOREACH(mem, &pcc->server->hmem, entry) {
1786 		if (mem->alloc_nr == alloc_nr)
1787 			break;
1788 	}
1789 	if (mem == NULL) {
1790 		cuse_unlock();
1791 		return (ENOMEM);
1792 	}
1793 	/* verify page offset */
1794 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1795 	if (page_nr >= mem->page_count) {
1796 		cuse_unlock();
1797 		return (ENXIO);
1798 	}
1799 	/* verify mmap size */
1800 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1801 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1802 		cuse_unlock();
1803 		return (EINVAL);
1804 	}
1805 	vm_object_reference(mem->object);
1806 	*object = mem->object;
1807 	cuse_unlock();
1808 
1809 	/* set new VM object offset to use */
1810 	*offset = page_nr * PAGE_SIZE;
1811 
1812 	/* success */
1813 	return (0);
1814 }
1815 
1816 static void
1817 cuse_client_kqfilter_read_detach(struct knote *kn)
1818 {
1819 	struct cuse_client *pcc;
1820 
1821 	cuse_lock();
1822 	pcc = kn->kn_hook;
1823 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1824 	cuse_unlock();
1825 }
1826 
1827 static void
1828 cuse_client_kqfilter_write_detach(struct knote *kn)
1829 {
1830 	struct cuse_client *pcc;
1831 
1832 	cuse_lock();
1833 	pcc = kn->kn_hook;
1834 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1835 	cuse_unlock();
1836 }
1837 
1838 static int
1839 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1840 {
1841 	struct cuse_client *pcc;
1842 
1843 	mtx_assert(&cuse_mtx, MA_OWNED);
1844 
1845 	pcc = kn->kn_hook;
1846 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1847 }
1848 
1849 static int
1850 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1851 {
1852 	struct cuse_client *pcc;
1853 
1854 	mtx_assert(&cuse_mtx, MA_OWNED);
1855 
1856 	pcc = kn->kn_hook;
1857 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1858 }
1859 
1860 static int
1861 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1862 {
1863 	struct cuse_client *pcc;
1864 	struct cuse_server *pcs;
1865 	int error;
1866 
1867 	error = cuse_client_get(&pcc);
1868 	if (error != 0)
1869 		return (error);
1870 
1871 	cuse_lock();
1872 	pcs = pcc->server;
1873 	switch (kn->kn_filter) {
1874 	case EVFILT_READ:
1875 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1876 		kn->kn_hook = pcc;
1877 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1878 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1879 		break;
1880 	case EVFILT_WRITE:
1881 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1882 		kn->kn_hook = pcc;
1883 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1884 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1885 		break;
1886 	default:
1887 		error = EINVAL;
1888 		break;
1889 	}
1890 	cuse_unlock();
1891 
1892 	if (error == 0)
1893 		cuse_client_kqfilter_poll(dev, pcc);
1894 	return (error);
1895 }
1896