xref: /freebsd/sys/fs/cuse/cuse.c (revision 42249ef2)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2017 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/stdint.h>
28 #include <sys/stddef.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/linker_set.h>
36 #include <sys/module.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/condvar.h>
40 #include <sys/sysctl.h>
41 #include <sys/unistd.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/uio.h>
45 #include <sys/poll.h>
46 #include <sys/sx.h>
47 #include <sys/rwlock.h>
48 #include <sys/queue.h>
49 #include <sys/fcntl.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/selinfo.h>
53 #include <sys/ptrace.h>
54 #include <sys/sysent.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pager.h>
63 
64 #include <fs/cuse/cuse_defs.h>
65 #include <fs/cuse/cuse_ioctl.h>
66 
67 MODULE_VERSION(cuse, 1);
68 
69 /*
70  * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
71  * declaring support for the cuse4bsd interface in cuse.ko:
72  */
73 MODULE_VERSION(cuse4bsd, 1);
74 
75 #ifdef FEATURE
76 FEATURE(cuse, "Userspace character devices");
77 #endif
78 
79 struct cuse_command;
80 struct cuse_server;
81 struct cuse_client;
82 
83 struct cuse_client_command {
84 	TAILQ_ENTRY(cuse_client_command) entry;
85 	struct cuse_command sub;
86 	struct sx sx;
87 	struct cv cv;
88 	struct thread *entered;
89 	struct cuse_client *client;
90 	struct proc *proc_curr;
91 	int	proc_refs;
92 	int	got_signal;
93 	int	error;
94 	int	command;
95 };
96 
97 struct cuse_memory {
98 	TAILQ_ENTRY(cuse_memory) entry;
99 	vm_object_t object;
100 	uint32_t page_count;
101 	uint32_t alloc_nr;
102 };
103 
104 struct cuse_server_dev {
105 	TAILQ_ENTRY(cuse_server_dev) entry;
106 	struct cuse_server *server;
107 	struct cdev *kern_dev;
108 	struct cuse_dev *user_dev;
109 };
110 
111 struct cuse_server {
112 	TAILQ_ENTRY(cuse_server) entry;
113 	TAILQ_HEAD(, cuse_client_command) head;
114 	TAILQ_HEAD(, cuse_server_dev) hdev;
115 	TAILQ_HEAD(, cuse_client) hcli;
116 	TAILQ_HEAD(, cuse_memory) hmem;
117 	struct cv cv;
118 	struct selinfo selinfo;
119 	pid_t	pid;
120 	int	is_closing;
121 	int	refs;
122 };
123 
124 struct cuse_client {
125 	TAILQ_ENTRY(cuse_client) entry;
126 	TAILQ_ENTRY(cuse_client) entry_ref;
127 	struct cuse_client_command cmds[CUSE_CMD_MAX];
128 	struct cuse_server *server;
129 	struct cuse_server_dev *server_dev;
130 
131 	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);
132 
133 	int	fflags;			/* file flags */
134 	int	cflags;			/* client flags */
135 #define	CUSE_CLI_IS_CLOSING 0x01
136 #define	CUSE_CLI_KNOTE_NEED_READ 0x02
137 #define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
138 #define	CUSE_CLI_KNOTE_HAS_READ 0x08
139 #define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
140 };
141 
142 #define	CUSE_CLIENT_CLOSING(pcc) \
143     ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
144 
145 static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");
146 
147 static TAILQ_HEAD(, cuse_server) cuse_server_head;
148 static struct mtx cuse_mtx;
149 static struct cdev *cuse_dev;
150 static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
151 static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
152 
153 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
154 static void cuse_client_kqfilter_read_detach(struct knote *kn);
155 static void cuse_client_kqfilter_write_detach(struct knote *kn);
156 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
157 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
158 
159 static struct filterops cuse_client_kqfilter_read_ops = {
160 	.f_isfd = 1,
161 	.f_detach = cuse_client_kqfilter_read_detach,
162 	.f_event = cuse_client_kqfilter_read_event,
163 };
164 
165 static struct filterops cuse_client_kqfilter_write_ops = {
166 	.f_isfd = 1,
167 	.f_detach = cuse_client_kqfilter_write_detach,
168 	.f_event = cuse_client_kqfilter_write_event,
169 };
170 
171 static d_open_t cuse_client_open;
172 static d_close_t cuse_client_close;
173 static d_ioctl_t cuse_client_ioctl;
174 static d_read_t cuse_client_read;
175 static d_write_t cuse_client_write;
176 static d_poll_t cuse_client_poll;
177 static d_mmap_single_t cuse_client_mmap_single;
178 static d_kqfilter_t cuse_client_kqfilter;
179 
180 static struct cdevsw cuse_client_devsw = {
181 	.d_version = D_VERSION,
182 	.d_open = cuse_client_open,
183 	.d_close = cuse_client_close,
184 	.d_ioctl = cuse_client_ioctl,
185 	.d_name = "cuse_client",
186 	.d_flags = D_TRACKCLOSE,
187 	.d_read = cuse_client_read,
188 	.d_write = cuse_client_write,
189 	.d_poll = cuse_client_poll,
190 	.d_mmap_single = cuse_client_mmap_single,
191 	.d_kqfilter = cuse_client_kqfilter,
192 };
193 
194 static d_open_t cuse_server_open;
195 static d_close_t cuse_server_close;
196 static d_ioctl_t cuse_server_ioctl;
197 static d_read_t cuse_server_read;
198 static d_write_t cuse_server_write;
199 static d_poll_t cuse_server_poll;
200 static d_mmap_single_t cuse_server_mmap_single;
201 
202 static struct cdevsw cuse_server_devsw = {
203 	.d_version = D_VERSION,
204 	.d_open = cuse_server_open,
205 	.d_close = cuse_server_close,
206 	.d_ioctl = cuse_server_ioctl,
207 	.d_name = "cuse_server",
208 	.d_flags = D_TRACKCLOSE,
209 	.d_read = cuse_server_read,
210 	.d_write = cuse_server_write,
211 	.d_poll = cuse_server_poll,
212 	.d_mmap_single = cuse_server_mmap_single,
213 };
214 
215 static void cuse_client_is_closing(struct cuse_client *);
216 static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
217 
218 static void
219 cuse_lock(void)
220 {
221 	mtx_lock(&cuse_mtx);
222 }
223 
224 static void
225 cuse_unlock(void)
226 {
227 	mtx_unlock(&cuse_mtx);
228 }
229 
230 static void
231 cuse_cmd_lock(struct cuse_client_command *pccmd)
232 {
233 	sx_xlock(&pccmd->sx);
234 }
235 
236 static void
237 cuse_cmd_unlock(struct cuse_client_command *pccmd)
238 {
239 	sx_xunlock(&pccmd->sx);
240 }
241 
242 static void
243 cuse_kern_init(void *arg)
244 {
245 	TAILQ_INIT(&cuse_server_head);
246 
247 	mtx_init(&cuse_mtx, "cuse-mtx", NULL, MTX_DEF);
248 
249 	cuse_dev = make_dev(&cuse_server_devsw, 0,
250 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
251 
252 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
253 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
254 	    (CUSE_VERSION >> 0) & 0xFF);
255 }
256 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
257 
258 static void
259 cuse_kern_uninit(void *arg)
260 {
261 	void *ptr;
262 
263 	while (1) {
264 
265 		printf("Cuse: Please exit all /dev/cuse instances "
266 		    "and processes which have used this device.\n");
267 
268 		pause("DRAIN", 2 * hz);
269 
270 		cuse_lock();
271 		ptr = TAILQ_FIRST(&cuse_server_head);
272 		cuse_unlock();
273 
274 		if (ptr == NULL)
275 			break;
276 	}
277 
278 	if (cuse_dev != NULL)
279 		destroy_dev(cuse_dev);
280 
281 	mtx_destroy(&cuse_mtx);
282 }
283 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
284 
285 static int
286 cuse_server_get(struct cuse_server **ppcs)
287 {
288 	struct cuse_server *pcs;
289 	int error;
290 
291 	error = devfs_get_cdevpriv((void **)&pcs);
292 	if (error != 0) {
293 		*ppcs = NULL;
294 		return (error);
295 	}
296 	/* check if closing */
297 	cuse_lock();
298 	if (pcs->is_closing) {
299 		cuse_unlock();
300 		*ppcs = NULL;
301 		return (EINVAL);
302 	}
303 	cuse_unlock();
304 	*ppcs = pcs;
305 	return (0);
306 }
307 
308 static void
309 cuse_server_is_closing(struct cuse_server *pcs)
310 {
311 	struct cuse_client *pcc;
312 
313 	if (pcs->is_closing)
314 		return;
315 
316 	pcs->is_closing = 1;
317 
318 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
319 		cuse_client_is_closing(pcc);
320 	}
321 }
322 
323 static struct cuse_client_command *
324 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
325 {
326 	struct cuse_client *pcc;
327 	int n;
328 
329 	if (pcs->is_closing)
330 		goto done;
331 
332 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
333 		if (CUSE_CLIENT_CLOSING(pcc))
334 			continue;
335 		for (n = 0; n != CUSE_CMD_MAX; n++) {
336 			if (pcc->cmds[n].entered == td)
337 				return (&pcc->cmds[n]);
338 		}
339 	}
340 done:
341 	return (NULL);
342 }
343 
/*
 * Sanitize a NUL-terminated string in place.
 *
 * Letters, digits, '.', '_' and '/' are kept.  Every other character
 * is overwritten by an underscore.
 */
static void
cuse_str_filter(char *ptr)
{
	char *cur;

	for (cur = ptr; *cur != 0; cur++) {
		const int ch = *cur;
		const int is_lower = (ch >= 'a') && (ch <= 'z');
		const int is_upper = (ch >= 'A') && (ch <= 'Z');
		const int is_digit = (ch >= '0') && (ch <= '9');
		const int is_punct = (ch == '.') || (ch == '_') || (ch == '/');

		if (!(is_lower || is_upper || is_digit || is_punct))
			*cur = '_';
	}
}
372 
373 static int
374 cuse_convert_error(int error)
375 {
376 	;				/* indent fix */
377 	switch (error) {
378 	case CUSE_ERR_NONE:
379 		return (0);
380 	case CUSE_ERR_BUSY:
381 		return (EBUSY);
382 	case CUSE_ERR_WOULDBLOCK:
383 		return (EWOULDBLOCK);
384 	case CUSE_ERR_INVALID:
385 		return (EINVAL);
386 	case CUSE_ERR_NO_MEMORY:
387 		return (ENOMEM);
388 	case CUSE_ERR_FAULT:
389 		return (EFAULT);
390 	case CUSE_ERR_SIGNAL:
391 		return (EINTR);
392 	case CUSE_ERR_NO_DEVICE:
393 		return (ENODEV);
394 	default:
395 		return (ENXIO);
396 	}
397 }
398 
399 static void
400 cuse_vm_memory_free(struct cuse_memory *mem)
401 {
402 	/* last user is gone - free */
403 	vm_object_deallocate(mem->object);
404 
405 	/* free CUSE memory */
406 	free(mem, M_CUSE);
407 }
408 
409 static int
410 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
411     uint32_t page_count)
412 {
413 	struct cuse_memory *temp;
414 	struct cuse_memory *mem;
415 	vm_object_t object;
416 	int error;
417 
418 	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
419 	if (mem == NULL)
420 		return (ENOMEM);
421 
422 	object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
423 	    VM_PROT_DEFAULT, 0, curthread->td_ucred);
424 	if (object == NULL) {
425 		error = ENOMEM;
426 		goto error_0;
427 	}
428 
429 	cuse_lock();
430 	/* check if allocation number already exists */
431 	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
432 		if (temp->alloc_nr == alloc_nr)
433 			break;
434 	}
435 	if (temp != NULL) {
436 		cuse_unlock();
437 		error = EBUSY;
438 		goto error_1;
439 	}
440 	mem->object = object;
441 	mem->page_count = page_count;
442 	mem->alloc_nr = alloc_nr;
443 	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
444 	cuse_unlock();
445 
446 	return (0);
447 
448 error_1:
449 	vm_object_deallocate(object);
450 error_0:
451 	free(mem, M_CUSE);
452 	return (error);
453 }
454 
455 static int
456 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
457 {
458 	struct cuse_memory *mem;
459 
460 	cuse_lock();
461 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
462 		if (mem->alloc_nr == alloc_nr)
463 			break;
464 	}
465 	if (mem == NULL) {
466 		cuse_unlock();
467 		return (EINVAL);
468 	}
469 	TAILQ_REMOVE(&pcs->hmem, mem, entry);
470 	cuse_unlock();
471 
472 	cuse_vm_memory_free(mem);
473 
474 	return (0);
475 }
476 
477 static int
478 cuse_client_get(struct cuse_client **ppcc)
479 {
480 	struct cuse_client *pcc;
481 	int error;
482 
483 	/* try to get private data */
484 	error = devfs_get_cdevpriv((void **)&pcc);
485 	if (error != 0) {
486 		*ppcc = NULL;
487 		return (error);
488 	}
489 	/* check if closing */
490 	cuse_lock();
491 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
492 		cuse_unlock();
493 		*ppcc = NULL;
494 		return (EINVAL);
495 	}
496 	cuse_unlock();
497 	*ppcc = pcc;
498 	return (0);
499 }
500 
501 static void
502 cuse_client_is_closing(struct cuse_client *pcc)
503 {
504 	struct cuse_client_command *pccmd;
505 	uint32_t n;
506 
507 	if (CUSE_CLIENT_CLOSING(pcc))
508 		return;
509 
510 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
511 	pcc->server_dev = NULL;
512 
513 	for (n = 0; n != CUSE_CMD_MAX; n++) {
514 
515 		pccmd = &pcc->cmds[n];
516 
517 		if (pccmd->entry.tqe_prev != NULL) {
518 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
519 			pccmd->entry.tqe_prev = NULL;
520 		}
521 		cv_broadcast(&pccmd->cv);
522 	}
523 }
524 
525 static void
526 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
527     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
528 {
529 	unsigned long cuse_fflags = 0;
530 	struct cuse_server *pcs;
531 
532 	if (fflags & FREAD)
533 		cuse_fflags |= CUSE_FFLAG_READ;
534 
535 	if (fflags & FWRITE)
536 		cuse_fflags |= CUSE_FFLAG_WRITE;
537 
538 	if (ioflag & IO_NDELAY)
539 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
540 #if defined(__LP64__)
541 	if (SV_CURPROC_FLAG(SV_ILP32))
542 		cuse_fflags |= CUSE_FFLAG_COMPAT32;
543 #endif
544 	pccmd->sub.fflags = cuse_fflags;
545 	pccmd->sub.data_pointer = data_ptr;
546 	pccmd->sub.argument = arg;
547 
548 	pcs = pccmd->client->server;
549 
550 	if ((pccmd->entry.tqe_prev == NULL) &&
551 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
552 	    (pcs->is_closing == 0)) {
553 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
554 		cv_signal(&pcs->cv);
555 	}
556 }
557 
558 static void
559 cuse_client_got_signal(struct cuse_client_command *pccmd)
560 {
561 	struct cuse_server *pcs;
562 
563 	pccmd->got_signal = 1;
564 
565 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
566 
567 	pcs = pccmd->client->server;
568 
569 	if ((pccmd->entry.tqe_prev == NULL) &&
570 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
571 	    (pcs->is_closing == 0)) {
572 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
573 		cv_signal(&pcs->cv);
574 	}
575 }
576 
577 static int
578 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
579     uint8_t *arg_ptr, uint32_t arg_len)
580 {
581 	int error;
582 
583 	error = 0;
584 
585 	pccmd->proc_curr = curthread->td_proc;
586 
587 	if (CUSE_CLIENT_CLOSING(pccmd->client) ||
588 	    pccmd->client->server->is_closing) {
589 		error = CUSE_ERR_OTHER;
590 		goto done;
591 	}
592 	while (pccmd->command == CUSE_CMD_NONE) {
593 		if (error != 0) {
594 			cv_wait(&pccmd->cv, &cuse_mtx);
595 		} else {
596 			error = cv_wait_sig(&pccmd->cv, &cuse_mtx);
597 
598 			if (error != 0)
599 				cuse_client_got_signal(pccmd);
600 		}
601 		if (CUSE_CLIENT_CLOSING(pccmd->client) ||
602 		    pccmd->client->server->is_closing) {
603 			error = CUSE_ERR_OTHER;
604 			goto done;
605 		}
606 	}
607 
608 	error = pccmd->error;
609 	pccmd->command = CUSE_CMD_NONE;
610 	cv_signal(&pccmd->cv);
611 
612 done:
613 
614 	/* wait until all process references are gone */
615 
616 	pccmd->proc_curr = NULL;
617 
618 	while (pccmd->proc_refs != 0)
619 		cv_wait(&pccmd->cv, &cuse_mtx);
620 
621 	return (error);
622 }
623 
624 /*------------------------------------------------------------------------*
625  *	CUSE SERVER PART
626  *------------------------------------------------------------------------*/
627 
628 static void
629 cuse_server_free_dev(struct cuse_server_dev *pcsd)
630 {
631 	struct cuse_server *pcs;
632 	struct cuse_client *pcc;
633 
634 	/* get server pointer */
635 	pcs = pcsd->server;
636 
637 	/* prevent creation of more devices */
638 	cuse_lock();
639 	if (pcsd->kern_dev != NULL)
640 		pcsd->kern_dev->si_drv1 = NULL;
641 
642 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
643 		if (pcc->server_dev == pcsd)
644 			cuse_client_is_closing(pcc);
645 	}
646 	cuse_unlock();
647 
648 	/* destroy device, if any */
649 	if (pcsd->kern_dev != NULL) {
650 		/* destroy device synchronously */
651 		destroy_dev(pcsd->kern_dev);
652 	}
653 	free(pcsd, M_CUSE);
654 }
655 
656 static void
657 cuse_server_unref(struct cuse_server *pcs)
658 {
659 	struct cuse_server_dev *pcsd;
660 	struct cuse_memory *mem;
661 
662 	cuse_lock();
663 	pcs->refs--;
664 	if (pcs->refs != 0) {
665 		cuse_unlock();
666 		return;
667 	}
668 	cuse_server_is_closing(pcs);
669 	/* final client wakeup, if any */
670 	cuse_server_wakeup_all_client_locked(pcs);
671 
672 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
673 
674 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
675 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
676 		cuse_unlock();
677 		cuse_server_free_dev(pcsd);
678 		cuse_lock();
679 	}
680 
681 	cuse_free_unit_by_id_locked(pcs, -1);
682 
683 	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
684 		TAILQ_REMOVE(&pcs->hmem, mem, entry);
685 		cuse_unlock();
686 		cuse_vm_memory_free(mem);
687 		cuse_lock();
688 	}
689 
690 	knlist_clear(&pcs->selinfo.si_note, 1);
691 	knlist_destroy(&pcs->selinfo.si_note);
692 
693 	cuse_unlock();
694 
695 	seldrain(&pcs->selinfo);
696 
697 	cv_destroy(&pcs->cv);
698 
699 	free(pcs, M_CUSE);
700 }
701 
702 static int
703 cuse_server_do_close(struct cuse_server *pcs)
704 {
705 	int retval;
706 
707 	cuse_lock();
708 	cuse_server_is_closing(pcs);
709 	/* final client wakeup, if any */
710 	cuse_server_wakeup_all_client_locked(pcs);
711 
712 	knlist_clear(&pcs->selinfo.si_note, 1);
713 
714 	retval = pcs->refs;
715 	cuse_unlock();
716 
717 	return (retval);
718 }
719 
720 static void
721 cuse_server_free(void *arg)
722 {
723 	struct cuse_server *pcs = arg;
724 
725 	/*
726 	 * The final server unref should be done by the server thread
727 	 * to prevent deadlock in the client cdevpriv destructor,
728 	 * which cannot destroy itself.
729 	 */
730 	while (cuse_server_do_close(pcs) != 1)
731 		pause("W", hz);
732 
733 	/* drop final refcount */
734 	cuse_server_unref(pcs);
735 }
736 
737 static int
738 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
739 {
740 	struct cuse_server *pcs;
741 
742 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
743 	if (pcs == NULL)
744 		return (ENOMEM);
745 
746 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
747 		printf("Cuse: Cannot set cdevpriv.\n");
748 		free(pcs, M_CUSE);
749 		return (ENOMEM);
750 	}
751 	/* store current process ID */
752 	pcs->pid = curproc->p_pid;
753 
754 	TAILQ_INIT(&pcs->head);
755 	TAILQ_INIT(&pcs->hdev);
756 	TAILQ_INIT(&pcs->hcli);
757 	TAILQ_INIT(&pcs->hmem);
758 
759 	cv_init(&pcs->cv, "cuse-server-cv");
760 
761 	knlist_init_mtx(&pcs->selinfo.si_note, &cuse_mtx);
762 
763 	cuse_lock();
764 	pcs->refs++;
765 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
766 	cuse_unlock();
767 
768 	return (0);
769 }
770 
/*
 * Close handler of the server control device.  Starts closing the
 * server, unless the lookup fails or the server is already closing.
 * Always returns zero.
 */
static int
cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_server *pcs;
	int error;

	error = cuse_server_get(&pcs);
	if (error == 0)
		(void)cuse_server_do_close(pcs);

	return (0);
}
781 
/*
 * Read handler of the server control device.  Reading is not
 * supported; ENXIO is always returned.
 */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	const int error = ENXIO;

	return (error);
}
787 
/*
 * Write handler of the server control device.  Writing is not
 * supported; ENXIO is always returned.
 */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	const int error = ENXIO;

	return (error);
}
793 
794 static int
795 cuse_server_ioctl_copy_locked(struct cuse_client_command *pccmd,
796     struct cuse_data_chunk *pchk, int isread)
797 {
798 	struct proc *p_proc;
799 	uint32_t offset;
800 	int error;
801 
802 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
803 
804 	if (pchk->length > CUSE_BUFFER_MAX)
805 		return (EFAULT);
806 
807 	if (offset >= CUSE_BUFFER_MAX)
808 		return (EFAULT);
809 
810 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
811 		return (EFAULT);
812 
813 	p_proc = pccmd->proc_curr;
814 	if (p_proc == NULL)
815 		return (ENXIO);
816 
817 	if (pccmd->proc_refs < 0)
818 		return (ENOMEM);
819 
820 	pccmd->proc_refs++;
821 
822 	cuse_unlock();
823 
824 	if (isread == 0) {
825 		error = copyin(
826 		    (void *)pchk->local_ptr,
827 		    pccmd->client->ioctl_buffer + offset,
828 		    pchk->length);
829 	} else {
830 		error = copyout(
831 		    pccmd->client->ioctl_buffer + offset,
832 		    (void *)pchk->local_ptr,
833 		    pchk->length);
834 	}
835 
836 	cuse_lock();
837 
838 	pccmd->proc_refs--;
839 
840 	if (pccmd->proc_curr == NULL)
841 		cv_signal(&pccmd->cv);
842 
843 	return (error);
844 }
845 
846 static int
847 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
848     struct proc *proc_d, vm_offset_t data_d, size_t len)
849 {
850 	struct thread *td;
851 	struct proc *proc_cur;
852 	int error;
853 
854 	td = curthread;
855 	proc_cur = td->td_proc;
856 
857 	if (proc_cur == proc_d) {
858 		struct iovec iov = {
859 			.iov_base = (caddr_t)data_d,
860 			.iov_len = len,
861 		};
862 		struct uio uio = {
863 			.uio_iov = &iov,
864 			.uio_iovcnt = 1,
865 			.uio_offset = (off_t)data_s,
866 			.uio_resid = len,
867 			.uio_segflg = UIO_USERSPACE,
868 			.uio_rw = UIO_READ,
869 			.uio_td = td,
870 		};
871 
872 		PHOLD(proc_s);
873 		error = proc_rwmem(proc_s, &uio);
874 		PRELE(proc_s);
875 
876 	} else if (proc_cur == proc_s) {
877 		struct iovec iov = {
878 			.iov_base = (caddr_t)data_s,
879 			.iov_len = len,
880 		};
881 		struct uio uio = {
882 			.uio_iov = &iov,
883 			.uio_iovcnt = 1,
884 			.uio_offset = (off_t)data_d,
885 			.uio_resid = len,
886 			.uio_segflg = UIO_USERSPACE,
887 			.uio_rw = UIO_WRITE,
888 			.uio_td = td,
889 		};
890 
891 		PHOLD(proc_d);
892 		error = proc_rwmem(proc_d, &uio);
893 		PRELE(proc_d);
894 	} else {
895 		error = EINVAL;
896 	}
897 	return (error);
898 }
899 
900 static int
901 cuse_server_data_copy_locked(struct cuse_client_command *pccmd,
902     struct cuse_data_chunk *pchk, int isread)
903 {
904 	struct proc *p_proc;
905 	int error;
906 
907 	p_proc = pccmd->proc_curr;
908 	if (p_proc == NULL)
909 		return (ENXIO);
910 
911 	if (pccmd->proc_refs < 0)
912 		return (ENOMEM);
913 
914 	pccmd->proc_refs++;
915 
916 	cuse_unlock();
917 
918 	if (isread == 0) {
919 		error = cuse_proc2proc_copy(
920 		    curthread->td_proc, pchk->local_ptr,
921 		    p_proc, pchk->peer_ptr,
922 		    pchk->length);
923 	} else {
924 		error = cuse_proc2proc_copy(
925 		    p_proc, pchk->peer_ptr,
926 		    curthread->td_proc, pchk->local_ptr,
927 		    pchk->length);
928 	}
929 
930 	cuse_lock();
931 
932 	pccmd->proc_refs--;
933 
934 	if (pccmd->proc_curr == NULL)
935 		cv_signal(&pccmd->cv);
936 
937 	return (error);
938 }
939 
940 static int
941 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
942 {
943 	int n;
944 	int x = 0;
945 	int match;
946 
947 	do {
948 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
949 			if (cuse_alloc_unit[n] != NULL) {
950 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
951 					continue;
952 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
953 					x++;
954 					match = 1;
955 				}
956 			}
957 		}
958 	} while (match);
959 
960 	if (x < 256) {
961 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
962 			if (cuse_alloc_unit[n] == NULL) {
963 				cuse_alloc_unit[n] = pcs;
964 				cuse_alloc_unit_id[n] = id | x;
965 				return (x);
966 			}
967 		}
968 	}
969 	return (-1);
970 }
971 
972 static void
973 cuse_server_wakeup_locked(struct cuse_server *pcs)
974 {
975 	selwakeup(&pcs->selinfo);
976 	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
977 }
978 
979 static void
980 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
981 {
982 	struct cuse_client *pcc;
983 
984 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
985 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
986 		    CUSE_CLI_KNOTE_NEED_WRITE);
987 	}
988 	cuse_server_wakeup_locked(pcs);
989 }
990 
991 static int
992 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
993 {
994 	int n;
995 	int found = 0;
996 
997 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
998 		if (cuse_alloc_unit[n] == pcs) {
999 			if (cuse_alloc_unit_id[n] == id || id == -1) {
1000 				cuse_alloc_unit[n] = NULL;
1001 				cuse_alloc_unit_id[n] = 0;
1002 				found = 1;
1003 			}
1004 		}
1005 	}
1006 
1007 	return (found ? 0 : EINVAL);
1008 }
1009 
1010 static int
1011 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
1012     caddr_t data, int fflag, struct thread *td)
1013 {
1014 	struct cuse_server *pcs;
1015 	int error;
1016 
1017 	error = cuse_server_get(&pcs);
1018 	if (error != 0)
1019 		return (error);
1020 
1021 	switch (cmd) {
1022 		struct cuse_client_command *pccmd;
1023 		struct cuse_client *pcc;
1024 		struct cuse_command *pcmd;
1025 		struct cuse_alloc_info *pai;
1026 		struct cuse_create_dev *pcd;
1027 		struct cuse_server_dev *pcsd;
1028 		struct cuse_data_chunk *pchk;
1029 		int n;
1030 
1031 	case CUSE_IOCTL_GET_COMMAND:
1032 		pcmd = (void *)data;
1033 
1034 		cuse_lock();
1035 
1036 		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
1037 			error = cv_wait_sig(&pcs->cv, &cuse_mtx);
1038 
1039 			if (pcs->is_closing)
1040 				error = ENXIO;
1041 
1042 			if (error) {
1043 				cuse_unlock();
1044 				return (error);
1045 			}
1046 		}
1047 
1048 		TAILQ_REMOVE(&pcs->head, pccmd, entry);
1049 		pccmd->entry.tqe_prev = NULL;
1050 
1051 		pccmd->entered = curthread;
1052 
1053 		*pcmd = pccmd->sub;
1054 
1055 		cuse_unlock();
1056 
1057 		break;
1058 
1059 	case CUSE_IOCTL_SYNC_COMMAND:
1060 
1061 		cuse_lock();
1062 		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1063 
1064 			/* send sync command */
1065 			pccmd->entered = NULL;
1066 			pccmd->error = *(int *)data;
1067 			pccmd->command = CUSE_CMD_SYNC;
1068 
1069 			/* signal peer, if any */
1070 			cv_signal(&pccmd->cv);
1071 		}
1072 		cuse_unlock();
1073 
1074 		break;
1075 
1076 	case CUSE_IOCTL_ALLOC_UNIT:
1077 
1078 		cuse_lock();
1079 		n = cuse_alloc_unit_by_id_locked(pcs,
1080 		    CUSE_ID_DEFAULT(0));
1081 		cuse_unlock();
1082 
1083 		if (n < 0)
1084 			error = ENOMEM;
1085 		else
1086 			*(int *)data = n;
1087 		break;
1088 
1089 	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1090 
1091 		n = *(int *)data;
1092 
1093 		n = (n & CUSE_ID_MASK);
1094 
1095 		cuse_lock();
1096 		n = cuse_alloc_unit_by_id_locked(pcs, n);
1097 		cuse_unlock();
1098 
1099 		if (n < 0)
1100 			error = ENOMEM;
1101 		else
1102 			*(int *)data = n;
1103 		break;
1104 
1105 	case CUSE_IOCTL_FREE_UNIT:
1106 
1107 		n = *(int *)data;
1108 
1109 		n = CUSE_ID_DEFAULT(n);
1110 
1111 		cuse_lock();
1112 		error = cuse_free_unit_by_id_locked(pcs, n);
1113 		cuse_unlock();
1114 		break;
1115 
1116 	case CUSE_IOCTL_FREE_UNIT_BY_ID:
1117 
1118 		n = *(int *)data;
1119 
1120 		cuse_lock();
1121 		error = cuse_free_unit_by_id_locked(pcs, n);
1122 		cuse_unlock();
1123 		break;
1124 
1125 	case CUSE_IOCTL_ALLOC_MEMORY:
1126 
1127 		pai = (void *)data;
1128 
1129 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1130 			error = ENOMEM;
1131 			break;
1132 		}
1133 		if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
1134 			error = ENOMEM;
1135 			break;
1136 		}
1137 		error = cuse_server_alloc_memory(pcs,
1138 		    pai->alloc_nr, pai->page_count);
1139 		break;
1140 
1141 	case CUSE_IOCTL_FREE_MEMORY:
1142 		pai = (void *)data;
1143 
1144 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1145 			error = ENOMEM;
1146 			break;
1147 		}
1148 		error = cuse_server_free_memory(pcs, pai->alloc_nr);
1149 		break;
1150 
1151 	case CUSE_IOCTL_GET_SIG:
1152 
1153 		cuse_lock();
1154 		pccmd = cuse_server_find_command(pcs, curthread);
1155 
1156 		if (pccmd != NULL) {
1157 			n = pccmd->got_signal;
1158 			pccmd->got_signal = 0;
1159 		} else {
1160 			n = 0;
1161 		}
1162 		cuse_unlock();
1163 
1164 		*(int *)data = n;
1165 
1166 		break;
1167 
1168 	case CUSE_IOCTL_SET_PFH:
1169 
1170 		cuse_lock();
1171 		pccmd = cuse_server_find_command(pcs, curthread);
1172 
1173 		if (pccmd != NULL) {
1174 			pcc = pccmd->client;
1175 			for (n = 0; n != CUSE_CMD_MAX; n++) {
1176 				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
1177 			}
1178 		} else {
1179 			error = ENXIO;
1180 		}
1181 		cuse_unlock();
1182 		break;
1183 
1184 	case CUSE_IOCTL_CREATE_DEV:
1185 
1186 		error = priv_check(curthread, PRIV_DRIVER);
1187 		if (error)
1188 			break;
1189 
1190 		pcd = (void *)data;
1191 
1192 		/* filter input */
1193 
1194 		pcd->devname[sizeof(pcd->devname) - 1] = 0;
1195 
1196 		if (pcd->devname[0] == 0) {
1197 			error = EINVAL;
1198 			break;
1199 		}
1200 		cuse_str_filter(pcd->devname);
1201 
1202 		pcd->permissions &= 0777;
1203 
1204 		/* try to allocate a character device */
1205 
1206 		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1207 
1208 		if (pcsd == NULL) {
1209 			error = ENOMEM;
1210 			break;
1211 		}
1212 		pcsd->server = pcs;
1213 
1214 		pcsd->user_dev = pcd->dev;
1215 
1216 		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1217 		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1218 		    pcd->permissions, "%s", pcd->devname);
1219 
1220 		if (pcsd->kern_dev == NULL) {
1221 			free(pcsd, M_CUSE);
1222 			error = ENOMEM;
1223 			break;
1224 		}
1225 		pcsd->kern_dev->si_drv1 = pcsd;
1226 
1227 		cuse_lock();
1228 		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1229 		cuse_unlock();
1230 
1231 		break;
1232 
1233 	case CUSE_IOCTL_DESTROY_DEV:
1234 
1235 		error = priv_check(curthread, PRIV_DRIVER);
1236 		if (error)
1237 			break;
1238 
1239 		cuse_lock();
1240 
1241 		error = EINVAL;
1242 
1243 		pcsd = TAILQ_FIRST(&pcs->hdev);
1244 		while (pcsd != NULL) {
1245 			if (pcsd->user_dev == *(struct cuse_dev **)data) {
1246 				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1247 				cuse_unlock();
1248 				cuse_server_free_dev(pcsd);
1249 				cuse_lock();
1250 				error = 0;
1251 				pcsd = TAILQ_FIRST(&pcs->hdev);
1252 			} else {
1253 				pcsd = TAILQ_NEXT(pcsd, entry);
1254 			}
1255 		}
1256 
1257 		cuse_unlock();
1258 		break;
1259 
1260 	case CUSE_IOCTL_WRITE_DATA:
1261 	case CUSE_IOCTL_READ_DATA:
1262 
1263 		cuse_lock();
1264 		pchk = (struct cuse_data_chunk *)data;
1265 
1266 		pccmd = cuse_server_find_command(pcs, curthread);
1267 
1268 		if (pccmd == NULL) {
1269 			error = ENXIO;	/* invalid request */
1270 		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1271 			error = EFAULT;	/* NULL pointer */
1272 		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1273 			error = cuse_server_ioctl_copy_locked(pccmd,
1274 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1275 		} else {
1276 			error = cuse_server_data_copy_locked(pccmd,
1277 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1278 		}
1279 		cuse_unlock();
1280 		break;
1281 
1282 	case CUSE_IOCTL_SELWAKEUP:
1283 		cuse_lock();
1284 		/*
1285 		 * We don't know which direction caused the event.
1286 		 * Wakeup both!
1287 		 */
1288 		cuse_server_wakeup_all_client_locked(pcs);
1289 		cuse_unlock();
1290 		break;
1291 
1292 	default:
1293 		error = ENXIO;
1294 		break;
1295 	}
1296 	return (error);
1297 }
1298 
1299 static int
1300 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1301 {
1302 	return (events & (POLLHUP | POLLPRI | POLLIN |
1303 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1304 }
1305 
1306 static int
1307 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1308     vm_size_t size, struct vm_object **object, int nprot)
1309 {
1310 	uint32_t page_nr = *offset / PAGE_SIZE;
1311 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1312 	struct cuse_memory *mem;
1313 	struct cuse_server *pcs;
1314 	int error;
1315 
1316 	error = cuse_server_get(&pcs);
1317 	if (error != 0)
1318 		return (error);
1319 
1320 	cuse_lock();
1321 	/* lookup memory structure */
1322 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1323 		if (mem->alloc_nr == alloc_nr)
1324 			break;
1325 	}
1326 	if (mem == NULL) {
1327 		cuse_unlock();
1328 		return (ENOMEM);
1329 	}
1330 	/* verify page offset */
1331 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1332 	if (page_nr >= mem->page_count) {
1333 		cuse_unlock();
1334 		return (ENXIO);
1335 	}
1336 	/* verify mmap size */
1337 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1338 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1339 		cuse_unlock();
1340 		return (EINVAL);
1341 	}
1342 	vm_object_reference(mem->object);
1343 	*object = mem->object;
1344 	cuse_unlock();
1345 
1346 	/* set new VM object offset to use */
1347 	*offset = page_nr * PAGE_SIZE;
1348 
1349 	/* success */
1350 	return (0);
1351 }
1352 
1353 /*------------------------------------------------------------------------*
1354  *	CUSE CLIENT PART
1355  *------------------------------------------------------------------------*/
1356 static void
1357 cuse_client_free(void *arg)
1358 {
1359 	struct cuse_client *pcc = arg;
1360 	struct cuse_client_command *pccmd;
1361 	struct cuse_server *pcs;
1362 	int n;
1363 
1364 	cuse_lock();
1365 	cuse_client_is_closing(pcc);
1366 	TAILQ_REMOVE(&pcc->server->hcli, pcc, entry);
1367 	cuse_unlock();
1368 
1369 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1370 
1371 		pccmd = &pcc->cmds[n];
1372 
1373 		sx_destroy(&pccmd->sx);
1374 		cv_destroy(&pccmd->cv);
1375 	}
1376 
1377 	pcs = pcc->server;
1378 
1379 	free(pcc, M_CUSE);
1380 
1381 	/* drop reference on server */
1382 	cuse_server_unref(pcs);
1383 }
1384 
1385 static int
1386 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1387 {
1388 	struct cuse_client_command *pccmd;
1389 	struct cuse_server_dev *pcsd;
1390 	struct cuse_client *pcc;
1391 	struct cuse_server *pcs;
1392 	struct cuse_dev *pcd;
1393 	int error;
1394 	int n;
1395 
1396 	cuse_lock();
1397 	pcsd = dev->si_drv1;
1398 	if (pcsd != NULL) {
1399 		pcs = pcsd->server;
1400 		pcd = pcsd->user_dev;
1401 		/*
1402 		 * Check that the refcount didn't wrap and that the
1403 		 * same process is not both client and server. This
1404 		 * can easily lead to deadlocks when destroying the
1405 		 * CUSE character device nodes:
1406 		 */
1407 		pcs->refs++;
1408 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1409 			/* overflow or wrong PID */
1410 			pcs->refs--;
1411 			pcsd = NULL;
1412 		}
1413 	} else {
1414 		pcs = NULL;
1415 		pcd = NULL;
1416 	}
1417 	cuse_unlock();
1418 
1419 	if (pcsd == NULL)
1420 		return (EINVAL);
1421 
1422 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1423 	if (pcc == NULL) {
1424 		/* drop reference on server */
1425 		cuse_server_unref(pcs);
1426 		return (ENOMEM);
1427 	}
1428 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1429 		printf("Cuse: Cannot set cdevpriv.\n");
1430 		/* drop reference on server */
1431 		cuse_server_unref(pcs);
1432 		free(pcc, M_CUSE);
1433 		return (ENOMEM);
1434 	}
1435 	pcc->fflags = fflags;
1436 	pcc->server_dev = pcsd;
1437 	pcc->server = pcs;
1438 
1439 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1440 
1441 		pccmd = &pcc->cmds[n];
1442 
1443 		pccmd->sub.dev = pcd;
1444 		pccmd->sub.command = n;
1445 		pccmd->client = pcc;
1446 
1447 		sx_init(&pccmd->sx, "cuse-client-sx");
1448 		cv_init(&pccmd->cv, "cuse-client-cv");
1449 	}
1450 
1451 	cuse_lock();
1452 
1453 	/* cuse_client_free() assumes that the client is listed somewhere! */
1454 	/* always enqueue */
1455 
1456 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1457 
1458 	/* check if server is closing */
1459 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1460 		error = EINVAL;
1461 	} else {
1462 		error = 0;
1463 	}
1464 	cuse_unlock();
1465 
1466 	if (error) {
1467 		devfs_clear_cdevpriv();	/* XXX bugfix */
1468 		return (error);
1469 	}
1470 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1471 
1472 	cuse_cmd_lock(pccmd);
1473 
1474 	cuse_lock();
1475 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1476 
1477 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1478 	cuse_unlock();
1479 
1480 	if (error < 0) {
1481 		error = cuse_convert_error(error);
1482 	} else {
1483 		error = 0;
1484 	}
1485 
1486 	cuse_cmd_unlock(pccmd);
1487 
1488 	if (error)
1489 		devfs_clear_cdevpriv();	/* XXX bugfix */
1490 
1491 	return (error);
1492 }
1493 
1494 static int
1495 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1496 {
1497 	struct cuse_client_command *pccmd;
1498 	struct cuse_client *pcc;
1499 	int error;
1500 
1501 	error = cuse_client_get(&pcc);
1502 	if (error != 0)
1503 		return (0);
1504 
1505 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1506 
1507 	cuse_cmd_lock(pccmd);
1508 
1509 	cuse_lock();
1510 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1511 
1512 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1513 	cuse_unlock();
1514 
1515 	cuse_cmd_unlock(pccmd);
1516 
1517 	cuse_lock();
1518 	cuse_client_is_closing(pcc);
1519 	cuse_unlock();
1520 
1521 	return (0);
1522 }
1523 
1524 static void
1525 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1526 {
1527 	int temp;
1528 
1529 	cuse_lock();
1530 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1531 	    CUSE_CLI_KNOTE_HAS_WRITE));
1532 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1533 	    CUSE_CLI_KNOTE_NEED_WRITE);
1534 	cuse_unlock();
1535 
1536 	if (temp != 0) {
1537 		/* get the latest polling state from the server */
1538 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1539 
1540 		if (temp & (POLLIN | POLLOUT)) {
1541 			cuse_lock();
1542 			if (temp & POLLIN)
1543 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1544 			if (temp & POLLOUT)
1545 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1546 
1547 			/* make sure the "knote" gets woken up */
1548 			cuse_server_wakeup_locked(pcc->server);
1549 			cuse_unlock();
1550 		}
1551 	}
1552 }
1553 
1554 static int
1555 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1556 {
1557 	struct cuse_client_command *pccmd;
1558 	struct cuse_client *pcc;
1559 	int error;
1560 	int len;
1561 
1562 	error = cuse_client_get(&pcc);
1563 	if (error != 0)
1564 		return (error);
1565 
1566 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1567 
1568 	if (uio->uio_segflg != UIO_USERSPACE) {
1569 		return (EINVAL);
1570 	}
1571 	uio->uio_segflg = UIO_NOCOPY;
1572 
1573 	cuse_cmd_lock(pccmd);
1574 
1575 	while (uio->uio_resid != 0) {
1576 
1577 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1578 			error = ENOMEM;
1579 			break;
1580 		}
1581 		len = uio->uio_iov->iov_len;
1582 
1583 		cuse_lock();
1584 		cuse_client_send_command_locked(pccmd,
1585 		    (uintptr_t)uio->uio_iov->iov_base,
1586 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1587 
1588 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1589 		cuse_unlock();
1590 
1591 		if (error < 0) {
1592 			error = cuse_convert_error(error);
1593 			break;
1594 		} else if (error == len) {
1595 			error = uiomove(NULL, error, uio);
1596 			if (error)
1597 				break;
1598 		} else {
1599 			error = uiomove(NULL, error, uio);
1600 			break;
1601 		}
1602 	}
1603 	cuse_cmd_unlock(pccmd);
1604 
1605 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1606 
1607 	if (error == EWOULDBLOCK)
1608 		cuse_client_kqfilter_poll(dev, pcc);
1609 
1610 	return (error);
1611 }
1612 
1613 static int
1614 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1615 {
1616 	struct cuse_client_command *pccmd;
1617 	struct cuse_client *pcc;
1618 	int error;
1619 	int len;
1620 
1621 	error = cuse_client_get(&pcc);
1622 	if (error != 0)
1623 		return (error);
1624 
1625 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1626 
1627 	if (uio->uio_segflg != UIO_USERSPACE) {
1628 		return (EINVAL);
1629 	}
1630 	uio->uio_segflg = UIO_NOCOPY;
1631 
1632 	cuse_cmd_lock(pccmd);
1633 
1634 	while (uio->uio_resid != 0) {
1635 
1636 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1637 			error = ENOMEM;
1638 			break;
1639 		}
1640 		len = uio->uio_iov->iov_len;
1641 
1642 		cuse_lock();
1643 		cuse_client_send_command_locked(pccmd,
1644 		    (uintptr_t)uio->uio_iov->iov_base,
1645 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1646 
1647 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1648 		cuse_unlock();
1649 
1650 		if (error < 0) {
1651 			error = cuse_convert_error(error);
1652 			break;
1653 		} else if (error == len) {
1654 			error = uiomove(NULL, error, uio);
1655 			if (error)
1656 				break;
1657 		} else {
1658 			error = uiomove(NULL, error, uio);
1659 			break;
1660 		}
1661 	}
1662 	cuse_cmd_unlock(pccmd);
1663 
1664 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1665 
1666 	if (error == EWOULDBLOCK)
1667 		cuse_client_kqfilter_poll(dev, pcc);
1668 
1669 	return (error);
1670 }
1671 
1672 int
1673 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1674     caddr_t data, int fflag, struct thread *td)
1675 {
1676 	struct cuse_client_command *pccmd;
1677 	struct cuse_client *pcc;
1678 	int error;
1679 	int len;
1680 
1681 	error = cuse_client_get(&pcc);
1682 	if (error != 0)
1683 		return (error);
1684 
1685 	len = IOCPARM_LEN(cmd);
1686 	if (len > CUSE_BUFFER_MAX)
1687 		return (ENOMEM);
1688 
1689 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1690 
1691 	cuse_cmd_lock(pccmd);
1692 
1693 	if (cmd & (IOC_IN | IOC_VOID))
1694 		memcpy(pcc->ioctl_buffer, data, len);
1695 
1696 	/*
1697 	 * When the ioctl-length is zero drivers can pass information
1698 	 * through the data pointer of the ioctl. Make sure this information
1699 	 * is forwarded to the driver.
1700 	 */
1701 
1702 	cuse_lock();
1703 	cuse_client_send_command_locked(pccmd,
1704 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1705 	    (unsigned long)cmd, pcc->fflags,
1706 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1707 
1708 	error = cuse_client_receive_command_locked(pccmd, data, len);
1709 	cuse_unlock();
1710 
1711 	if (error < 0) {
1712 		error = cuse_convert_error(error);
1713 	} else {
1714 		error = 0;
1715 	}
1716 
1717 	if (cmd & IOC_OUT)
1718 		memcpy(data, pcc->ioctl_buffer, len);
1719 
1720 	cuse_cmd_unlock(pccmd);
1721 
1722 	if (error == EWOULDBLOCK)
1723 		cuse_client_kqfilter_poll(dev, pcc);
1724 
1725 	return (error);
1726 }
1727 
1728 static int
1729 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1730 {
1731 	struct cuse_client_command *pccmd;
1732 	struct cuse_client *pcc;
1733 	unsigned long temp;
1734 	int error;
1735 	int revents;
1736 
1737 	error = cuse_client_get(&pcc);
1738 	if (error != 0)
1739 		goto pollnval;
1740 
1741 	temp = 0;
1742 
1743 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1744 		temp |= CUSE_POLL_READ;
1745 
1746 	if (events & (POLLOUT | POLLWRNORM))
1747 		temp |= CUSE_POLL_WRITE;
1748 
1749 	if (events & POLLHUP)
1750 		temp |= CUSE_POLL_ERROR;
1751 
1752 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1753 
1754 	cuse_cmd_lock(pccmd);
1755 
1756 	/* Need to selrecord() first to not loose any events. */
1757 	if (temp != 0 && td != NULL)
1758 		selrecord(td, &pcc->server->selinfo);
1759 
1760 	cuse_lock();
1761 	cuse_client_send_command_locked(pccmd,
1762 	    0, temp, pcc->fflags, IO_NDELAY);
1763 
1764 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1765 	cuse_unlock();
1766 
1767 	cuse_cmd_unlock(pccmd);
1768 
1769 	if (error < 0) {
1770 		goto pollnval;
1771 	} else {
1772 		revents = 0;
1773 		if (error & CUSE_POLL_READ)
1774 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1775 		if (error & CUSE_POLL_WRITE)
1776 			revents |= (events & (POLLOUT | POLLWRNORM));
1777 		if (error & CUSE_POLL_ERROR)
1778 			revents |= (events & POLLHUP);
1779 	}
1780 	return (revents);
1781 
1782 pollnval:
1783 	/* XXX many clients don't understand POLLNVAL */
1784 	return (events & (POLLHUP | POLLPRI | POLLIN |
1785 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1786 }
1787 
1788 static int
1789 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1790     vm_size_t size, struct vm_object **object, int nprot)
1791 {
1792 	uint32_t page_nr = *offset / PAGE_SIZE;
1793 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1794 	struct cuse_memory *mem;
1795 	struct cuse_client *pcc;
1796 	int error;
1797 
1798 	error = cuse_client_get(&pcc);
1799 	if (error != 0)
1800 		return (error);
1801 
1802 	cuse_lock();
1803 	/* lookup memory structure */
1804 	TAILQ_FOREACH(mem, &pcc->server->hmem, entry) {
1805 		if (mem->alloc_nr == alloc_nr)
1806 			break;
1807 	}
1808 	if (mem == NULL) {
1809 		cuse_unlock();
1810 		return (ENOMEM);
1811 	}
1812 	/* verify page offset */
1813 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1814 	if (page_nr >= mem->page_count) {
1815 		cuse_unlock();
1816 		return (ENXIO);
1817 	}
1818 	/* verify mmap size */
1819 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1820 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1821 		cuse_unlock();
1822 		return (EINVAL);
1823 	}
1824 	vm_object_reference(mem->object);
1825 	*object = mem->object;
1826 	cuse_unlock();
1827 
1828 	/* set new VM object offset to use */
1829 	*offset = page_nr * PAGE_SIZE;
1830 
1831 	/* success */
1832 	return (0);
1833 }
1834 
1835 static void
1836 cuse_client_kqfilter_read_detach(struct knote *kn)
1837 {
1838 	struct cuse_client *pcc;
1839 
1840 	cuse_lock();
1841 	pcc = kn->kn_hook;
1842 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1843 	cuse_unlock();
1844 }
1845 
1846 static void
1847 cuse_client_kqfilter_write_detach(struct knote *kn)
1848 {
1849 	struct cuse_client *pcc;
1850 
1851 	cuse_lock();
1852 	pcc = kn->kn_hook;
1853 	knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1854 	cuse_unlock();
1855 }
1856 
1857 static int
1858 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1859 {
1860 	struct cuse_client *pcc;
1861 
1862 	mtx_assert(&cuse_mtx, MA_OWNED);
1863 
1864 	pcc = kn->kn_hook;
1865 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1866 }
1867 
1868 static int
1869 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1870 {
1871 	struct cuse_client *pcc;
1872 
1873 	mtx_assert(&cuse_mtx, MA_OWNED);
1874 
1875 	pcc = kn->kn_hook;
1876 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1877 }
1878 
1879 static int
1880 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1881 {
1882 	struct cuse_client *pcc;
1883 	struct cuse_server *pcs;
1884 	int error;
1885 
1886 	error = cuse_client_get(&pcc);
1887 	if (error != 0)
1888 		return (error);
1889 
1890 	cuse_lock();
1891 	pcs = pcc->server;
1892 	switch (kn->kn_filter) {
1893 	case EVFILT_READ:
1894 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1895 		kn->kn_hook = pcc;
1896 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1897 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1898 		break;
1899 	case EVFILT_WRITE:
1900 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1901 		kn->kn_hook = pcc;
1902 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1903 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1904 		break;
1905 	default:
1906 		error = EINVAL;
1907 		break;
1908 	}
1909 	cuse_unlock();
1910 
1911 	if (error == 0)
1912 		cuse_client_kqfilter_poll(dev, pcc);
1913 	return (error);
1914 }
1915