xref: /freebsd/sys/fs/cuse/cuse.c (revision e17f5b1d)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2020 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/stdint.h>
28 #include <sys/stddef.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/linker_set.h>
36 #include <sys/module.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/condvar.h>
40 #include <sys/sysctl.h>
41 #include <sys/unistd.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/uio.h>
45 #include <sys/poll.h>
46 #include <sys/sx.h>
47 #include <sys/rwlock.h>
48 #include <sys/queue.h>
49 #include <sys/fcntl.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/selinfo.h>
53 #include <sys/ptrace.h>
54 #include <sys/sysent.h>
55 
56 #include <machine/bus.h>
57 
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pager.h>
63 
64 #include <fs/cuse/cuse_defs.h>
65 #include <fs/cuse/cuse_ioctl.h>
66 
67 MODULE_VERSION(cuse, 1);
68 
69 /*
70  * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
71  * declaring support for the cuse4bsd interface in cuse.ko:
72  */
73 MODULE_VERSION(cuse4bsd, 1);
74 
75 #ifdef FEATURE
76 FEATURE(cuse, "Userspace character devices");
77 #endif
78 
79 struct cuse_command;
80 struct cuse_server;
81 struct cuse_client;
82 
83 struct cuse_client_command {
84 	TAILQ_ENTRY(cuse_client_command) entry;
85 	struct cuse_command sub;
86 	struct sx sx;
87 	struct cv cv;
88 	struct thread *entered;
89 	struct cuse_client *client;
90 	struct proc *proc_curr;
91 	int	proc_refs;
92 	int	got_signal;
93 	int	error;
94 	int	command;
95 };
96 
97 struct cuse_memory {
98 	TAILQ_ENTRY(cuse_memory) entry;
99 	vm_object_t object;
100 	uint32_t page_count;
101 	uint32_t alloc_nr;
102 };
103 
104 struct cuse_server_dev {
105 	TAILQ_ENTRY(cuse_server_dev) entry;
106 	struct cuse_server *server;
107 	struct cdev *kern_dev;
108 	struct cuse_dev *user_dev;
109 };
110 
111 struct cuse_server {
112 	TAILQ_ENTRY(cuse_server) entry;
113 	TAILQ_HEAD(, cuse_client_command) head;
114 	TAILQ_HEAD(, cuse_server_dev) hdev;
115 	TAILQ_HEAD(, cuse_client) hcli;
116 	TAILQ_HEAD(, cuse_memory) hmem;
117 	struct mtx mtx;
118 	struct cv cv;
119 	struct selinfo selinfo;
120 	pid_t	pid;
121 	int	is_closing;
122 	int	refs;
123 };
124 
125 struct cuse_client {
126 	TAILQ_ENTRY(cuse_client) entry;
127 	TAILQ_ENTRY(cuse_client) entry_ref;
128 	struct cuse_client_command cmds[CUSE_CMD_MAX];
129 	struct cuse_server *server;
130 	struct cuse_server_dev *server_dev;
131 
132 	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);
133 
134 	int	fflags;			/* file flags */
135 	int	cflags;			/* client flags */
136 #define	CUSE_CLI_IS_CLOSING 0x01
137 #define	CUSE_CLI_KNOTE_NEED_READ 0x02
138 #define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
139 #define	CUSE_CLI_KNOTE_HAS_READ 0x08
140 #define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
141 };
142 
143 #define	CUSE_CLIENT_CLOSING(pcc) \
144     ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
145 
146 static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");
147 
148 static TAILQ_HEAD(, cuse_server) cuse_server_head;
149 static struct mtx cuse_global_mtx;
150 static struct cdev *cuse_dev;
151 static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
152 static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
153 
154 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
155 static void cuse_client_kqfilter_read_detach(struct knote *kn);
156 static void cuse_client_kqfilter_write_detach(struct knote *kn);
157 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
158 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
159 
160 static struct filterops cuse_client_kqfilter_read_ops = {
161 	.f_isfd = 1,
162 	.f_detach = cuse_client_kqfilter_read_detach,
163 	.f_event = cuse_client_kqfilter_read_event,
164 };
165 
166 static struct filterops cuse_client_kqfilter_write_ops = {
167 	.f_isfd = 1,
168 	.f_detach = cuse_client_kqfilter_write_detach,
169 	.f_event = cuse_client_kqfilter_write_event,
170 };
171 
172 static d_open_t cuse_client_open;
173 static d_close_t cuse_client_close;
174 static d_ioctl_t cuse_client_ioctl;
175 static d_read_t cuse_client_read;
176 static d_write_t cuse_client_write;
177 static d_poll_t cuse_client_poll;
178 static d_mmap_single_t cuse_client_mmap_single;
179 static d_kqfilter_t cuse_client_kqfilter;
180 
181 static struct cdevsw cuse_client_devsw = {
182 	.d_version = D_VERSION,
183 	.d_open = cuse_client_open,
184 	.d_close = cuse_client_close,
185 	.d_ioctl = cuse_client_ioctl,
186 	.d_name = "cuse_client",
187 	.d_flags = D_TRACKCLOSE,
188 	.d_read = cuse_client_read,
189 	.d_write = cuse_client_write,
190 	.d_poll = cuse_client_poll,
191 	.d_mmap_single = cuse_client_mmap_single,
192 	.d_kqfilter = cuse_client_kqfilter,
193 };
194 
195 static d_open_t cuse_server_open;
196 static d_close_t cuse_server_close;
197 static d_ioctl_t cuse_server_ioctl;
198 static d_read_t cuse_server_read;
199 static d_write_t cuse_server_write;
200 static d_poll_t cuse_server_poll;
201 static d_mmap_single_t cuse_server_mmap_single;
202 
203 static struct cdevsw cuse_server_devsw = {
204 	.d_version = D_VERSION,
205 	.d_open = cuse_server_open,
206 	.d_close = cuse_server_close,
207 	.d_ioctl = cuse_server_ioctl,
208 	.d_name = "cuse_server",
209 	.d_flags = D_TRACKCLOSE,
210 	.d_read = cuse_server_read,
211 	.d_write = cuse_server_write,
212 	.d_poll = cuse_server_poll,
213 	.d_mmap_single = cuse_server_mmap_single,
214 };
215 
216 static void cuse_client_is_closing(struct cuse_client *);
217 static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
218 
219 static void
220 cuse_global_lock(void)
221 {
222 	mtx_lock(&cuse_global_mtx);
223 }
224 
225 static void
226 cuse_global_unlock(void)
227 {
228 	mtx_unlock(&cuse_global_mtx);
229 }
230 
231 static void
232 cuse_server_lock(struct cuse_server *pcs)
233 {
234 	mtx_lock(&pcs->mtx);
235 }
236 
237 static void
238 cuse_server_unlock(struct cuse_server *pcs)
239 {
240 	mtx_unlock(&pcs->mtx);
241 }
242 
243 static void
244 cuse_cmd_lock(struct cuse_client_command *pccmd)
245 {
246 	sx_xlock(&pccmd->sx);
247 }
248 
249 static void
250 cuse_cmd_unlock(struct cuse_client_command *pccmd)
251 {
252 	sx_xunlock(&pccmd->sx);
253 }
254 
255 static void
256 cuse_kern_init(void *arg)
257 {
258 	TAILQ_INIT(&cuse_server_head);
259 
260 	mtx_init(&cuse_global_mtx, "cuse-global-mtx", NULL, MTX_DEF);
261 
262 	cuse_dev = make_dev(&cuse_server_devsw, 0,
263 	    UID_ROOT, GID_OPERATOR, 0600, "cuse");
264 
265 	printf("Cuse v%d.%d.%d @ /dev/cuse\n",
266 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
267 	    (CUSE_VERSION >> 0) & 0xFF);
268 }
269 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
270 
271 static void
272 cuse_kern_uninit(void *arg)
273 {
274 	void *ptr;
275 
276 	while (1) {
277 
278 		printf("Cuse: Please exit all /dev/cuse instances "
279 		    "and processes which have used this device.\n");
280 
281 		pause("DRAIN", 2 * hz);
282 
283 		cuse_global_lock();
284 		ptr = TAILQ_FIRST(&cuse_server_head);
285 		cuse_global_unlock();
286 
287 		if (ptr == NULL)
288 			break;
289 	}
290 
291 	if (cuse_dev != NULL)
292 		destroy_dev(cuse_dev);
293 
294 	mtx_destroy(&cuse_global_mtx);
295 }
296 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
297 
298 static int
299 cuse_server_get(struct cuse_server **ppcs)
300 {
301 	struct cuse_server *pcs;
302 	int error;
303 
304 	error = devfs_get_cdevpriv((void **)&pcs);
305 	if (error != 0) {
306 		*ppcs = NULL;
307 		return (error);
308 	}
309 	if (pcs->is_closing) {
310 		*ppcs = NULL;
311 		return (EINVAL);
312 	}
313 	*ppcs = pcs;
314 	return (0);
315 }
316 
317 static void
318 cuse_server_is_closing(struct cuse_server *pcs)
319 {
320 	struct cuse_client *pcc;
321 
322 	if (pcs->is_closing)
323 		return;
324 
325 	pcs->is_closing = 1;
326 
327 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
328 		cuse_client_is_closing(pcc);
329 	}
330 }
331 
332 static struct cuse_client_command *
333 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
334 {
335 	struct cuse_client *pcc;
336 	int n;
337 
338 	if (pcs->is_closing)
339 		goto done;
340 
341 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
342 		if (CUSE_CLIENT_CLOSING(pcc))
343 			continue;
344 		for (n = 0; n != CUSE_CMD_MAX; n++) {
345 			if (pcc->cmds[n].entered == td)
346 				return (&pcc->cmds[n]);
347 		}
348 	}
349 done:
350 	return (NULL);
351 }
352 
/*
 * Sanitize a NUL-terminated string in place: every character which is
 * not a letter, a digit, '.', '_' or '/' is replaced by '_'.
 */
static void
cuse_str_filter(char *ptr)
{
	int c;

	for (; (c = *ptr) != 0; ptr++) {
		if (((c >= 'a') && (c <= 'z')) ||
		    ((c >= 'A') && (c <= 'Z')) ||
		    ((c >= '0') && (c <= '9')) ||
		    (c == '.') || (c == '_') || (c == '/'))
			continue;	/* allowed character */

		*ptr = '_';
	}
}
381 
382 static int
383 cuse_convert_error(int error)
384 {
385 	;				/* indent fix */
386 	switch (error) {
387 	case CUSE_ERR_NONE:
388 		return (0);
389 	case CUSE_ERR_BUSY:
390 		return (EBUSY);
391 	case CUSE_ERR_WOULDBLOCK:
392 		return (EWOULDBLOCK);
393 	case CUSE_ERR_INVALID:
394 		return (EINVAL);
395 	case CUSE_ERR_NO_MEMORY:
396 		return (ENOMEM);
397 	case CUSE_ERR_FAULT:
398 		return (EFAULT);
399 	case CUSE_ERR_SIGNAL:
400 		return (EINTR);
401 	case CUSE_ERR_NO_DEVICE:
402 		return (ENODEV);
403 	default:
404 		return (ENXIO);
405 	}
406 }
407 
408 static void
409 cuse_vm_memory_free(struct cuse_memory *mem)
410 {
411 	/* last user is gone - free */
412 	vm_object_deallocate(mem->object);
413 
414 	/* free CUSE memory */
415 	free(mem, M_CUSE);
416 }
417 
418 static int
419 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
420     uint32_t page_count)
421 {
422 	struct cuse_memory *temp;
423 	struct cuse_memory *mem;
424 	vm_object_t object;
425 	int error;
426 
427 	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
428 	if (mem == NULL)
429 		return (ENOMEM);
430 
431 	object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
432 	    VM_PROT_DEFAULT, 0, curthread->td_ucred);
433 	if (object == NULL) {
434 		error = ENOMEM;
435 		goto error_0;
436 	}
437 
438 	cuse_server_lock(pcs);
439 	/* check if allocation number already exists */
440 	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
441 		if (temp->alloc_nr == alloc_nr)
442 			break;
443 	}
444 	if (temp != NULL) {
445 		cuse_server_unlock(pcs);
446 		error = EBUSY;
447 		goto error_1;
448 	}
449 	mem->object = object;
450 	mem->page_count = page_count;
451 	mem->alloc_nr = alloc_nr;
452 	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
453 	cuse_server_unlock(pcs);
454 
455 	return (0);
456 
457 error_1:
458 	vm_object_deallocate(object);
459 error_0:
460 	free(mem, M_CUSE);
461 	return (error);
462 }
463 
464 static int
465 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
466 {
467 	struct cuse_memory *mem;
468 
469 	cuse_server_lock(pcs);
470 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
471 		if (mem->alloc_nr == alloc_nr)
472 			break;
473 	}
474 	if (mem == NULL) {
475 		cuse_server_unlock(pcs);
476 		return (EINVAL);
477 	}
478 	TAILQ_REMOVE(&pcs->hmem, mem, entry);
479 	cuse_server_unlock(pcs);
480 
481 	cuse_vm_memory_free(mem);
482 
483 	return (0);
484 }
485 
486 static int
487 cuse_client_get(struct cuse_client **ppcc)
488 {
489 	struct cuse_client *pcc;
490 	int error;
491 
492 	/* try to get private data */
493 	error = devfs_get_cdevpriv((void **)&pcc);
494 	if (error != 0) {
495 		*ppcc = NULL;
496 		return (error);
497 	}
498 	if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
499 		*ppcc = NULL;
500 		return (EINVAL);
501 	}
502 	*ppcc = pcc;
503 	return (0);
504 }
505 
506 static void
507 cuse_client_is_closing(struct cuse_client *pcc)
508 {
509 	struct cuse_client_command *pccmd;
510 	uint32_t n;
511 
512 	if (CUSE_CLIENT_CLOSING(pcc))
513 		return;
514 
515 	pcc->cflags |= CUSE_CLI_IS_CLOSING;
516 	pcc->server_dev = NULL;
517 
518 	for (n = 0; n != CUSE_CMD_MAX; n++) {
519 
520 		pccmd = &pcc->cmds[n];
521 
522 		if (pccmd->entry.tqe_prev != NULL) {
523 			TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
524 			pccmd->entry.tqe_prev = NULL;
525 		}
526 		cv_broadcast(&pccmd->cv);
527 	}
528 }
529 
530 static void
531 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
532     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
533 {
534 	unsigned long cuse_fflags = 0;
535 	struct cuse_server *pcs;
536 
537 	if (fflags & FREAD)
538 		cuse_fflags |= CUSE_FFLAG_READ;
539 
540 	if (fflags & FWRITE)
541 		cuse_fflags |= CUSE_FFLAG_WRITE;
542 
543 	if (ioflag & IO_NDELAY)
544 		cuse_fflags |= CUSE_FFLAG_NONBLOCK;
545 #if defined(__LP64__)
546 	if (SV_CURPROC_FLAG(SV_ILP32))
547 		cuse_fflags |= CUSE_FFLAG_COMPAT32;
548 #endif
549 	pccmd->sub.fflags = cuse_fflags;
550 	pccmd->sub.data_pointer = data_ptr;
551 	pccmd->sub.argument = arg;
552 
553 	pcs = pccmd->client->server;
554 
555 	if ((pccmd->entry.tqe_prev == NULL) &&
556 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
557 	    (pcs->is_closing == 0)) {
558 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
559 		cv_signal(&pcs->cv);
560 	}
561 }
562 
563 static void
564 cuse_client_got_signal(struct cuse_client_command *pccmd)
565 {
566 	struct cuse_server *pcs;
567 
568 	pccmd->got_signal = 1;
569 
570 	pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
571 
572 	pcs = pccmd->client->server;
573 
574 	if ((pccmd->entry.tqe_prev == NULL) &&
575 	    (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
576 	    (pcs->is_closing == 0)) {
577 		TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
578 		cv_signal(&pcs->cv);
579 	}
580 }
581 
582 static int
583 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
584     uint8_t *arg_ptr, uint32_t arg_len)
585 {
586 	struct cuse_server *pcs;
587 	int error;
588 
589 	pcs = pccmd->client->server;
590 	error = 0;
591 
592 	pccmd->proc_curr = curthread->td_proc;
593 
594 	if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
595 		error = CUSE_ERR_OTHER;
596 		goto done;
597 	}
598 	while (pccmd->command == CUSE_CMD_NONE) {
599 		if (error != 0) {
600 			cv_wait(&pccmd->cv, &pcs->mtx);
601 		} else {
602 			error = cv_wait_sig(&pccmd->cv, &pcs->mtx);
603 
604 			if (error != 0)
605 				cuse_client_got_signal(pccmd);
606 		}
607 		if (CUSE_CLIENT_CLOSING(pccmd->client) || pcs->is_closing) {
608 			error = CUSE_ERR_OTHER;
609 			goto done;
610 		}
611 	}
612 
613 	error = pccmd->error;
614 	pccmd->command = CUSE_CMD_NONE;
615 	cv_signal(&pccmd->cv);
616 
617 done:
618 
619 	/* wait until all process references are gone */
620 
621 	pccmd->proc_curr = NULL;
622 
623 	while (pccmd->proc_refs != 0)
624 		cv_wait(&pccmd->cv, &pcs->mtx);
625 
626 	return (error);
627 }
628 
629 /*------------------------------------------------------------------------*
630  *	CUSE SERVER PART
631  *------------------------------------------------------------------------*/
632 
633 static void
634 cuse_server_free_dev(struct cuse_server_dev *pcsd)
635 {
636 	struct cuse_server *pcs;
637 	struct cuse_client *pcc;
638 
639 	/* get server pointer */
640 	pcs = pcsd->server;
641 
642 	/* prevent creation of more devices */
643 	cuse_server_lock(pcs);
644 	if (pcsd->kern_dev != NULL)
645 		pcsd->kern_dev->si_drv1 = NULL;
646 
647 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
648 		if (pcc->server_dev == pcsd)
649 			cuse_client_is_closing(pcc);
650 	}
651 	cuse_server_unlock(pcs);
652 
653 	/* destroy device, if any */
654 	if (pcsd->kern_dev != NULL) {
655 		/* destroy device synchronously */
656 		destroy_dev(pcsd->kern_dev);
657 	}
658 	free(pcsd, M_CUSE);
659 }
660 
661 static void
662 cuse_server_unref(struct cuse_server *pcs)
663 {
664 	struct cuse_server_dev *pcsd;
665 	struct cuse_memory *mem;
666 
667 	cuse_server_lock(pcs);
668 	if (--(pcs->refs) != 0) {
669 		cuse_server_unlock(pcs);
670 		return;
671 	}
672 	cuse_server_is_closing(pcs);
673 	/* final client wakeup, if any */
674 	cuse_server_wakeup_all_client_locked(pcs);
675 
676 	cuse_global_lock();
677 	TAILQ_REMOVE(&cuse_server_head, pcs, entry);
678 	cuse_global_unlock();
679 
680 	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
681 		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
682 		cuse_server_unlock(pcs);
683 		cuse_server_free_dev(pcsd);
684 		cuse_server_lock(pcs);
685 	}
686 
687 	cuse_free_unit_by_id_locked(pcs, -1);
688 
689 	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
690 		TAILQ_REMOVE(&pcs->hmem, mem, entry);
691 		cuse_server_unlock(pcs);
692 		cuse_vm_memory_free(mem);
693 		cuse_server_lock(pcs);
694 	}
695 
696 	knlist_clear(&pcs->selinfo.si_note, 1);
697 	knlist_destroy(&pcs->selinfo.si_note);
698 
699 	cuse_server_unlock(pcs);
700 
701 	seldrain(&pcs->selinfo);
702 
703 	cv_destroy(&pcs->cv);
704 
705 	mtx_destroy(&pcs->mtx);
706 
707 	free(pcs, M_CUSE);
708 }
709 
710 static int
711 cuse_server_do_close(struct cuse_server *pcs)
712 {
713 	int retval;
714 
715 	cuse_server_lock(pcs);
716 	cuse_server_is_closing(pcs);
717 	/* final client wakeup, if any */
718 	cuse_server_wakeup_all_client_locked(pcs);
719 
720 	knlist_clear(&pcs->selinfo.si_note, 1);
721 
722 	retval = pcs->refs;
723 	cuse_server_unlock(pcs);
724 
725 	return (retval);
726 }
727 
728 static void
729 cuse_server_free(void *arg)
730 {
731 	struct cuse_server *pcs = arg;
732 
733 	/*
734 	 * The final server unref should be done by the server thread
735 	 * to prevent deadlock in the client cdevpriv destructor,
736 	 * which cannot destroy itself.
737 	 */
738 	while (cuse_server_do_close(pcs) != 1)
739 		pause("W", hz);
740 
741 	/* drop final refcount */
742 	cuse_server_unref(pcs);
743 }
744 
745 static int
746 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
747 {
748 	struct cuse_server *pcs;
749 
750 	pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
751 	if (pcs == NULL)
752 		return (ENOMEM);
753 
754 	if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
755 		printf("Cuse: Cannot set cdevpriv.\n");
756 		free(pcs, M_CUSE);
757 		return (ENOMEM);
758 	}
759 	/* store current process ID */
760 	pcs->pid = curproc->p_pid;
761 
762 	TAILQ_INIT(&pcs->head);
763 	TAILQ_INIT(&pcs->hdev);
764 	TAILQ_INIT(&pcs->hcli);
765 	TAILQ_INIT(&pcs->hmem);
766 
767 	cv_init(&pcs->cv, "cuse-server-cv");
768 
769 	mtx_init(&pcs->mtx, "cuse-server-mtx", NULL, MTX_DEF);
770 
771 	knlist_init_mtx(&pcs->selinfo.si_note, &pcs->mtx);
772 
773 	cuse_global_lock();
774 	pcs->refs++;
775 	TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
776 	cuse_global_unlock();
777 
778 	return (0);
779 }
780 
/*
 * Close handler of /dev/cuse: start closing the server attached to
 * the file handle, if there is one which is not closing already.
 * Always returns zero.
 */
static int
cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_server *srv;
	int rc;

	rc = cuse_server_get(&srv);
	if (rc == 0)
		cuse_server_do_close(srv);

	return (0);
}
791 
792 static int
793 cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
794 {
795 	return (ENXIO);
796 }
797 
798 static int
799 cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
800 {
801 	return (ENXIO);
802 }
803 
804 static int
805 cuse_server_ioctl_copy_locked(struct cuse_server *pcs,
806     struct cuse_client_command *pccmd,
807     struct cuse_data_chunk *pchk, int isread)
808 {
809 	struct proc *p_proc;
810 	uint32_t offset;
811 	int error;
812 
813 	offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
814 
815 	if (pchk->length > CUSE_BUFFER_MAX)
816 		return (EFAULT);
817 
818 	if (offset >= CUSE_BUFFER_MAX)
819 		return (EFAULT);
820 
821 	if ((offset + pchk->length) > CUSE_BUFFER_MAX)
822 		return (EFAULT);
823 
824 	p_proc = pccmd->proc_curr;
825 	if (p_proc == NULL)
826 		return (ENXIO);
827 
828 	if (pccmd->proc_refs < 0)
829 		return (ENOMEM);
830 
831 	pccmd->proc_refs++;
832 
833 	cuse_server_unlock(pcs);
834 
835 	if (isread == 0) {
836 		error = copyin(
837 		    (void *)pchk->local_ptr,
838 		    pccmd->client->ioctl_buffer + offset,
839 		    pchk->length);
840 	} else {
841 		error = copyout(
842 		    pccmd->client->ioctl_buffer + offset,
843 		    (void *)pchk->local_ptr,
844 		    pchk->length);
845 	}
846 
847 	cuse_server_lock(pcs);
848 
849 	pccmd->proc_refs--;
850 
851 	if (pccmd->proc_curr == NULL)
852 		cv_signal(&pccmd->cv);
853 
854 	return (error);
855 }
856 
857 static int
858 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
859     struct proc *proc_d, vm_offset_t data_d, size_t len)
860 {
861 	struct thread *td;
862 	struct proc *proc_cur;
863 	int error;
864 
865 	td = curthread;
866 	proc_cur = td->td_proc;
867 
868 	if (proc_cur == proc_d) {
869 		struct iovec iov = {
870 			.iov_base = (caddr_t)data_d,
871 			.iov_len = len,
872 		};
873 		struct uio uio = {
874 			.uio_iov = &iov,
875 			.uio_iovcnt = 1,
876 			.uio_offset = (off_t)data_s,
877 			.uio_resid = len,
878 			.uio_segflg = UIO_USERSPACE,
879 			.uio_rw = UIO_READ,
880 			.uio_td = td,
881 		};
882 
883 		PHOLD(proc_s);
884 		error = proc_rwmem(proc_s, &uio);
885 		PRELE(proc_s);
886 
887 	} else if (proc_cur == proc_s) {
888 		struct iovec iov = {
889 			.iov_base = (caddr_t)data_s,
890 			.iov_len = len,
891 		};
892 		struct uio uio = {
893 			.uio_iov = &iov,
894 			.uio_iovcnt = 1,
895 			.uio_offset = (off_t)data_d,
896 			.uio_resid = len,
897 			.uio_segflg = UIO_USERSPACE,
898 			.uio_rw = UIO_WRITE,
899 			.uio_td = td,
900 		};
901 
902 		PHOLD(proc_d);
903 		error = proc_rwmem(proc_d, &uio);
904 		PRELE(proc_d);
905 	} else {
906 		error = EINVAL;
907 	}
908 	return (error);
909 }
910 
911 static int
912 cuse_server_data_copy_locked(struct cuse_server *pcs,
913     struct cuse_client_command *pccmd,
914     struct cuse_data_chunk *pchk, int isread)
915 {
916 	struct proc *p_proc;
917 	int error;
918 
919 	p_proc = pccmd->proc_curr;
920 	if (p_proc == NULL)
921 		return (ENXIO);
922 
923 	if (pccmd->proc_refs < 0)
924 		return (ENOMEM);
925 
926 	pccmd->proc_refs++;
927 
928 	cuse_server_unlock(pcs);
929 
930 	if (isread == 0) {
931 		error = cuse_proc2proc_copy(
932 		    curthread->td_proc, pchk->local_ptr,
933 		    p_proc, pchk->peer_ptr,
934 		    pchk->length);
935 	} else {
936 		error = cuse_proc2proc_copy(
937 		    p_proc, pchk->peer_ptr,
938 		    curthread->td_proc, pchk->local_ptr,
939 		    pchk->length);
940 	}
941 
942 	cuse_server_lock(pcs);
943 
944 	pccmd->proc_refs--;
945 
946 	if (pccmd->proc_curr == NULL)
947 		cv_signal(&pccmd->cv);
948 
949 	return (error);
950 }
951 
952 static int
953 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
954 {
955 	int n;
956 	int x = 0;
957 	int match;
958 
959 	do {
960 		for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
961 			if (cuse_alloc_unit[n] != NULL) {
962 				if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
963 					continue;
964 				if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
965 					x++;
966 					match = 1;
967 				}
968 			}
969 		}
970 	} while (match);
971 
972 	if (x < 256) {
973 		for (n = 0; n != CUSE_DEVICES_MAX; n++) {
974 			if (cuse_alloc_unit[n] == NULL) {
975 				cuse_alloc_unit[n] = pcs;
976 				cuse_alloc_unit_id[n] = id | x;
977 				return (x);
978 			}
979 		}
980 	}
981 	return (-1);
982 }
983 
984 static void
985 cuse_server_wakeup_locked(struct cuse_server *pcs)
986 {
987 	selwakeup(&pcs->selinfo);
988 	KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
989 }
990 
991 static void
992 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
993 {
994 	struct cuse_client *pcc;
995 
996 	TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
997 		pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
998 		    CUSE_CLI_KNOTE_NEED_WRITE);
999 	}
1000 	cuse_server_wakeup_locked(pcs);
1001 }
1002 
1003 static int
1004 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
1005 {
1006 	int n;
1007 	int found = 0;
1008 
1009 	for (n = 0; n != CUSE_DEVICES_MAX; n++) {
1010 		if (cuse_alloc_unit[n] == pcs) {
1011 			if (cuse_alloc_unit_id[n] == id || id == -1) {
1012 				cuse_alloc_unit[n] = NULL;
1013 				cuse_alloc_unit_id[n] = 0;
1014 				found = 1;
1015 			}
1016 		}
1017 	}
1018 
1019 	return (found ? 0 : EINVAL);
1020 }
1021 
1022 static int
1023 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
1024     caddr_t data, int fflag, struct thread *td)
1025 {
1026 	struct cuse_server *pcs;
1027 	int error;
1028 
1029 	error = cuse_server_get(&pcs);
1030 	if (error != 0)
1031 		return (error);
1032 
1033 	switch (cmd) {
1034 		struct cuse_client_command *pccmd;
1035 		struct cuse_client *pcc;
1036 		struct cuse_command *pcmd;
1037 		struct cuse_alloc_info *pai;
1038 		struct cuse_create_dev *pcd;
1039 		struct cuse_server_dev *pcsd;
1040 		struct cuse_data_chunk *pchk;
1041 		int n;
1042 
1043 	case CUSE_IOCTL_GET_COMMAND:
1044 		pcmd = (void *)data;
1045 
1046 		cuse_server_lock(pcs);
1047 
1048 		while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
1049 			error = cv_wait_sig(&pcs->cv, &pcs->mtx);
1050 
1051 			if (pcs->is_closing)
1052 				error = ENXIO;
1053 
1054 			if (error) {
1055 				cuse_server_unlock(pcs);
1056 				return (error);
1057 			}
1058 		}
1059 
1060 		TAILQ_REMOVE(&pcs->head, pccmd, entry);
1061 		pccmd->entry.tqe_prev = NULL;
1062 
1063 		pccmd->entered = curthread;
1064 
1065 		*pcmd = pccmd->sub;
1066 
1067 		cuse_server_unlock(pcs);
1068 
1069 		break;
1070 
1071 	case CUSE_IOCTL_SYNC_COMMAND:
1072 
1073 		cuse_server_lock(pcs);
1074 		while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1075 
1076 			/* send sync command */
1077 			pccmd->entered = NULL;
1078 			pccmd->error = *(int *)data;
1079 			pccmd->command = CUSE_CMD_SYNC;
1080 
1081 			/* signal peer, if any */
1082 			cv_signal(&pccmd->cv);
1083 		}
1084 		cuse_server_unlock(pcs);
1085 
1086 		break;
1087 
1088 	case CUSE_IOCTL_ALLOC_UNIT:
1089 
1090 		cuse_server_lock(pcs);
1091 		n = cuse_alloc_unit_by_id_locked(pcs,
1092 		    CUSE_ID_DEFAULT(0));
1093 		cuse_server_unlock(pcs);
1094 
1095 		if (n < 0)
1096 			error = ENOMEM;
1097 		else
1098 			*(int *)data = n;
1099 		break;
1100 
1101 	case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1102 
1103 		n = *(int *)data;
1104 
1105 		n = (n & CUSE_ID_MASK);
1106 
1107 		cuse_server_lock(pcs);
1108 		n = cuse_alloc_unit_by_id_locked(pcs, n);
1109 		cuse_server_unlock(pcs);
1110 
1111 		if (n < 0)
1112 			error = ENOMEM;
1113 		else
1114 			*(int *)data = n;
1115 		break;
1116 
1117 	case CUSE_IOCTL_FREE_UNIT:
1118 
1119 		n = *(int *)data;
1120 
1121 		n = CUSE_ID_DEFAULT(n);
1122 
1123 		cuse_server_lock(pcs);
1124 		error = cuse_free_unit_by_id_locked(pcs, n);
1125 		cuse_server_unlock(pcs);
1126 		break;
1127 
1128 	case CUSE_IOCTL_FREE_UNIT_BY_ID:
1129 
1130 		n = *(int *)data;
1131 
1132 		cuse_server_lock(pcs);
1133 		error = cuse_free_unit_by_id_locked(pcs, n);
1134 		cuse_server_unlock(pcs);
1135 		break;
1136 
1137 	case CUSE_IOCTL_ALLOC_MEMORY:
1138 
1139 		pai = (void *)data;
1140 
1141 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1142 			error = ENOMEM;
1143 			break;
1144 		}
1145 		if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
1146 			error = ENOMEM;
1147 			break;
1148 		}
1149 		error = cuse_server_alloc_memory(pcs,
1150 		    pai->alloc_nr, pai->page_count);
1151 		break;
1152 
1153 	case CUSE_IOCTL_FREE_MEMORY:
1154 		pai = (void *)data;
1155 
1156 		if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1157 			error = ENOMEM;
1158 			break;
1159 		}
1160 		error = cuse_server_free_memory(pcs, pai->alloc_nr);
1161 		break;
1162 
1163 	case CUSE_IOCTL_GET_SIG:
1164 
1165 		cuse_server_lock(pcs);
1166 		pccmd = cuse_server_find_command(pcs, curthread);
1167 
1168 		if (pccmd != NULL) {
1169 			n = pccmd->got_signal;
1170 			pccmd->got_signal = 0;
1171 		} else {
1172 			n = 0;
1173 		}
1174 		cuse_server_unlock(pcs);
1175 
1176 		*(int *)data = n;
1177 
1178 		break;
1179 
1180 	case CUSE_IOCTL_SET_PFH:
1181 
1182 		cuse_server_lock(pcs);
1183 		pccmd = cuse_server_find_command(pcs, curthread);
1184 
1185 		if (pccmd != NULL) {
1186 			pcc = pccmd->client;
1187 			for (n = 0; n != CUSE_CMD_MAX; n++) {
1188 				pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
1189 			}
1190 		} else {
1191 			error = ENXIO;
1192 		}
1193 		cuse_server_unlock(pcs);
1194 		break;
1195 
1196 	case CUSE_IOCTL_CREATE_DEV:
1197 
1198 		error = priv_check(curthread, PRIV_DRIVER);
1199 		if (error)
1200 			break;
1201 
1202 		pcd = (void *)data;
1203 
1204 		/* filter input */
1205 
1206 		pcd->devname[sizeof(pcd->devname) - 1] = 0;
1207 
1208 		if (pcd->devname[0] == 0) {
1209 			error = EINVAL;
1210 			break;
1211 		}
1212 		cuse_str_filter(pcd->devname);
1213 
1214 		pcd->permissions &= 0777;
1215 
1216 		/* try to allocate a character device */
1217 
1218 		pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1219 
1220 		if (pcsd == NULL) {
1221 			error = ENOMEM;
1222 			break;
1223 		}
1224 		pcsd->server = pcs;
1225 
1226 		pcsd->user_dev = pcd->dev;
1227 
1228 		pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1229 		    &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1230 		    pcd->permissions, "%s", pcd->devname);
1231 
1232 		if (pcsd->kern_dev == NULL) {
1233 			free(pcsd, M_CUSE);
1234 			error = ENOMEM;
1235 			break;
1236 		}
1237 		pcsd->kern_dev->si_drv1 = pcsd;
1238 
1239 		cuse_server_lock(pcs);
1240 		TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1241 		cuse_server_unlock(pcs);
1242 
1243 		break;
1244 
1245 	case CUSE_IOCTL_DESTROY_DEV:
1246 
1247 		error = priv_check(curthread, PRIV_DRIVER);
1248 		if (error)
1249 			break;
1250 
1251 		cuse_server_lock(pcs);
1252 
1253 		error = EINVAL;
1254 
1255 		pcsd = TAILQ_FIRST(&pcs->hdev);
1256 		while (pcsd != NULL) {
1257 			if (pcsd->user_dev == *(struct cuse_dev **)data) {
1258 				TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1259 				cuse_server_unlock(pcs);
1260 				cuse_server_free_dev(pcsd);
1261 				cuse_server_lock(pcs);
1262 				error = 0;
1263 				pcsd = TAILQ_FIRST(&pcs->hdev);
1264 			} else {
1265 				pcsd = TAILQ_NEXT(pcsd, entry);
1266 			}
1267 		}
1268 
1269 		cuse_server_unlock(pcs);
1270 		break;
1271 
1272 	case CUSE_IOCTL_WRITE_DATA:
1273 	case CUSE_IOCTL_READ_DATA:
1274 
1275 		cuse_server_lock(pcs);
1276 		pchk = (struct cuse_data_chunk *)data;
1277 
1278 		pccmd = cuse_server_find_command(pcs, curthread);
1279 
1280 		if (pccmd == NULL) {
1281 			error = ENXIO;	/* invalid request */
1282 		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1283 			error = EFAULT;	/* NULL pointer */
1284 		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1285 			error = cuse_server_ioctl_copy_locked(pcs, pccmd,
1286 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1287 		} else {
1288 			error = cuse_server_data_copy_locked(pcs, pccmd,
1289 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
1290 		}
1291 		cuse_server_unlock(pcs);
1292 		break;
1293 
1294 	case CUSE_IOCTL_SELWAKEUP:
1295 		cuse_server_lock(pcs);
1296 		/*
1297 		 * We don't know which direction caused the event.
1298 		 * Wakeup both!
1299 		 */
1300 		cuse_server_wakeup_all_client_locked(pcs);
1301 		cuse_server_unlock(pcs);
1302 		break;
1303 
1304 	default:
1305 		error = ENXIO;
1306 		break;
1307 	}
1308 	return (error);
1309 }
1310 
/*
 * Poll handler for the CUSE server device.
 *
 * Every event the caller asked about, out of the set handled here, is
 * reported back as ready. "dev" and "td" are not used.
 */
static int
cuse_server_poll(struct cdev *dev, int events, struct thread *td)
{
	const int handled = POLLHUP | POLLPRI | POLLIN | POLLRDNORM |
	    POLLOUT | POLLWRNORM;

	return (events & handled);
}
1317 
1318 static int
1319 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1320     vm_size_t size, struct vm_object **object, int nprot)
1321 {
1322 	uint32_t page_nr = *offset / PAGE_SIZE;
1323 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1324 	struct cuse_memory *mem;
1325 	struct cuse_server *pcs;
1326 	int error;
1327 
1328 	error = cuse_server_get(&pcs);
1329 	if (error != 0)
1330 		return (error);
1331 
1332 	cuse_server_lock(pcs);
1333 	/* lookup memory structure */
1334 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1335 		if (mem->alloc_nr == alloc_nr)
1336 			break;
1337 	}
1338 	if (mem == NULL) {
1339 		cuse_server_unlock(pcs);
1340 		return (ENOMEM);
1341 	}
1342 	/* verify page offset */
1343 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1344 	if (page_nr >= mem->page_count) {
1345 		cuse_server_unlock(pcs);
1346 		return (ENXIO);
1347 	}
1348 	/* verify mmap size */
1349 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1350 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1351 		cuse_server_unlock(pcs);
1352 		return (EINVAL);
1353 	}
1354 	vm_object_reference(mem->object);
1355 	*object = mem->object;
1356 	cuse_server_unlock(pcs);
1357 
1358 	/* set new VM object offset to use */
1359 	*offset = page_nr * PAGE_SIZE;
1360 
1361 	/* success */
1362 	return (0);
1363 }
1364 
1365 /*------------------------------------------------------------------------*
1366  *	CUSE CLIENT PART
1367  *------------------------------------------------------------------------*/
1368 static void
1369 cuse_client_free(void *arg)
1370 {
1371 	struct cuse_client *pcc = arg;
1372 	struct cuse_client_command *pccmd;
1373 	struct cuse_server *pcs;
1374 	int n;
1375 
1376 	pcs = pcc->server;
1377 
1378 	cuse_server_lock(pcs);
1379 	cuse_client_is_closing(pcc);
1380 	TAILQ_REMOVE(&pcs->hcli, pcc, entry);
1381 	cuse_server_unlock(pcs);
1382 
1383 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1384 
1385 		pccmd = &pcc->cmds[n];
1386 
1387 		sx_destroy(&pccmd->sx);
1388 		cv_destroy(&pccmd->cv);
1389 	}
1390 
1391 	free(pcc, M_CUSE);
1392 
1393 	/* drop reference on server */
1394 	cuse_server_unref(pcs);
1395 }
1396 
1397 static int
1398 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1399 {
1400 	struct cuse_client_command *pccmd;
1401 	struct cuse_server_dev *pcsd;
1402 	struct cuse_client *pcc;
1403 	struct cuse_server *pcs;
1404 	struct cuse_dev *pcd;
1405 	int error;
1406 	int n;
1407 
1408 	pcsd = dev->si_drv1;
1409 	if (pcsd != NULL) {
1410 		pcs = pcsd->server;
1411 		pcd = pcsd->user_dev;
1412 
1413 		cuse_server_lock(pcs);
1414 		/*
1415 		 * Check that the refcount didn't wrap and that the
1416 		 * same process is not both client and server. This
1417 		 * can easily lead to deadlocks when destroying the
1418 		 * CUSE character device nodes:
1419 		 */
1420 		pcs->refs++;
1421 		if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1422 			/* overflow or wrong PID */
1423 			pcs->refs--;
1424 			cuse_server_unlock(pcs);
1425 			return (EINVAL);
1426 		}
1427 		cuse_server_unlock(pcs);
1428 	} else {
1429 		return (EINVAL);
1430 	}
1431 
1432 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1433 	if (pcc == NULL) {
1434 		/* drop reference on server */
1435 		cuse_server_unref(pcs);
1436 		return (ENOMEM);
1437 	}
1438 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1439 		printf("Cuse: Cannot set cdevpriv.\n");
1440 		/* drop reference on server */
1441 		cuse_server_unref(pcs);
1442 		free(pcc, M_CUSE);
1443 		return (ENOMEM);
1444 	}
1445 	pcc->fflags = fflags;
1446 	pcc->server_dev = pcsd;
1447 	pcc->server = pcs;
1448 
1449 	for (n = 0; n != CUSE_CMD_MAX; n++) {
1450 
1451 		pccmd = &pcc->cmds[n];
1452 
1453 		pccmd->sub.dev = pcd;
1454 		pccmd->sub.command = n;
1455 		pccmd->client = pcc;
1456 
1457 		sx_init(&pccmd->sx, "cuse-client-sx");
1458 		cv_init(&pccmd->cv, "cuse-client-cv");
1459 	}
1460 
1461 	cuse_server_lock(pcs);
1462 
1463 	/* cuse_client_free() assumes that the client is listed somewhere! */
1464 	/* always enqueue */
1465 
1466 	TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1467 
1468 	/* check if server is closing */
1469 	if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1470 		error = EINVAL;
1471 	} else {
1472 		error = 0;
1473 	}
1474 	cuse_server_unlock(pcs);
1475 
1476 	if (error) {
1477 		devfs_clear_cdevpriv();	/* XXX bugfix */
1478 		return (error);
1479 	}
1480 	pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1481 
1482 	cuse_cmd_lock(pccmd);
1483 
1484 	cuse_server_lock(pcs);
1485 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1486 
1487 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1488 	cuse_server_unlock(pcs);
1489 
1490 	if (error < 0) {
1491 		error = cuse_convert_error(error);
1492 	} else {
1493 		error = 0;
1494 	}
1495 
1496 	cuse_cmd_unlock(pccmd);
1497 
1498 	if (error)
1499 		devfs_clear_cdevpriv();	/* XXX bugfix */
1500 
1501 	return (error);
1502 }
1503 
1504 static int
1505 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1506 {
1507 	struct cuse_client_command *pccmd;
1508 	struct cuse_client *pcc;
1509 	struct cuse_server *pcs;
1510 	int error;
1511 
1512 	error = cuse_client_get(&pcc);
1513 	if (error != 0)
1514 		return (0);
1515 
1516 	pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1517 	pcs = pcc->server;
1518 
1519 	cuse_cmd_lock(pccmd);
1520 
1521 	cuse_server_lock(pcs);
1522 	cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1523 
1524 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1525 	cuse_cmd_unlock(pccmd);
1526 
1527 	cuse_client_is_closing(pcc);
1528 	cuse_server_unlock(pcs);
1529 
1530 	return (0);
1531 }
1532 
1533 static void
1534 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1535 {
1536 	struct cuse_server *pcs = pcc->server;
1537 	int temp;
1538 
1539 	cuse_server_lock(pcs);
1540 	temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1541 	    CUSE_CLI_KNOTE_HAS_WRITE));
1542 	pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1543 	    CUSE_CLI_KNOTE_NEED_WRITE);
1544 	cuse_server_unlock(pcs);
1545 
1546 	if (temp != 0) {
1547 		/* get the latest polling state from the server */
1548 		temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1549 
1550 		if (temp & (POLLIN | POLLOUT)) {
1551 			cuse_server_lock(pcs);
1552 			if (temp & POLLIN)
1553 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1554 			if (temp & POLLOUT)
1555 				pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1556 
1557 			/* make sure the "knote" gets woken up */
1558 			cuse_server_wakeup_locked(pcc->server);
1559 			cuse_server_unlock(pcs);
1560 		}
1561 	}
1562 }
1563 
1564 static int
1565 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1566 {
1567 	struct cuse_client_command *pccmd;
1568 	struct cuse_client *pcc;
1569 	struct cuse_server *pcs;
1570 	int error;
1571 	int len;
1572 
1573 	error = cuse_client_get(&pcc);
1574 	if (error != 0)
1575 		return (error);
1576 
1577 	pccmd = &pcc->cmds[CUSE_CMD_READ];
1578 	pcs = pcc->server;
1579 
1580 	if (uio->uio_segflg != UIO_USERSPACE) {
1581 		return (EINVAL);
1582 	}
1583 	uio->uio_segflg = UIO_NOCOPY;
1584 
1585 	cuse_cmd_lock(pccmd);
1586 
1587 	while (uio->uio_resid != 0) {
1588 
1589 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1590 			error = ENOMEM;
1591 			break;
1592 		}
1593 		len = uio->uio_iov->iov_len;
1594 
1595 		cuse_server_lock(pcs);
1596 		cuse_client_send_command_locked(pccmd,
1597 		    (uintptr_t)uio->uio_iov->iov_base,
1598 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1599 
1600 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1601 		cuse_server_unlock(pcs);
1602 
1603 		if (error < 0) {
1604 			error = cuse_convert_error(error);
1605 			break;
1606 		} else if (error == len) {
1607 			error = uiomove(NULL, error, uio);
1608 			if (error)
1609 				break;
1610 		} else {
1611 			error = uiomove(NULL, error, uio);
1612 			break;
1613 		}
1614 	}
1615 	cuse_cmd_unlock(pccmd);
1616 
1617 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1618 
1619 	if (error == EWOULDBLOCK)
1620 		cuse_client_kqfilter_poll(dev, pcc);
1621 
1622 	return (error);
1623 }
1624 
1625 static int
1626 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1627 {
1628 	struct cuse_client_command *pccmd;
1629 	struct cuse_client *pcc;
1630 	struct cuse_server *pcs;
1631 	int error;
1632 	int len;
1633 
1634 	error = cuse_client_get(&pcc);
1635 	if (error != 0)
1636 		return (error);
1637 
1638 	pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1639 	pcs = pcc->server;
1640 
1641 	if (uio->uio_segflg != UIO_USERSPACE) {
1642 		return (EINVAL);
1643 	}
1644 	uio->uio_segflg = UIO_NOCOPY;
1645 
1646 	cuse_cmd_lock(pccmd);
1647 
1648 	while (uio->uio_resid != 0) {
1649 
1650 		if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1651 			error = ENOMEM;
1652 			break;
1653 		}
1654 		len = uio->uio_iov->iov_len;
1655 
1656 		cuse_server_lock(pcs);
1657 		cuse_client_send_command_locked(pccmd,
1658 		    (uintptr_t)uio->uio_iov->iov_base,
1659 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1660 
1661 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
1662 		cuse_server_unlock(pcs);
1663 
1664 		if (error < 0) {
1665 			error = cuse_convert_error(error);
1666 			break;
1667 		} else if (error == len) {
1668 			error = uiomove(NULL, error, uio);
1669 			if (error)
1670 				break;
1671 		} else {
1672 			error = uiomove(NULL, error, uio);
1673 			break;
1674 		}
1675 	}
1676 	cuse_cmd_unlock(pccmd);
1677 
1678 	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1679 
1680 	if (error == EWOULDBLOCK)
1681 		cuse_client_kqfilter_poll(dev, pcc);
1682 
1683 	return (error);
1684 }
1685 
1686 int
1687 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1688     caddr_t data, int fflag, struct thread *td)
1689 {
1690 	struct cuse_client_command *pccmd;
1691 	struct cuse_client *pcc;
1692 	struct cuse_server *pcs;
1693 	int error;
1694 	int len;
1695 
1696 	error = cuse_client_get(&pcc);
1697 	if (error != 0)
1698 		return (error);
1699 
1700 	len = IOCPARM_LEN(cmd);
1701 	if (len > CUSE_BUFFER_MAX)
1702 		return (ENOMEM);
1703 
1704 	pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1705 	pcs = pcc->server;
1706 
1707 	cuse_cmd_lock(pccmd);
1708 
1709 	if (cmd & (IOC_IN | IOC_VOID))
1710 		memcpy(pcc->ioctl_buffer, data, len);
1711 
1712 	/*
1713 	 * When the ioctl-length is zero drivers can pass information
1714 	 * through the data pointer of the ioctl. Make sure this information
1715 	 * is forwarded to the driver.
1716 	 */
1717 
1718 	cuse_server_lock(pcs);
1719 	cuse_client_send_command_locked(pccmd,
1720 	    (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1721 	    (unsigned long)cmd, pcc->fflags,
1722 	    (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1723 
1724 	error = cuse_client_receive_command_locked(pccmd, data, len);
1725 	cuse_server_unlock(pcs);
1726 
1727 	if (error < 0) {
1728 		error = cuse_convert_error(error);
1729 	} else {
1730 		error = 0;
1731 	}
1732 
1733 	if (cmd & IOC_OUT)
1734 		memcpy(data, pcc->ioctl_buffer, len);
1735 
1736 	cuse_cmd_unlock(pccmd);
1737 
1738 	if (error == EWOULDBLOCK)
1739 		cuse_client_kqfilter_poll(dev, pcc);
1740 
1741 	return (error);
1742 }
1743 
1744 static int
1745 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1746 {
1747 	struct cuse_client_command *pccmd;
1748 	struct cuse_client *pcc;
1749 	struct cuse_server *pcs;
1750 	unsigned long temp;
1751 	int error;
1752 	int revents;
1753 
1754 	error = cuse_client_get(&pcc);
1755 	if (error != 0)
1756 		goto pollnval;
1757 
1758 	temp = 0;
1759 	pcs = pcc->server;
1760 
1761 	if (events & (POLLPRI | POLLIN | POLLRDNORM))
1762 		temp |= CUSE_POLL_READ;
1763 
1764 	if (events & (POLLOUT | POLLWRNORM))
1765 		temp |= CUSE_POLL_WRITE;
1766 
1767 	if (events & POLLHUP)
1768 		temp |= CUSE_POLL_ERROR;
1769 
1770 	pccmd = &pcc->cmds[CUSE_CMD_POLL];
1771 
1772 	cuse_cmd_lock(pccmd);
1773 
1774 	/* Need to selrecord() first to not loose any events. */
1775 	if (temp != 0 && td != NULL)
1776 		selrecord(td, &pcs->selinfo);
1777 
1778 	cuse_server_lock(pcs);
1779 	cuse_client_send_command_locked(pccmd,
1780 	    0, temp, pcc->fflags, IO_NDELAY);
1781 
1782 	error = cuse_client_receive_command_locked(pccmd, 0, 0);
1783 	cuse_server_unlock(pcs);
1784 
1785 	cuse_cmd_unlock(pccmd);
1786 
1787 	if (error < 0) {
1788 		goto pollnval;
1789 	} else {
1790 		revents = 0;
1791 		if (error & CUSE_POLL_READ)
1792 			revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1793 		if (error & CUSE_POLL_WRITE)
1794 			revents |= (events & (POLLOUT | POLLWRNORM));
1795 		if (error & CUSE_POLL_ERROR)
1796 			revents |= (events & POLLHUP);
1797 	}
1798 	return (revents);
1799 
1800 pollnval:
1801 	/* XXX many clients don't understand POLLNVAL */
1802 	return (events & (POLLHUP | POLLPRI | POLLIN |
1803 	    POLLRDNORM | POLLOUT | POLLWRNORM));
1804 }
1805 
1806 static int
1807 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1808     vm_size_t size, struct vm_object **object, int nprot)
1809 {
1810 	uint32_t page_nr = *offset / PAGE_SIZE;
1811 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1812 	struct cuse_memory *mem;
1813 	struct cuse_client *pcc;
1814 	struct cuse_server *pcs;
1815 	int error;
1816 
1817 	error = cuse_client_get(&pcc);
1818 	if (error != 0)
1819 		return (error);
1820 
1821 	pcs = pcc->server;
1822 
1823 	cuse_server_lock(pcs);
1824 	/* lookup memory structure */
1825 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1826 		if (mem->alloc_nr == alloc_nr)
1827 			break;
1828 	}
1829 	if (mem == NULL) {
1830 		cuse_server_unlock(pcs);
1831 		return (ENOMEM);
1832 	}
1833 	/* verify page offset */
1834 	page_nr %= CUSE_ALLOC_PAGES_MAX;
1835 	if (page_nr >= mem->page_count) {
1836 		cuse_server_unlock(pcs);
1837 		return (ENXIO);
1838 	}
1839 	/* verify mmap size */
1840 	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1841 	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1842 		cuse_server_unlock(pcs);
1843 		return (EINVAL);
1844 	}
1845 	vm_object_reference(mem->object);
1846 	*object = mem->object;
1847 	cuse_server_unlock(pcs);
1848 
1849 	/* set new VM object offset to use */
1850 	*offset = page_nr * PAGE_SIZE;
1851 
1852 	/* success */
1853 	return (0);
1854 }
1855 
1856 static void
1857 cuse_client_kqfilter_read_detach(struct knote *kn)
1858 {
1859 	struct cuse_client *pcc;
1860 	struct cuse_server *pcs;
1861 
1862 	pcc = kn->kn_hook;
1863 	pcs = pcc->server;
1864 
1865 	cuse_server_lock(pcs);
1866 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1867 	cuse_server_unlock(pcs);
1868 }
1869 
1870 static void
1871 cuse_client_kqfilter_write_detach(struct knote *kn)
1872 {
1873 	struct cuse_client *pcc;
1874 	struct cuse_server *pcs;
1875 
1876 	pcc = kn->kn_hook;
1877 	pcs = pcc->server;
1878 
1879 	cuse_server_lock(pcs);
1880 	knlist_remove(&pcs->selinfo.si_note, kn, 1);
1881 	cuse_server_unlock(pcs);
1882 }
1883 
1884 static int
1885 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1886 {
1887 	struct cuse_client *pcc;
1888 
1889 	pcc = kn->kn_hook;
1890 
1891 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1892 
1893 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1894 }
1895 
1896 static int
1897 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1898 {
1899 	struct cuse_client *pcc;
1900 
1901 	pcc = kn->kn_hook;
1902 
1903 	mtx_assert(&pcc->server->mtx, MA_OWNED);
1904 
1905 	return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1906 }
1907 
1908 static int
1909 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1910 {
1911 	struct cuse_client *pcc;
1912 	struct cuse_server *pcs;
1913 	int error;
1914 
1915 	error = cuse_client_get(&pcc);
1916 	if (error != 0)
1917 		return (error);
1918 
1919 	pcs = pcc->server;
1920 
1921 	cuse_server_lock(pcs);
1922 	switch (kn->kn_filter) {
1923 	case EVFILT_READ:
1924 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1925 		kn->kn_hook = pcc;
1926 		kn->kn_fop = &cuse_client_kqfilter_read_ops;
1927 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1928 		break;
1929 	case EVFILT_WRITE:
1930 		pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1931 		kn->kn_hook = pcc;
1932 		kn->kn_fop = &cuse_client_kqfilter_write_ops;
1933 		knlist_add(&pcs->selinfo.si_note, kn, 1);
1934 		break;
1935 	default:
1936 		error = EINVAL;
1937 		break;
1938 	}
1939 	cuse_server_unlock(pcs);
1940 
1941 	if (error == 0)
1942 		cuse_client_kqfilter_poll(dev, pcc);
1943 	return (error);
1944 }
1945