xref: /freebsd/sys/fs/fuse/fuse_ipc.c (revision 3429092c)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  * * Redistributions in binary form must reproduce the above
14  *   copyright notice, this list of conditions and the following disclaimer
15  *   in the documentation and/or other materials provided with the
16  *   distribution.
17  * * Neither the name of Google Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Copyright (C) 2005 Csaba Henk.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55  * SUCH DAMAGE.
56  */
57 
58 #include <sys/cdefs.h>
59 __FBSDID("$FreeBSD$");
60 
61 #include <sys/param.h>
62 #include <sys/module.h>
63 #include <sys/systm.h>
64 #include <sys/errno.h>
65 #include <sys/kernel.h>
66 #include <sys/conf.h>
67 #include <sys/uio.h>
68 #include <sys/malloc.h>
69 #include <sys/queue.h>
70 #include <sys/lock.h>
71 #include <sys/sx.h>
72 #include <sys/mutex.h>
73 #include <sys/proc.h>
74 #include <sys/mount.h>
75 #include <sys/sdt.h>
76 #include <sys/vnode.h>
77 #include <sys/signalvar.h>
78 #include <sys/syscallsubr.h>
79 #include <sys/sysctl.h>
80 #include <vm/uma.h>
81 
82 #include "fuse.h"
83 #include "fuse_node.h"
84 #include "fuse_ipc.h"
85 #include "fuse_internal.h"
86 
SDT_PROVIDER_DECLARE(fusefs);
/*
 * Fuse trace probe:
 * arg0: verbosity.  Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(fusefs, , ipc, trace, "int", "char*");

/* Forward declarations for helpers that are private to this file */
static void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
    struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred);
static void fiov_clear(struct fuse_iov *fiov);
static void fuse_interrupt_send(struct fuse_ticket *otick, int err);
static struct fuse_ticket *fticket_alloc(struct fuse_data *data);
static void fticket_refresh(struct fuse_ticket *ftick);
static void fticket_destroy(struct fuse_ticket *ftick);
static int fticket_wait_answer(struct fuse_ticket *ftick);
static inline int
fticket_aw_pull_uio(struct fuse_ticket *ftick,
    struct uio *uio);

static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);

static fuse_handler_t fuse_standard_handler;

SYSCTL_NODE(_vfs, OID_AUTO, fusefs, CTLFLAG_RW, 0, "FUSE tunables");
/* Incremented by fticket_ctor() and decremented by fticket_dtor() */
static int fuse_ticket_count = 0;

SYSCTL_INT(_vfs_fusefs, OID_AUTO, ticket_count, CTLFLAG_RW,
    &fuse_ticket_count, 0, "number of allocated tickets");
/*
 * fiov_adjust() treats a buffer as oversized when its slack exceeds this many
 * bytes; a negative value disables that check.
 */
static long fuse_iov_permanent_bufsize = 1 << 19;

SYSCTL_LONG(_vfs_fusefs, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW,
    &fuse_iov_permanent_bufsize, 0,
    "limit for permanently stored buffer size for fuse_iovs");
/* Initial value of fuse_iov.credit, see fiov_init() and fiov_adjust() */
static int fuse_iov_credit = 16;

SYSCTL_INT(_vfs_fusefs, OID_AUTO, iov_credit, CTLFLAG_RW,
    &fuse_iov_credit, 0,
    "how many times is an oversized fuse_iov tolerated");

MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer");
/* UMA zone for struct fuse_ticket; created in fuse_ipc_init() */
static uma_zone_t ticket_zone;
129 
130 /*
131  * TODO: figure out how to timeout INTERRUPT requests, because the daemon may
132  * leagally never respond
133  */
134 static int
135 fuse_interrupt_callback(struct fuse_ticket *tick, struct uio *uio)
136 {
137 	struct fuse_ticket *otick, *x_tick;
138 	struct fuse_interrupt_in *fii;
139 	struct fuse_data *data = tick->tk_data;
140 	bool found = false;
141 
142 	fii = (struct fuse_interrupt_in*)((char*)tick->tk_ms_fiov.base +
143 		sizeof(struct fuse_in_header));
144 
145 	fuse_lck_mtx_lock(data->aw_mtx);
146 	TAILQ_FOREACH_SAFE(otick, &data->aw_head, tk_aw_link, x_tick) {
147 		if (otick->tk_unique == fii->unique) {
148 			found = true;
149 			break;
150 		}
151 	}
152 	fuse_lck_mtx_unlock(data->aw_mtx);
153 
154 	if (!found) {
155 		/* Original is already complete.  Just return */
156 		return 0;
157 	}
158 
159 	/* Clear the original ticket's interrupt association */
160 	otick->irq_unique = 0;
161 
162 	if (tick->tk_aw_ohead.error == ENOSYS) {
163 		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
164 		return 0;
165 	} else if (tick->tk_aw_ohead.error == EAGAIN) {
166 		/*
167 		 * There are two reasons we might get this:
168 		 * 1) the daemon received the INTERRUPT request before the
169 		 *    original, or
170 		 * 2) the daemon received the INTERRUPT request after it
171 		 *    completed the original request.
172 		 * In the first case we should re-send the INTERRUPT.  In the
173 		 * second, we should ignore it.
174 		 */
175 		/* Resend */
176 		fuse_interrupt_send(otick, EINTR);
177 		return 0;
178 	} else {
179 		/* Illegal FUSE_INTERRUPT response */
180 		return EINVAL;
181 	}
182 }
183 
184 /* Interrupt the operation otick.  Return err as its error code */
185 void
186 fuse_interrupt_send(struct fuse_ticket *otick, int err)
187 {
188 	struct fuse_dispatcher fdi;
189 	struct fuse_interrupt_in *fii;
190 	struct fuse_in_header *ftick_hdr;
191 	struct fuse_data *data = otick->tk_data;
192 	struct fuse_ticket *tick, *xtick;
193 	struct ucred reused_creds;
194 	gid_t reused_groups[1];
195 
196 	if (otick->irq_unique == 0) {
197 		/*
198 		 * If the daemon hasn't yet received otick, then we can answer
199 		 * it ourselves and return.
200 		 */
201 		fuse_lck_mtx_lock(data->ms_mtx);
202 		STAILQ_FOREACH_SAFE(tick, &otick->tk_data->ms_head, tk_ms_link,
203 			xtick) {
204 			if (tick == otick) {
205 				STAILQ_REMOVE(&otick->tk_data->ms_head, tick,
206 					fuse_ticket, tk_ms_link);
207 				otick->tk_ms_link.stqe_next = NULL;
208 				fuse_lck_mtx_unlock(data->ms_mtx);
209 
210 				fuse_lck_mtx_lock(otick->tk_aw_mtx);
211 				if (!fticket_answered(otick)) {
212 					fticket_set_answered(otick);
213 					otick->tk_aw_errno = err;
214 					wakeup(otick);
215 				}
216 				fuse_lck_mtx_unlock(otick->tk_aw_mtx);
217 
218 				fuse_ticket_drop(tick);
219 				return;
220 			}
221 		}
222 		fuse_lck_mtx_unlock(data->ms_mtx);
223 
224 		/*
225 		 * If the fuse daemon doesn't support interrupts, then there's
226 		 * nothing more that we can do
227 		 */
228 		if (!fsess_isimpl(data->mp, FUSE_INTERRUPT))
229 			return;
230 
231 		/*
232 		 * If the fuse daemon has already received otick, then we must
233 		 * send FUSE_INTERRUPT.
234 		 */
235 		ftick_hdr = fticket_in_header(otick);
236 		reused_creds.cr_uid = ftick_hdr->uid;
237 		reused_groups[0] = ftick_hdr->gid;
238 		reused_creds.cr_groups = reused_groups;
239 		fdisp_init(&fdi, sizeof(*fii));
240 		fdisp_make_pid(&fdi, FUSE_INTERRUPT, data, ftick_hdr->nodeid,
241 			ftick_hdr->pid, &reused_creds);
242 
243 		fii = fdi.indata;
244 		fii->unique = otick->tk_unique;
245 		fuse_insert_callback(fdi.tick, fuse_interrupt_callback);
246 
247 		otick->irq_unique = fdi.tick->tk_unique;
248 		/* Interrupt ops should be delivered ASAP */
249 		fuse_insert_message(fdi.tick, true);
250 		fdisp_destroy(&fdi);
251 	} else {
252 		/* This ticket has already been interrupted */
253 	}
254 }
255 
256 void
257 fiov_init(struct fuse_iov *fiov, size_t size)
258 {
259 	uint32_t msize = FU_AT_LEAST(size);
260 
261 	fiov->len = 0;
262 
263 	fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO);
264 
265 	fiov->allocated_size = msize;
266 	fiov->credit = fuse_iov_credit;
267 }
268 
/*
 * Release the backing buffer of a fuse_iov.  The buffer must exist; the
 * fuse_iov structure itself is not freed and its fields are left untouched.
 */
void
fiov_teardown(struct fuse_iov *fiov)
{
	MPASS(fiov->base != NULL);
	free(fiov->base, M_FUSEMSG);
}
275 
276 void
277 fiov_adjust(struct fuse_iov *fiov, size_t size)
278 {
279 	if (fiov->allocated_size < size ||
280 	    (fuse_iov_permanent_bufsize >= 0 &&
281 	    fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
282 	    --fiov->credit < 0)) {
283 
284 		fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
285 		    M_WAITOK | M_ZERO);
286 		if (!fiov->base) {
287 			panic("FUSE: realloc failed");
288 		}
289 		fiov->allocated_size = FU_AT_LEAST(size);
290 		fiov->credit = fuse_iov_credit;
291 		/* Clear data buffer after reallocation */
292 		bzero(fiov->base, size);
293 	} else if (size > fiov->len) {
294 		/* Clear newly extended portion of data buffer */
295 		bzero((char*)fiov->base + fiov->len, size - fiov->len);
296 	}
297 	fiov->len = size;
298 }
299 
/*
 * Clear the fiov's data buffer: zero the first fiov->len bytes.  The length
 * and the allocation are left unchanged.
 */
static void
fiov_clear(struct fuse_iov *fiov)
{
	bzero(fiov->base, fiov->len);
}
306 
/*
 * Resize the fiov if needed and reset its used length to zero.
 *
 * This is fiov_adjust() with a size of 0.  fiov_adjust() zeroes bytes as the
 * length grows again, so stale contents are not exposed to the next user.
 */
void
fiov_refresh(struct fuse_iov *fiov)
{
	fiov_adjust(fiov, 0);
}
313 
314 static int
315 fticket_ctor(void *mem, int size, void *arg, int flags)
316 {
317 	struct fuse_ticket *ftick = mem;
318 	struct fuse_data *data = arg;
319 
320 	FUSE_ASSERT_MS_DONE(ftick);
321 	FUSE_ASSERT_AW_DONE(ftick);
322 
323 	ftick->tk_data = data;
324 
325 	if (ftick->tk_unique != 0)
326 		fticket_refresh(ftick);
327 
328 	/* May be truncated to 32 bits */
329 	ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
330 	if (ftick->tk_unique == 0)
331 		ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1);
332 
333 	ftick->irq_unique = 0;
334 
335 	refcount_init(&ftick->tk_refcount, 1);
336 	atomic_add_acq_int(&fuse_ticket_count, 1);
337 
338 	return 0;
339 }
340 
/*
 * UMA destructor for tickets (registered in fuse_ipc_init()).
 *
 * Runs the two FUSE_ASSERT_*_DONE checks on the ticket and removes it from
 * the global ticket count.
 */
static void
fticket_dtor(void *mem, int size, void *arg)
{
	/*
	 * ftick is only declared under INVARIANTS; presumably the assertion
	 * macros below expand to nothing otherwise -- confirm in fuse_ipc.h.
	 */
#ifdef INVARIANTS
	struct fuse_ticket *ftick = mem;
#endif

	FUSE_ASSERT_MS_DONE(ftick);
	FUSE_ASSERT_AW_DONE(ftick);

	atomic_subtract_acq_int(&fuse_ticket_count, 1);
}
353 
354 static int
355 fticket_init(void *mem, int size, int flags)
356 {
357 	struct fuse_ticket *ftick = mem;
358 
359 	bzero(ftick, sizeof(struct fuse_ticket));
360 
361 	fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header));
362 	ftick->tk_ms_type = FT_M_FIOV;
363 
364 	mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF);
365 	fiov_init(&ftick->tk_aw_fiov, 0);
366 	ftick->tk_aw_type = FT_A_FIOV;
367 
368 	return 0;
369 }
370 
371 static void
372 fticket_fini(void *mem, int size)
373 {
374 	struct fuse_ticket *ftick = mem;
375 
376 	fiov_teardown(&ftick->tk_ms_fiov);
377 	fiov_teardown(&ftick->tk_aw_fiov);
378 	mtx_destroy(&ftick->tk_aw_mtx);
379 }
380 
381 static inline struct fuse_ticket *
382 fticket_alloc(struct fuse_data *data)
383 {
384 	return uma_zalloc_arg(ticket_zone, data, M_WAITOK);
385 }
386 
387 static inline void
388 fticket_destroy(struct fuse_ticket *ftick)
389 {
390 	return uma_zfree(ticket_zone, ftick);
391 }
392 
393 static inline
394 void
395 fticket_refresh(struct fuse_ticket *ftick)
396 {
397 	FUSE_ASSERT_MS_DONE(ftick);
398 	FUSE_ASSERT_AW_DONE(ftick);
399 
400 	fiov_refresh(&ftick->tk_ms_fiov);
401 	ftick->tk_ms_bufdata = NULL;
402 	ftick->tk_ms_bufsize = 0;
403 	ftick->tk_ms_type = FT_M_FIOV;
404 
405 	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
406 
407 	fiov_refresh(&ftick->tk_aw_fiov);
408 	ftick->tk_aw_errno = 0;
409 	ftick->tk_aw_bufdata = NULL;
410 	ftick->tk_aw_bufsize = 0;
411 	ftick->tk_aw_type = FT_A_FIOV;
412 
413 	ftick->tk_flag = 0;
414 }
415 
416 /* Prepar the ticket to be reused, but don't clear its data buffers */
417 static inline void
418 fticket_reset(struct fuse_ticket *ftick)
419 {
420 	FUSE_ASSERT_MS_DONE(ftick);
421 	FUSE_ASSERT_AW_DONE(ftick);
422 
423 	ftick->tk_ms_bufdata = NULL;
424 	ftick->tk_ms_bufsize = 0;
425 	ftick->tk_ms_type = FT_M_FIOV;
426 
427 	bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header));
428 
429 	ftick->tk_aw_errno = 0;
430 	ftick->tk_aw_bufdata = NULL;
431 	ftick->tk_aw_bufsize = 0;
432 	ftick->tk_aw_type = FT_A_FIOV;
433 
434 	ftick->tk_flag = 0;
435 }
436 
/*
 * Sleep until ftick is answered, the session is found dead, the sleep times
 * out, or a signal arrives.
 *
 * Return value:
 *   0          the ticket was answered and no sleep was interrupted
 *   ENOTCONN   the session is dead; the ticket is marked answered here
 *   ETIMEDOUT  msleep() returned EWOULDBLOCK; the ticket is marked answered
 *   EINTR or   a signal interrupted the sleep.  This value is also what is
 *   ERESTART   returned when the ticket becomes answered after an interrupt
 *              has been sent, because err keeps the msleep() result across
 *              the retry
 *   ENXIO      msleep() returned 0 but the ticket is still unanswered
 *   other      any other msleep() error is passed through
 *
 * tk_aw_mtx is taken on entry and released before returning; it is dropped
 * temporarily around fuse_interrupt_send().
 */
static int
fticket_wait_answer(struct fuse_ticket *ftick)
{
	struct thread *td = curthread;
	sigset_t blockedset, oldset;
	int err = 0, stops_deferred;
	struct fuse_data *data;

	/*
	 * blockedset is the set of signals blocked for the duration of each
	 * sleep: empty if the daemon implements FUSE_INTERRUPT (so any signal
	 * can wake us and trigger an interrupt), otherwise everything except
	 * SIGKILL.
	 */
	if (fsess_isimpl(ftick->tk_data->mp, FUSE_INTERRUPT)) {
		SIGEMPTYSET(blockedset);
	} else {
		/* May as well block all signals */
		SIGFILLSET(blockedset);
		SIGDELSET(blockedset, SIGKILL);
	}
	stops_deferred = sigdeferstop(SIGDEFERSTOP_SILENT);
	/* Fetch the current signal mask into oldset without changing it */
	kern_sigprocmask(td, SIG_BLOCK, NULL, &oldset, 0);

	fuse_lck_mtx_lock(ftick->tk_aw_mtx);

retry:
	if (fticket_answered(ftick)) {
		goto out;
	}
	data = ftick->tk_data;

	if (fdata_get_dead(data)) {
		err = ENOTCONN;
		fticket_set_answered(ftick);
		goto out;
	}
	/* Block blockedset only while asleep; restore oldset right after */
	kern_sigprocmask(td, SIG_BLOCK, &blockedset, NULL, 0);
	err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans",
	    data->daemon_timeout * hz);
	kern_sigprocmask(td, SIG_SETMASK, &oldset, NULL, 0);
	if (err == EWOULDBLOCK) {
		SDT_PROBE2(fusefs, , ipc, trace, 3,
			"fticket_wait_answer: EWOULDBLOCK");
#ifdef XXXIP				/* die conditionally */
		if (!fdata_get_dead(data)) {
			fdata_set_dead(data);
		}
#endif
		err = ETIMEDOUT;
		fticket_set_answered(ftick);
	} else if ((err == EINTR || err == ERESTART)) {
		/*
		 * Whether we get EINTR or ERESTART depends on whether
		 * SA_RESTART was set by sigaction(2).
		 *
		 * Try to interrupt the operation and wait for an EINTR response
		 * to the original operation.  If the file system does not
		 * support FUSE_INTERRUPT, then we'll just wait for it to
		 * complete like normal.  If it does support FUSE_INTERRUPT,
		 * then it will either respond EINTR to the original operation,
		 * or EAGAIN to the interrupt.
		 */
		int sig;
		bool fatal;

		SDT_PROBE2(fusefs, , ipc, trace, 4,
			"fticket_wait_answer: interrupt");
		/*
		 * fuse_interrupt_send() takes tk_aw_mtx itself, so it must be
		 * called with the mutex released.
		 */
		fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
		fuse_interrupt_send(ftick, err);

		/* Find out which signal woke us and whether it is fatal */
		PROC_LOCK(td->td_proc);
		mtx_lock(&td->td_proc->p_sigacts->ps_mtx);
		sig = cursig(td);
		fatal = sig_isfatal(td->td_proc, sig);
		mtx_unlock(&td->td_proc->p_sigacts->ps_mtx);
		PROC_UNLOCK(td->td_proc);

		fuse_lck_mtx_lock(ftick->tk_aw_mtx);
		if (!fatal) {
			/*
			 * Block the just-delivered signal while we wait for an
			 * interrupt response
			 */
			SIGADDSET(blockedset, sig);
			goto retry;
		} else {
			/* Return immediately for fatal signals */
		}
	} else if (err) {
		SDT_PROBE2(fusefs, , ipc, trace, 6,
			"fticket_wait_answer: other error");
	} else {
		SDT_PROBE2(fusefs, , ipc, trace, 7, "fticket_wait_answer: OK");
	}
out:
	/* A clean wakeup without an answer is reported as ENXIO */
	if (!(err || fticket_answered(ftick))) {
		SDT_PROBE2(fusefs, , ipc, trace, 1,
			"FUSE: requester was woken up but still no answer");
		err = ENXIO;
	}
	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
	sigallowstop(stops_deferred);

	return err;
}
537 
538 static	inline
539 int
540 fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio)
541 {
542 	int err = 0;
543 	size_t len = uio_resid(uio);
544 
545 	if (len) {
546 		switch (ftick->tk_aw_type) {
547 		case FT_A_FIOV:
548 			fiov_adjust(fticket_resp(ftick), len);
549 			err = uiomove(fticket_resp(ftick)->base, len, uio);
550 			break;
551 
552 		case FT_A_BUF:
553 			ftick->tk_aw_bufsize = len;
554 			err = uiomove(ftick->tk_aw_bufdata, len, uio);
555 			break;
556 
557 		default:
558 			panic("FUSE: unknown answer type for ticket %p", ftick);
559 		}
560 	}
561 	return err;
562 }
563 
564 int
565 fticket_pull(struct fuse_ticket *ftick, struct uio *uio)
566 {
567 	int err = 0;
568 
569 	if (ftick->tk_aw_ohead.error) {
570 		return 0;
571 	}
572 	err = fuse_body_audit(ftick, uio_resid(uio));
573 	if (!err) {
574 		err = fticket_aw_pull_uio(ftick, uio);
575 	}
576 	return err;
577 }
578 
579 struct fuse_data *
580 fdata_alloc(struct cdev *fdev, struct ucred *cred)
581 {
582 	struct fuse_data *data;
583 
584 	data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO);
585 
586 	data->fdev = fdev;
587 	mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF);
588 	STAILQ_INIT(&data->ms_head);
589 	knlist_init_mtx(&data->ks_rsel.si_note, &data->ms_mtx);
590 	mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF);
591 	TAILQ_INIT(&data->aw_head);
592 	data->daemoncred = crhold(cred);
593 	data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT;
594 	sx_init(&data->rename_lock, "fuse rename lock");
595 	data->ref = 1;
596 
597 	return data;
598 }
599 
600 void
601 fdata_trydestroy(struct fuse_data *data)
602 {
603 	data->ref--;
604 	MPASS(data->ref >= 0);
605 	if (data->ref != 0)
606 		return;
607 
608 	/* Driving off stage all that stuff thrown at device... */
609 	sx_destroy(&data->rename_lock);
610 	crfree(data->daemoncred);
611 	mtx_destroy(&data->aw_mtx);
612 	knlist_delete(&data->ks_rsel.si_note, curthread, 0);
613 	knlist_destroy(&data->ks_rsel.si_note);
614 	mtx_destroy(&data->ms_mtx);
615 
616 	free(data, M_FUSEMSG);
617 }
618 
619 void
620 fdata_set_dead(struct fuse_data *data)
621 {
622 	FUSE_LOCK();
623 	if (fdata_get_dead(data)) {
624 		FUSE_UNLOCK();
625 		return;
626 	}
627 	fuse_lck_mtx_lock(data->ms_mtx);
628 	data->dataflags |= FSESS_DEAD;
629 	wakeup_one(data);
630 	selwakeuppri(&data->ks_rsel, PZERO + 1);
631 	wakeup(&data->ticketer);
632 	fuse_lck_mtx_unlock(data->ms_mtx);
633 	FUSE_UNLOCK();
634 }
635 
636 struct fuse_ticket *
637 fuse_ticket_fetch(struct fuse_data *data)
638 {
639 	int err = 0;
640 	struct fuse_ticket *ftick;
641 
642 	ftick = fticket_alloc(data);
643 
644 	if (!(data->dataflags & FSESS_INITED)) {
645 		/* Sleep until get answer for INIT messsage */
646 		FUSE_LOCK();
647 		if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) {
648 			err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP,
649 			    "fu_ini", 0);
650 			if (err)
651 				fdata_set_dead(data);
652 		} else
653 			FUSE_UNLOCK();
654 	}
655 	return ftick;
656 }
657 
658 int
659 fuse_ticket_drop(struct fuse_ticket *ftick)
660 {
661 	int die;
662 
663 	die = refcount_release(&ftick->tk_refcount);
664 	if (die)
665 		fticket_destroy(ftick);
666 
667 	return die;
668 }
669 
670 void
671 fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler)
672 {
673 	if (fdata_get_dead(ftick->tk_data)) {
674 		return;
675 	}
676 	ftick->tk_aw_handler = handler;
677 
678 	fuse_lck_mtx_lock(ftick->tk_data->aw_mtx);
679 	fuse_aw_push(ftick);
680 	fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
681 }
682 
683 /*
684  * Insert a new upgoing ticket into the message queue
685  *
686  * If urgent is true, insert at the front of the queue.  Otherwise, insert in
687  * FIFO order.
688  */
689 void
690 fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
691 {
692 	if (ftick->tk_flag & FT_DIRTY) {
693 		panic("FUSE: ticket reused without being refreshed");
694 	}
695 	ftick->tk_flag |= FT_DIRTY;
696 
697 	if (fdata_get_dead(ftick->tk_data)) {
698 		return;
699 	}
700 	fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
701 	if (urgent)
702 		fuse_ms_push_head(ftick);
703 	else
704 		fuse_ms_push(ftick);
705 	wakeup_one(ftick->tk_data);
706 	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
707 	KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
708 	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
709 }
710 
711 static int
712 fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
713 {
714 	int err = 0;
715 	enum fuse_opcode opcode;
716 
717 	opcode = fticket_opcode(ftick);
718 
719 	switch (opcode) {
720 	case FUSE_LOOKUP:
721 		err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL;
722 		break;
723 
724 	case FUSE_FORGET:
725 		panic("FUSE: a handler has been intalled for FUSE_FORGET");
726 		break;
727 
728 	case FUSE_GETATTR:
729 		err = (blen == sizeof(struct fuse_attr_out)) ? 0 : EINVAL;
730 		break;
731 
732 	case FUSE_SETATTR:
733 		err = (blen == sizeof(struct fuse_attr_out)) ? 0 : EINVAL;
734 		break;
735 
736 	case FUSE_READLINK:
737 		err = (PAGE_SIZE >= blen) ? 0 : EINVAL;
738 		break;
739 
740 	case FUSE_SYMLINK:
741 		err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL;
742 		break;
743 
744 	case FUSE_MKNOD:
745 		err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL;
746 		break;
747 
748 	case FUSE_MKDIR:
749 		err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL;
750 		break;
751 
752 	case FUSE_UNLINK:
753 		err = (blen == 0) ? 0 : EINVAL;
754 		break;
755 
756 	case FUSE_RMDIR:
757 		err = (blen == 0) ? 0 : EINVAL;
758 		break;
759 
760 	case FUSE_RENAME:
761 		err = (blen == 0) ? 0 : EINVAL;
762 		break;
763 
764 	case FUSE_LINK:
765 		err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL;
766 		break;
767 
768 	case FUSE_OPEN:
769 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
770 		break;
771 
772 	case FUSE_READ:
773 		err = (((struct fuse_read_in *)(
774 		    (char *)ftick->tk_ms_fiov.base +
775 		    sizeof(struct fuse_in_header)
776 		    ))->size >= blen) ? 0 : EINVAL;
777 		break;
778 
779 	case FUSE_WRITE:
780 		err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL;
781 		break;
782 
783 	case FUSE_STATFS:
784 		if (fuse_libabi_geq(ftick->tk_data, 7, 4)) {
785 			err = (blen == sizeof(struct fuse_statfs_out)) ?
786 			  0 : EINVAL;
787 		} else {
788 			err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL;
789 		}
790 		break;
791 
792 	case FUSE_RELEASE:
793 		err = (blen == 0) ? 0 : EINVAL;
794 		break;
795 
796 	case FUSE_FSYNC:
797 		err = (blen == 0) ? 0 : EINVAL;
798 		break;
799 
800 	case FUSE_SETXATTR:
801 		err = (blen == 0) ? 0 : EINVAL;
802 		break;
803 
804 	case FUSE_GETXATTR:
805 	case FUSE_LISTXATTR:
806 		/*
807 		 * These can have varying response lengths, and 0 length
808 		 * isn't necessarily invalid.
809 		 */
810 		err = 0;
811 		break;
812 
813 	case FUSE_REMOVEXATTR:
814 		err = (blen == 0) ? 0 : EINVAL;
815 		break;
816 
817 	case FUSE_FLUSH:
818 		err = (blen == 0) ? 0 : EINVAL;
819 		break;
820 
821 	case FUSE_INIT:
822 		if (blen == sizeof(struct fuse_init_out) || blen == 8) {
823 			err = 0;
824 		} else {
825 			err = EINVAL;
826 		}
827 		break;
828 
829 	case FUSE_OPENDIR:
830 		err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL;
831 		break;
832 
833 	case FUSE_READDIR:
834 		err = (((struct fuse_read_in *)(
835 		    (char *)ftick->tk_ms_fiov.base +
836 		    sizeof(struct fuse_in_header)
837 		    ))->size >= blen) ? 0 : EINVAL;
838 		break;
839 
840 	case FUSE_RELEASEDIR:
841 		err = (blen == 0) ? 0 : EINVAL;
842 		break;
843 
844 	case FUSE_FSYNCDIR:
845 		err = (blen == 0) ? 0 : EINVAL;
846 		break;
847 
848 	case FUSE_GETLK:
849 		err = (blen == sizeof(struct fuse_lk_out)) ? 0 : EINVAL;
850 		break;
851 
852 	case FUSE_SETLK:
853 		err = (blen == 0) ? 0 : EINVAL;
854 		break;
855 
856 	case FUSE_SETLKW:
857 		err = (blen == 0) ? 0 : EINVAL;
858 		break;
859 
860 	case FUSE_ACCESS:
861 		err = (blen == 0) ? 0 : EINVAL;
862 		break;
863 
864 	case FUSE_CREATE:
865 		err = (blen == sizeof(struct fuse_entry_out) +
866 		    sizeof(struct fuse_open_out)) ? 0 : EINVAL;
867 		break;
868 
869 	case FUSE_DESTROY:
870 		err = (blen == 0) ? 0 : EINVAL;
871 		break;
872 
873 	default:
874 		panic("FUSE: opcodes out of sync (%d)\n", opcode);
875 	}
876 
877 	return err;
878 }
879 
880 static inline void
881 fuse_setup_ihead(struct fuse_in_header *ihead, struct fuse_ticket *ftick,
882     uint64_t nid, enum fuse_opcode op, size_t blen, pid_t pid,
883     struct ucred *cred)
884 {
885 	ihead->len = sizeof(*ihead) + blen;
886 	ihead->unique = ftick->tk_unique;
887 	ihead->nodeid = nid;
888 	ihead->opcode = op;
889 
890 	ihead->pid = pid;
891 	ihead->uid = cred->cr_uid;
892 	ihead->gid = cred->cr_groups[0];
893 }
894 
895 /*
896  * fuse_standard_handler just pulls indata and wakes up pretender.
897  * Doesn't try to interpret data, that's left for the pretender.
898  * Though might do a basic size verification before the pull-in takes place
899  */
900 
901 static int
902 fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio)
903 {
904 	int err = 0;
905 
906 	err = fticket_pull(ftick, uio);
907 
908 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
909 
910 	if (!fticket_answered(ftick)) {
911 		fticket_set_answered(ftick);
912 		ftick->tk_aw_errno = err;
913 		wakeup(ftick);
914 	}
915 	fuse_lck_mtx_unlock(ftick->tk_aw_mtx);
916 
917 	return err;
918 }
919 
920 /*
921  * Reinitialize a dispatcher from a pid and node id, without resizing or
922  * clearing its data buffers
923  */
924 static void
925 fdisp_refresh_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
926     struct mount *mp, uint64_t nid, pid_t pid, struct ucred *cred)
927 {
928 	MPASS(fdip->tick);
929 	MPASS2(sizeof(fdip->finh) + fdip->iosize <= fdip->tick->tk_ms_fiov.len,
930 		"Must use fdisp_make_pid to increase the size of the fiov");
931 	fticket_reset(fdip->tick);
932 
933 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
934 	    fdip->indata, fdip->iosize);
935 
936 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid,
937 		cred);
938 }
939 
940 /* Initialize a dispatcher from a pid and node id */
941 static void
942 fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op,
943     struct fuse_data *data, uint64_t nid, pid_t pid, struct ucred *cred)
944 {
945 	if (fdip->tick) {
946 		fticket_refresh(fdip->tick);
947 	} else {
948 		fdip->tick = fuse_ticket_fetch(data);
949 	}
950 
951 	/* FUSE_DIMALLOC will bzero the fiovs when it enlarges them */
952 	FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh,
953 	    fdip->indata, fdip->iosize);
954 
955 	fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred);
956 }
957 
958 void
959 fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp,
960     uint64_t nid, struct thread *td, struct ucred *cred)
961 {
962 	struct fuse_data *data = fuse_get_mpdata(mp);
963 	RECTIFY_TDCR(td, cred);
964 
965 	return fdisp_make_pid(fdip, op, data, nid, td->td_proc->p_pid, cred);
966 }
967 
968 void
969 fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
970     struct vnode *vp, struct thread *td, struct ucred *cred)
971 {
972 	struct mount *mp = vnode_mount(vp);
973 	struct fuse_data *data = fuse_get_mpdata(mp);
974 
975 	RECTIFY_TDCR(td, cred);
976 	return fdisp_make_pid(fdip, op, data, VTOI(vp),
977 	    td->td_proc->p_pid, cred);
978 }
979 
980 /* Refresh a fuse_dispatcher so it can be reused, but don't zero its data */
981 void
982 fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op,
983     struct vnode *vp, struct thread *td, struct ucred *cred)
984 {
985 	RECTIFY_TDCR(td, cred);
986 	return fdisp_refresh_pid(fdip, op, vnode_mount(vp), VTOI(vp),
987 	    td->td_proc->p_pid, cred);
988 }
989 
/*
 * Refresh the dispatcher's ticket, including its data buffers, so that it can
 * carry a new request.  The dispatcher must already own a ticket.
 */
void
fdisp_refresh(struct fuse_dispatcher *fdip)
{
	fticket_refresh(fdip->tick);
}
995 
996 SDT_PROBE_DEFINE2(fusefs, , ipc, fdisp_wait_answ_error, "char*", "int");
997 
998 int
999 fdisp_wait_answ(struct fuse_dispatcher *fdip)
1000 {
1001 	int err = 0;
1002 
1003 	fdip->answ_stat = 0;
1004 	fuse_insert_callback(fdip->tick, fuse_standard_handler);
1005 	fuse_insert_message(fdip->tick, false);
1006 
1007 	if ((err = fticket_wait_answer(fdip->tick))) {
1008 		fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx);
1009 
1010 		if (fticket_answered(fdip->tick)) {
1011 			/*
1012 	                 * Just between noticing the interrupt and getting here,
1013 	                 * the standard handler has completed his job.
1014 	                 * So we drop the ticket and exit as usual.
1015 	                 */
1016 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1017 				"IPC: interrupted, already answered", err);
1018 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
1019 			goto out;
1020 		} else {
1021 			/*
1022 	                 * So we were faster than the standard handler.
1023 	                 * Then by setting the answered flag we get *him*
1024 	                 * to drop the ticket.
1025 	                 */
1026 			SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1027 				"IPC: interrupted, setting to answered", err);
1028 			fticket_set_answered(fdip->tick);
1029 			fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx);
1030 			return err;
1031 		}
1032 	}
1033 
1034 	if (fdip->tick->tk_aw_errno == ENOTCONN) {
1035 		/* The daemon died while we were waiting for a response */
1036 		err = ENOTCONN;
1037 		goto out;
1038 	} else if (fdip->tick->tk_aw_errno) {
1039 		/*
1040 		 * There was some sort of communication error with the daemon
1041 		 * that the client wouldn't understand.
1042 		 */
1043 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1044 			"IPC: explicit EIO-ing", fdip->tick->tk_aw_errno);
1045 		err = EIO;
1046 		goto out;
1047 	}
1048 	if ((err = fdip->tick->tk_aw_ohead.error)) {
1049 		SDT_PROBE2(fusefs, , ipc, fdisp_wait_answ_error,
1050 			"IPC: setting status", fdip->tick->tk_aw_ohead.error);
1051 		/*
1052 	         * This means a "proper" fuse syscall error.
1053 	         * We record this value so the caller will
1054 	         * be able to know it's not a boring messaging
1055 	         * failure, if she wishes so (and if not, she can
1056 	         * just simply propagate the return value of this routine).
1057 	         * [XXX Maybe a bitflag would do the job too,
1058 	         * if other flags needed, this will be converted thusly.]
1059 	         */
1060 		fdip->answ_stat = err;
1061 		goto out;
1062 	}
1063 	fdip->answ = fticket_resp(fdip->tick)->base;
1064 	fdip->iosize = fticket_resp(fdip->tick)->len;
1065 
1066 	return 0;
1067 
1068 out:
1069 	return err;
1070 }
1071 
/*
 * Create the UMA zone that tickets are allocated from, wiring up the ticket
 * constructor/destructor and item init/fini routines defined in this file.
 */
void
fuse_ipc_init(void)
{
	ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket),
	    fticket_ctor, fticket_dtor, fticket_init, fticket_fini,
	    UMA_ALIGN_PTR, 0);
}
1079 
/* Destroy the ticket zone created by fuse_ipc_init(). */
void
fuse_ipc_destroy(void)
{
	uma_zdestroy(ticket_zone);
}
1085