xref: /freebsd/sys/fs/fuse/fuse_internal.c (revision 206b73d0)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  * * Redistributions in binary form must reproduce the above
14  *   copyright notice, this list of conditions and the following disclaimer
15  *   in the documentation and/or other materials provided with the
16  *   distribution.
17  * * Neither the name of Google Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Copyright (C) 2005 Csaba Henk.
34  * All rights reserved.
35  *
36  * Copyright (c) 2019 The FreeBSD Foundation
37  *
38  * Portions of this software were developed by BFF Storage Systems, LLC under
39  * sponsorship from the FreeBSD Foundation.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  *
50  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  */
62 
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
65 
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/counter.h>
69 #include <sys/module.h>
70 #include <sys/errno.h>
71 #include <sys/kernel.h>
72 #include <sys/conf.h>
73 #include <sys/uio.h>
74 #include <sys/malloc.h>
75 #include <sys/queue.h>
76 #include <sys/lock.h>
77 #include <sys/mutex.h>
78 #include <sys/sdt.h>
79 #include <sys/sx.h>
80 #include <sys/proc.h>
81 #include <sys/mount.h>
82 #include <sys/vnode.h>
83 #include <sys/namei.h>
84 #include <sys/stat.h>
85 #include <sys/unistd.h>
86 #include <sys/filedesc.h>
87 #include <sys/file.h>
88 #include <sys/fcntl.h>
89 #include <sys/dirent.h>
90 #include <sys/bio.h>
91 #include <sys/buf.h>
92 #include <sys/sysctl.h>
93 #include <sys/priv.h>
94 
95 #include "fuse.h"
96 #include "fuse_file.h"
97 #include "fuse_internal.h"
98 #include "fuse_io.h"
99 #include "fuse_ipc.h"
100 #include "fuse_node.h"
101 #include "fuse_file.h"
102 
103 SDT_PROVIDER_DECLARE(fusefs);
104 /*
105  * Fuse trace probe:
106  * arg0: verbosity.  Higher numbers give more verbose messages
107  * arg1: Textual message
108  */
109 SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*");
110 
111 #ifdef ZERO_PAD_INCOMPLETE_BUFS
112 static int isbzero(void *buf, size_t len);
113 
114 #endif
115 
116 counter_u64_t fuse_lookup_cache_hits;
117 counter_u64_t fuse_lookup_cache_misses;
118 
119 SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,
120     &fuse_lookup_cache_hits, "number of positive cache hits in lookup");
121 
122 SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,
123     &fuse_lookup_cache_misses, "number of cache misses in lookup");
124 
125 int
126 fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags,
127 	struct vnode **vpp)
128 {
129 	struct bintime now;
130 	struct thread *td = curthread;
131 	uint64_t nodeid = ino;
132 	int error;
133 
134 	*vpp = NULL;
135 
136 	error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, td, vpp,
137 	    fuse_vnode_cmp, &nodeid);
138 	if (error)
139 		return error;
140 	/*
141 	 * Check the entry cache timeout.  We have to do this within fusefs
142 	 * instead of by using cache_enter_time/cache_lookup because those
143 	 * routines are only intended to work with pathnames, not inodes
144 	 */
145 	if (*vpp != NULL) {
146 		getbinuptime(&now);
147 		if (bintime_cmp(&(VTOFUD(*vpp)->entry_cache_timeout), &now, >)){
148 			counter_u64_add(fuse_lookup_cache_hits, 1);
149 			return 0;
150 		} else {
151 			/* Entry cache timeout */
152 			counter_u64_add(fuse_lookup_cache_misses, 1);
153 			cache_purge(*vpp);
154 			vput(*vpp);
155 			*vpp = NULL;
156 		}
157 	}
158 	return 0;
159 }
160 
161 /* Synchronously send a FUSE_ACCESS operation */
162 int
163 fuse_internal_access(struct vnode *vp,
164     accmode_t mode,
165     struct thread *td,
166     struct ucred *cred)
167 {
168 	int err = 0;
169 	uint32_t mask = F_OK;
170 	int dataflags;
171 	int vtype;
172 	struct mount *mp;
173 	struct fuse_dispatcher fdi;
174 	struct fuse_access_in *fai;
175 	struct fuse_data *data;
176 
177 	mp = vnode_mount(vp);
178 	vtype = vnode_vtype(vp);
179 
180 	data = fuse_get_mpdata(mp);
181 	dataflags = data->dataflags;
182 
183 	if (mode == 0)
184 		return 0;
185 
186 	if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) {
187 		switch (vp->v_type) {
188 		case VDIR:
189 			/* FALLTHROUGH */
190 		case VLNK:
191 			/* FALLTHROUGH */
192 		case VREG:
193 			return EROFS;
194 		default:
195 			break;
196 		}
197 	}
198 
199 	/* Unless explicitly permitted, deny everyone except the fs owner. */
200 	if (!(dataflags & FSESS_DAEMON_CAN_SPY)) {
201 		if (fuse_match_cred(data->daemoncred, cred))
202 			return EPERM;
203 	}
204 
205 	if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
206 		struct vattr va;
207 
208 		fuse_internal_getattr(vp, &va, cred, td);
209 		return vaccess(vp->v_type, va.va_mode, va.va_uid,
210 		    va.va_gid, mode, cred, NULL);
211 	}
212 
213 	if (!fsess_isimpl(mp, FUSE_ACCESS))
214 		return 0;
215 
216 	if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0)
217 		mask |= W_OK;
218 	if ((mode & VREAD) != 0)
219 		mask |= R_OK;
220 	if ((mode & VEXEC) != 0)
221 		mask |= X_OK;
222 
223 	fdisp_init(&fdi, sizeof(*fai));
224 	fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred);
225 
226 	fai = fdi.indata;
227 	fai->mask = mask;
228 
229 	err = fdisp_wait_answ(&fdi);
230 	fdisp_destroy(&fdi);
231 
232 	if (err == ENOSYS) {
233 		fsess_set_notimpl(mp, FUSE_ACCESS);
234 		err = 0;
235 	}
236 	return err;
237 }
238 
239 /*
240  * Cache FUSE attributes from attr, in attribute cache associated with vnode
241  * 'vp'.  Optionally, if argument 'vap' is not NULL, store a copy of the
242  * converted attributes there as well.
243  *
244  * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do
245  * return the result to the caller).
246  */
247 void
248 fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
249 	uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap)
250 {
251 	struct mount *mp;
252 	struct fuse_vnode_data *fvdat;
253 	struct fuse_data *data;
254 	struct vattr *vp_cache_at;
255 
256 	mp = vnode_mount(vp);
257 	fvdat = VTOFUD(vp);
258 	data = fuse_get_mpdata(mp);
259 
260 	ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");
261 
262 	fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
263 		&fvdat->attr_cache_timeout);
264 
265 	/* Fix our buffers if the filesize changed without us knowing */
266 	if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) {
267 		(void)fuse_vnode_setsize(vp, attr->size);
268 		fvdat->cached_attrs.va_size = attr->size;
269 	}
270 
271 	if (attr_valid > 0 || attr_valid_nsec > 0)
272 		vp_cache_at = &(fvdat->cached_attrs);
273 	else if (vap != NULL)
274 		vp_cache_at = vap;
275 	else
276 		return;
277 
278 	vattr_null(vp_cache_at);
279 	vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0];
280 	vp_cache_at->va_fileid = attr->ino;
281 	vp_cache_at->va_mode = attr->mode & ~S_IFMT;
282 	vp_cache_at->va_nlink     = attr->nlink;
283 	vp_cache_at->va_uid       = attr->uid;
284 	vp_cache_at->va_gid       = attr->gid;
285 	vp_cache_at->va_rdev      = attr->rdev;
286 	vp_cache_at->va_size      = attr->size;
287 	/* XXX on i386, seconds are truncated to 32 bits */
288 	vp_cache_at->va_atime.tv_sec  = attr->atime;
289 	vp_cache_at->va_atime.tv_nsec = attr->atimensec;
290 	vp_cache_at->va_mtime.tv_sec  = attr->mtime;
291 	vp_cache_at->va_mtime.tv_nsec = attr->mtimensec;
292 	vp_cache_at->va_ctime.tv_sec  = attr->ctime;
293 	vp_cache_at->va_ctime.tv_nsec = attr->ctimensec;
294 	if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0)
295 		vp_cache_at->va_blocksize = attr->blksize;
296 	else
297 		vp_cache_at->va_blocksize = PAGE_SIZE;
298 	vp_cache_at->va_type = IFTOVT(attr->mode);
299 	vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE;
300 	vp_cache_at->va_flags = 0;
301 
302 	if (vap != vp_cache_at && vap != NULL)
303 		memcpy(vap, vp_cache_at, sizeof(*vap));
304 }
305 
306 
307 /* fsync */
308 
309 int
310 fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio)
311 {
312 	if (tick->tk_aw_ohead.error == ENOSYS) {
313 		fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick));
314 	}
315 	return 0;
316 }
317 
318 int
319 fuse_internal_fsync(struct vnode *vp,
320     struct thread *td,
321     int waitfor,
322     bool datasync)
323 {
324 	struct fuse_fsync_in *ffsi = NULL;
325 	struct fuse_dispatcher fdi;
326 	struct fuse_filehandle *fufh;
327 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
328 	struct mount *mp = vnode_mount(vp);
329 	int op = FUSE_FSYNC;
330 	int err = 0;
331 
332 	if (!fsess_isimpl(vnode_mount(vp),
333 	    (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) {
334 		return 0;
335 	}
336 	if (vnode_isdir(vp))
337 		op = FUSE_FSYNCDIR;
338 
339 	if (!fsess_isimpl(mp, op))
340 		return 0;
341 
342 	fdisp_init(&fdi, sizeof(*ffsi));
343 	/*
344 	 * fsync every open file handle for this file, because we can't be sure
345 	 * which file handle the caller is really referring to.
346 	 */
347 	LIST_FOREACH(fufh, &fvdat->handles, next) {
348 		if (ffsi == NULL)
349 			fdisp_make_vp(&fdi, op, vp, td, NULL);
350 		else
351 			fdisp_refresh_vp(&fdi, op, vp, td, NULL);
352 		ffsi = fdi.indata;
353 		ffsi->fh = fufh->fh_id;
354 		ffsi->fsync_flags = 0;
355 
356 		if (datasync)
357 			ffsi->fsync_flags = 1;
358 
359 		if (waitfor == MNT_WAIT) {
360 			err = fdisp_wait_answ(&fdi);
361 		} else {
362 			fuse_insert_callback(fdi.tick,
363 				fuse_internal_fsync_callback);
364 			fuse_insert_message(fdi.tick, false);
365 		}
366 		if (err == ENOSYS) {
367 			/* ENOSYS means "success, and don't call again" */
368 			fsess_set_notimpl(mp, op);
369 			err = 0;
370 			break;
371 		}
372 	}
373 	fdisp_destroy(&fdi);
374 
375 	return err;
376 }
377 
378 /* Asynchronous invalidation */
379 SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_cache_hit,
380 	"struct vnode*", "struct vnode*");
381 int
382 fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
383 {
384 	struct fuse_notify_inval_entry_out fnieo;
385 	struct componentname cn;
386 	struct vnode *dvp, *vp;
387 	char name[PATH_MAX];
388 	int err;
389 
390 	if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0)
391 		return (err);
392 
393 	if ((err = uiomove(name, fnieo.namelen, uio)) != 0)
394 		return (err);
395 	name[fnieo.namelen] = '\0';
396 	/* fusefs does not cache "." or ".." entries */
397 	if (strncmp(name, ".", sizeof(".")) == 0 ||
398 	    strncmp(name, "..", sizeof("..")) == 0)
399 		return (0);
400 
401 	if (fnieo.parent == FUSE_ROOT_ID)
402 		err = VFS_ROOT(mp, LK_SHARED, &dvp);
403 	else
404 		err = fuse_internal_get_cached_vnode( mp, fnieo.parent,
405 			LK_SHARED, &dvp);
406 	/*
407 	 * If dvp is not in the cache, then it must've been reclaimed.  And
408 	 * since fuse_vnop_reclaim does a cache_purge, name's entry must've
409 	 * been invalidated already.  So we can safely return if dvp == NULL
410 	 */
411 	if (err != 0 || dvp == NULL)
412 		return (err);
413 	/*
414 	 * XXX we can't check dvp's generation because the FUSE invalidate
415 	 * entry message doesn't include it.  Worse case is that we invalidate
416 	 * an entry that didn't need to be invalidated.
417 	 */
418 
419 	cn.cn_nameiop = LOOKUP;
420 	cn.cn_flags = 0;	/* !MAKEENTRY means free cached entry */
421 	cn.cn_thread = curthread;
422 	cn.cn_cred = curthread->td_ucred;
423 	cn.cn_lkflags = LK_SHARED;
424 	cn.cn_pnbuf = NULL;
425 	cn.cn_nameptr = name;
426 	cn.cn_namelen = fnieo.namelen;
427 	err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
428 	MPASS(err == 0);
429 	fuse_vnode_clear_attr_cache(dvp);
430 	vput(dvp);
431 	return (0);
432 }
433 
434 int
435 fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
436 {
437 	struct fuse_notify_inval_inode_out fniio;
438 	struct vnode *vp;
439 	int err;
440 
441 	if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0)
442 		return (err);
443 
444 	if (fniio.ino == FUSE_ROOT_ID)
445 		err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
446 	else
447 		err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
448 			&vp);
449 	if (err != 0 || vp == NULL)
450 		return (err);
451 	/*
452 	 * XXX we can't check vp's generation because the FUSE invalidate
453 	 * entry message doesn't include it.  Worse case is that we invalidate
454 	 * an inode that didn't need to be invalidated.
455 	 */
456 
457 	/*
458 	 * Flush and invalidate buffers if off >= 0.  Technically we only need
459 	 * to flush and invalidate the range of offsets [off, off + len), but
460 	 * for simplicity's sake we do everything.
461 	 */
462 	if (fniio.off >= 0)
463 		fuse_io_invalbuf(vp, curthread);
464 	fuse_vnode_clear_attr_cache(vp);
465 	vput(vp);
466 	return (0);
467 }
468 
469 /* mknod */
470 int
471 fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
472 	struct componentname *cnp, struct vattr *vap)
473 {
474 	struct fuse_data *data;
475 	struct fuse_mknod_in fmni;
476 	size_t insize;
477 
478 	data = fuse_get_mpdata(dvp->v_mount);
479 
480 	fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode);
481 	fmni.rdev = vap->va_rdev;
482 	if (fuse_libabi_geq(data, 7, 12)) {
483 		insize = sizeof(fmni);
484 		fmni.umask = curthread->td_proc->p_fd->fd_cmask;
485 	} else {
486 		insize = FUSE_COMPAT_MKNOD_IN_SIZE;
487 	}
488 	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni,
489 	    insize, vap->va_type));
490 }
491 
492 /* readdir */
493 
494 int
495 fuse_internal_readdir(struct vnode *vp,
496     struct uio *uio,
497     off_t startoff,
498     struct fuse_filehandle *fufh,
499     struct fuse_iov *cookediov,
500     int *ncookies,
501     u_long *cookies)
502 {
503 	int err = 0;
504 	struct fuse_dispatcher fdi;
505 	struct fuse_read_in *fri = NULL;
506 	int fnd_start;
507 
508 	if (uio_resid(uio) == 0)
509 		return 0;
510 	fdisp_init(&fdi, 0);
511 
512 	/*
513 	 * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p
514 	 * I/O).
515 	 */
516 
517 	/*
518 	 * fnd_start is set non-zero once the offset in the directory gets
519 	 * to the startoff.  This is done because directories must be read
520 	 * from the beginning (offset == 0) when fuse_vnop_readdir() needs
521 	 * to do an open of the directory.
522 	 * If it is not set non-zero here, it will be set non-zero in
523 	 * fuse_internal_readdir_processdata() when uio_offset == startoff.
524 	 */
525 	fnd_start = 0;
526 	if (uio->uio_offset == startoff)
527 		fnd_start = 1;
528 	while (uio_resid(uio) > 0) {
529 		fdi.iosize = sizeof(*fri);
530 		if (fri == NULL)
531 			fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
532 		else
533 			fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
534 
535 		fri = fdi.indata;
536 		fri->fh = fufh->fh_id;
537 		fri->offset = uio_offset(uio);
538 		fri->size = MIN(uio->uio_resid,
539 		    fuse_get_mpdata(vp->v_mount)->max_read);
540 
541 		if ((err = fdisp_wait_answ(&fdi)))
542 			break;
543 		if ((err = fuse_internal_readdir_processdata(uio, startoff,
544 		    &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov,
545 		    ncookies, &cookies)))
546 			break;
547 	}
548 
549 	fdisp_destroy(&fdi);
550 	return ((err == -1) ? 0 : err);
551 }
552 
553 /*
554  * Return -1 to indicate that this readdir is finished, 0 if it copied
555  * all the directory data read in and it may be possible to read more
556  * and greater than 0 for a failure.
557  */
558 int
559 fuse_internal_readdir_processdata(struct uio *uio,
560     off_t startoff,
561     int *fnd_start,
562     size_t reqsize,
563     void *buf,
564     size_t bufsize,
565     struct fuse_iov *cookediov,
566     int *ncookies,
567     u_long **cookiesp)
568 {
569 	int err = 0;
570 	int bytesavail;
571 	size_t freclen;
572 
573 	struct dirent *de;
574 	struct fuse_dirent *fudge;
575 	u_long *cookies;
576 
577 	cookies = *cookiesp;
578 	if (bufsize < FUSE_NAME_OFFSET)
579 		return -1;
580 	for (;;) {
581 		if (bufsize < FUSE_NAME_OFFSET) {
582 			err = -1;
583 			break;
584 		}
585 		fudge = (struct fuse_dirent *)buf;
586 		freclen = FUSE_DIRENT_SIZE(fudge);
587 
588 		if (bufsize < freclen) {
589 			/*
590 			 * This indicates a partial directory entry at the
591 			 * end of the directory data.
592 			 */
593 			err = -1;
594 			break;
595 		}
596 #ifdef ZERO_PAD_INCOMPLETE_BUFS
597 		if (isbzero(buf, FUSE_NAME_OFFSET)) {
598 			err = -1;
599 			break;
600 		}
601 #endif
602 
603 		if (!fudge->namelen || fudge->namelen > MAXNAMLEN) {
604 			err = EINVAL;
605 			break;
606 		}
607 		bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *)
608 					    &fudge->namelen);
609 
610 		if (bytesavail > uio_resid(uio)) {
611 			/* Out of space for the dir so we are done. */
612 			err = -1;
613 			break;
614 		}
615 		/*
616 		 * Don't start to copy the directory entries out until
617 		 * the requested offset in the directory is found.
618 		 */
619 		if (*fnd_start != 0) {
620 			fiov_adjust(cookediov, bytesavail);
621 			bzero(cookediov->base, bytesavail);
622 
623 			de = (struct dirent *)cookediov->base;
624 			de->d_fileno = fudge->ino;
625 			de->d_reclen = bytesavail;
626 			de->d_type = fudge->type;
627 			de->d_namlen = fudge->namelen;
628 			memcpy((char *)cookediov->base + sizeof(struct dirent) -
629 			       MAXNAMLEN - 1,
630 			       (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
631 			dirent_terminate(de);
632 
633 			err = uiomove(cookediov->base, cookediov->len, uio);
634 			if (err)
635 				break;
636 			if (cookies != NULL) {
637 				if (*ncookies == 0) {
638 					err = -1;
639 					break;
640 				}
641 				*cookies = fudge->off;
642 				cookies++;
643 				(*ncookies)--;
644 			}
645 		} else if (startoff == fudge->off)
646 			*fnd_start = 1;
647 		buf = (char *)buf + freclen;
648 		bufsize -= freclen;
649 		uio_setoffset(uio, fudge->off);
650 	}
651 	*cookiesp = cookies;
652 
653 	return err;
654 }
655 
656 /* remove */
657 
658 int
659 fuse_internal_remove(struct vnode *dvp,
660     struct vnode *vp,
661     struct componentname *cnp,
662     enum fuse_opcode op)
663 {
664 	struct fuse_dispatcher fdi;
665 	nlink_t nlink;
666 	int err = 0;
667 
668 	fdisp_init(&fdi, cnp->cn_namelen + 1);
669 	fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred);
670 
671 	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
672 	((char *)fdi.indata)[cnp->cn_namelen] = '\0';
673 
674 	err = fdisp_wait_answ(&fdi);
675 	fdisp_destroy(&fdi);
676 
677 	if (err)
678 		return (err);
679 
680 	/*
681 	 * Access the cached nlink even if the attr cached has expired.  If
682 	 * it's inaccurate, the worst that will happen is:
683 	 * 1) We'll recycle the vnode even though the file has another link we
684 	 *    don't know about, costing a bit of cpu time, or
685 	 * 2) We won't recycle the vnode even though all of its links are gone.
686 	 *    It will linger around until vnlru reclaims it, costing a bit of
687 	 *    temporary memory.
688 	 */
689 	nlink = VTOFUD(vp)->cached_attrs.va_nlink--;
690 
691 	/*
692 	 * Purge the parent's attribute cache because the daemon
693 	 * should've updated its mtime and ctime.
694 	 */
695 	fuse_vnode_clear_attr_cache(dvp);
696 
697 	/* NB: nlink could be zero if it was never cached */
698 	if (nlink <= 1 || vnode_vtype(vp) == VDIR) {
699 		fuse_internal_vnode_disappear(vp);
700 	} else {
701 		cache_purge(vp);
702 		fuse_vnode_update(vp, FN_CTIMECHANGE);
703 	}
704 
705 	return err;
706 }
707 
708 /* rename */
709 
710 int
711 fuse_internal_rename(struct vnode *fdvp,
712     struct componentname *fcnp,
713     struct vnode *tdvp,
714     struct componentname *tcnp)
715 {
716 	struct fuse_dispatcher fdi;
717 	struct fuse_rename_in *fri;
718 	int err = 0;
719 
720 	fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2);
721 	fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred);
722 
723 	fri = fdi.indata;
724 	fri->newdir = VTOI(tdvp);
725 	memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr,
726 	    fcnp->cn_namelen);
727 	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0';
728 	memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1,
729 	    tcnp->cn_nameptr, tcnp->cn_namelen);
730 	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen +
731 	    tcnp->cn_namelen + 1] = '\0';
732 
733 	err = fdisp_wait_answ(&fdi);
734 	fdisp_destroy(&fdi);
735 	return err;
736 }
737 
738 /* strategy */
739 
740 /* entity creation */
741 
742 void
743 fuse_internal_newentry_makerequest(struct mount *mp,
744     uint64_t dnid,
745     struct componentname *cnp,
746     enum fuse_opcode op,
747     void *buf,
748     size_t bufsize,
749     struct fuse_dispatcher *fdip)
750 {
751 	fdip->iosize = bufsize + cnp->cn_namelen + 1;
752 
753 	fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred);
754 	memcpy(fdip->indata, buf, bufsize);
755 	memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr, cnp->cn_namelen);
756 	((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0';
757 }
758 
759 int
760 fuse_internal_newentry_core(struct vnode *dvp,
761     struct vnode **vpp,
762     struct componentname *cnp,
763     enum vtype vtyp,
764     struct fuse_dispatcher *fdip)
765 {
766 	int err = 0;
767 	struct fuse_entry_out *feo;
768 	struct mount *mp = vnode_mount(dvp);
769 
770 	if ((err = fdisp_wait_answ(fdip))) {
771 		return err;
772 	}
773 	feo = fdip->answ;
774 
775 	if ((err = fuse_internal_checkentry(feo, vtyp))) {
776 		return err;
777 	}
778 	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp);
779 	if (err) {
780 		fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred,
781 		    feo->nodeid, 1);
782 		return err;
783 	}
784 
785 	/*
786 	 * Purge the parent's attribute cache because the daemon should've
787 	 * updated its mtime and ctime
788 	 */
789 	fuse_vnode_clear_attr_cache(dvp);
790 
791 	fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
792 		feo->attr_valid_nsec, NULL);
793 
794 	return err;
795 }
796 
797 int
798 fuse_internal_newentry(struct vnode *dvp,
799     struct vnode **vpp,
800     struct componentname *cnp,
801     enum fuse_opcode op,
802     void *buf,
803     size_t bufsize,
804     enum vtype vtype)
805 {
806 	int err;
807 	struct fuse_dispatcher fdi;
808 	struct mount *mp = vnode_mount(dvp);
809 
810 	fdisp_init(&fdi, 0);
811 	fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf,
812 	    bufsize, &fdi);
813 	err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi);
814 	fdisp_destroy(&fdi);
815 
816 	return err;
817 }
818 
819 /* entity destruction */
820 
821 int
822 fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio)
823 {
824 	fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL,
825 	    ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1);
826 
827 	return 0;
828 }
829 
830 void
831 fuse_internal_forget_send(struct mount *mp,
832     struct thread *td,
833     struct ucred *cred,
834     uint64_t nodeid,
835     uint64_t nlookup)
836 {
837 
838 	struct fuse_dispatcher fdi;
839 	struct fuse_forget_in *ffi;
840 
841 	/*
842          * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu",
843          *         (long long unsigned) nodeid));
844          */
845 
846 	fdisp_init(&fdi, sizeof(*ffi));
847 	fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred);
848 
849 	ffi = fdi.indata;
850 	ffi->nlookup = nlookup;
851 
852 	fuse_insert_message(fdi.tick, false);
853 	fdisp_destroy(&fdi);
854 }
855 
856 /* Fetch the vnode's attributes from the daemon*/
857 int
858 fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
859 	struct ucred *cred, struct thread *td)
860 {
861 	struct fuse_dispatcher fdi;
862 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
863 	struct fuse_getattr_in *fgai;
864 	struct fuse_attr_out *fao;
865 	off_t old_filesize = fvdat->cached_attrs.va_size;
866 	struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
867 	struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
868 	enum vtype vtyp;
869 	int err;
870 
871 	fdisp_init(&fdi, sizeof(*fgai));
872 	fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
873 	fgai = fdi.indata;
874 	/*
875 	 * We could look up a file handle and set it in fgai->fh, but that
876 	 * involves extra runtime work and I'm unaware of any file systems that
877 	 * care.
878 	 */
879 	fgai->getattr_flags = 0;
880 	if ((err = fdisp_wait_answ(&fdi))) {
881 		if (err == ENOENT)
882 			fuse_internal_vnode_disappear(vp);
883 		goto out;
884 	}
885 
886 	fao = (struct fuse_attr_out *)fdi.answ;
887 	vtyp = IFTOVT(fao->attr.mode);
888 	if (fvdat->flag & FN_SIZECHANGE)
889 		fao->attr.size = old_filesize;
890 	if (fvdat->flag & FN_CTIMECHANGE) {
891 		fao->attr.ctime = old_ctime.tv_sec;
892 		fao->attr.ctimensec = old_ctime.tv_nsec;
893 	}
894 	if (fvdat->flag & FN_MTIMECHANGE) {
895 		fao->attr.mtime = old_mtime.tv_sec;
896 		fao->attr.mtimensec = old_mtime.tv_nsec;
897 	}
898 	fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
899 		fao->attr_valid_nsec, vap);
900 	if (vtyp != vnode_vtype(vp)) {
901 		fuse_internal_vnode_disappear(vp);
902 		err = ENOENT;
903 	}
904 
905 out:
906 	fdisp_destroy(&fdi);
907 	return err;
908 }
909 
910 /* Read a vnode's attributes from cache or fetch them from the fuse daemon */
911 int
912 fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred,
913 	struct thread *td)
914 {
915 	struct vattr *attrs;
916 
917 	if ((attrs = VTOVA(vp)) != NULL) {
918 		*vap = *attrs;	/* struct copy */
919 		return 0;
920 	}
921 
922 	return fuse_internal_do_getattr(vp, vap, cred, td);
923 }
924 
925 void
926 fuse_internal_vnode_disappear(struct vnode *vp)
927 {
928 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
929 
930 	ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear");
931 	fvdat->flag |= FN_REVOKED;
932 	cache_purge(vp);
933 }
934 
935 /* fuse start/stop */
936 
937 int
938 fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
939 {
940 	int err = 0;
941 	struct fuse_data *data = tick->tk_data;
942 	struct fuse_init_out *fiio;
943 
944 	if ((err = tick->tk_aw_ohead.error)) {
945 		goto out;
946 	}
947 	if ((err = fticket_pull(tick, uio))) {
948 		goto out;
949 	}
950 	fiio = fticket_resp(tick)->base;
951 
952 	data->fuse_libabi_major = fiio->major;
953 	data->fuse_libabi_minor = fiio->minor;
954 	if (!fuse_libabi_geq(data, 7, 4)) {
955 		/*
956 		 * With a little work we could support servers as old as 7.1.
957 		 * But there would be little payoff.
958 		 */
959 		SDT_PROBE2(fusefs, , internal, trace, 1,
960 			"userpace version too low");
961 		err = EPROTONOSUPPORT;
962 		goto out;
963 	}
964 
965 	if (fuse_libabi_geq(data, 7, 5)) {
966 		if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) ||
967 		    fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) {
968 			data->max_write = fiio->max_write;
969 			if (fiio->flags & FUSE_ASYNC_READ)
970 				data->dataflags |= FSESS_ASYNC_READ;
971 			if (fiio->flags & FUSE_POSIX_LOCKS)
972 				data->dataflags |= FSESS_POSIX_LOCKS;
973 			if (fiio->flags & FUSE_EXPORT_SUPPORT)
974 				data->dataflags |= FSESS_EXPORT_SUPPORT;
975 			/*
976 			 * Don't bother to check FUSE_BIG_WRITES, because it's
977 			 * redundant with max_write
978 			 */
979 			/*
980 			 * max_background and congestion_threshold are not
981 			 * implemented
982 			 */
983 		} else {
984 			err = EINVAL;
985 		}
986 	} else {
987 		/* Old fixed values */
988 		data->max_write = 4096;
989 	}
990 
991 	if (fuse_libabi_geq(data, 7, 6))
992 		data->max_readahead_blocks = fiio->max_readahead / maxbcachebuf;
993 
994 	if (!fuse_libabi_geq(data, 7, 7))
995 		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
996 
997 	if (!fuse_libabi_geq(data, 7, 8)) {
998 		fsess_set_notimpl(data->mp, FUSE_BMAP);
999 		fsess_set_notimpl(data->mp, FUSE_DESTROY);
1000 	}
1001 
1002 	if (fuse_libabi_geq(data, 7, 23) && fiio->time_gran >= 1 &&
1003 	    fiio->time_gran <= 1000000000)
1004 		data->time_gran = fiio->time_gran;
1005 	else
1006 		data->time_gran = 1;
1007 
1008 	if (!fuse_libabi_geq(data, 7, 23))
1009 		data->cache_mode = fuse_data_cache_mode;
1010 	else if (fiio->flags & FUSE_WRITEBACK_CACHE)
1011 		data->cache_mode = FUSE_CACHE_WB;
1012 	else
1013 		data->cache_mode = FUSE_CACHE_WT;
1014 
1015 out:
1016 	if (err) {
1017 		fdata_set_dead(data);
1018 	}
1019 	FUSE_LOCK();
1020 	data->dataflags |= FSESS_INITED;
1021 	wakeup(&data->ticketer);
1022 	FUSE_UNLOCK();
1023 
1024 	return 0;
1025 }
1026 
1027 void
1028 fuse_internal_send_init(struct fuse_data *data, struct thread *td)
1029 {
1030 	struct fuse_init_in *fiii;
1031 	struct fuse_dispatcher fdi;
1032 
1033 	fdisp_init(&fdi, sizeof(*fiii));
1034 	fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL);
1035 	fiii = fdi.indata;
1036 	fiii->major = FUSE_KERNEL_VERSION;
1037 	fiii->minor = FUSE_KERNEL_MINOR_VERSION;
1038 	/*
1039 	 * fusefs currently reads ahead no more than one cache block at a time.
1040 	 * See fuse_read_biobackend
1041 	 */
1042 	fiii->max_readahead = maxbcachebuf;
1043 	/*
1044 	 * Unsupported features:
1045 	 * FUSE_FILE_OPS: No known FUSE server or client supports it
1046 	 * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it
1047 	 * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even
1048 	 *	when default ACLs are in use.
1049 	 * FUSE_SPLICE_WRITE, FUSE_SPLICE_MOVE, FUSE_SPLICE_READ: FreeBSD
1050 	 *	doesn't have splice(2).
1051 	 * FUSE_FLOCK_LOCKS: not yet implemented
1052 	 * FUSE_HAS_IOCTL_DIR: not yet implemented
1053 	 * FUSE_AUTO_INVAL_DATA: not yet implemented
1054 	 * FUSE_DO_READDIRPLUS: not yet implemented
1055 	 * FUSE_READDIRPLUS_AUTO: not yet implemented
1056 	 * FUSE_ASYNC_DIO: not yet implemented
1057 	 * FUSE_NO_OPEN_SUPPORT: not yet implemented
1058 	 */
1059 	fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT
1060 		| FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE;
1061 
1062 	fuse_insert_callback(fdi.tick, fuse_internal_init_callback);
1063 	fuse_insert_message(fdi.tick, false);
1064 	fdisp_destroy(&fdi);
1065 }
1066 
1067 /*
1068  * Send a FUSE_SETATTR operation with no permissions checks.  If cred is NULL,
1069  * send the request with root credentials
1070  */
1071 int fuse_internal_setattr(struct vnode *vp, struct vattr *vap,
1072 	struct thread *td, struct ucred *cred)
1073 {
1074 	struct fuse_vnode_data *fvdat;
1075 	struct fuse_dispatcher fdi;
1076 	struct fuse_setattr_in *fsai;
1077 	struct mount *mp;
1078 	pid_t pid = td->td_proc->p_pid;
1079 	struct fuse_data *data;
1080 	int dataflags;
1081 	int err = 0;
1082 	enum vtype vtyp;
1083 	int sizechanged = -1;
1084 	uint64_t newsize = 0;
1085 
1086 	mp = vnode_mount(vp);
1087 	fvdat = VTOFUD(vp);
1088 	data = fuse_get_mpdata(mp);
1089 	dataflags = data->dataflags;
1090 
1091 	fdisp_init(&fdi, sizeof(*fsai));
1092 	fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred);
1093 	if (!cred) {
1094 		fdi.finh->uid = 0;
1095 		fdi.finh->gid = 0;
1096 	}
1097 	fsai = fdi.indata;
1098 	fsai->valid = 0;
1099 
1100 	if (vap->va_uid != (uid_t)VNOVAL) {
1101 		fsai->uid = vap->va_uid;
1102 		fsai->valid |= FATTR_UID;
1103 	}
1104 	if (vap->va_gid != (gid_t)VNOVAL) {
1105 		fsai->gid = vap->va_gid;
1106 		fsai->valid |= FATTR_GID;
1107 	}
1108 	if (vap->va_size != VNOVAL) {
1109 		struct fuse_filehandle *fufh = NULL;
1110 
1111 		/*Truncate to a new value. */
1112 		fsai->size = vap->va_size;
1113 		sizechanged = 1;
1114 		newsize = vap->va_size;
1115 		fsai->valid |= FATTR_SIZE;
1116 
1117 		fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
1118 		if (fufh) {
1119 			fsai->fh = fufh->fh_id;
1120 			fsai->valid |= FATTR_FH;
1121 		}
1122 		VTOFUD(vp)->flag &= ~FN_SIZECHANGE;
1123 	}
1124 	if (vap->va_atime.tv_sec != VNOVAL) {
1125 		fsai->atime = vap->va_atime.tv_sec;
1126 		fsai->atimensec = vap->va_atime.tv_nsec;
1127 		fsai->valid |= FATTR_ATIME;
1128 		if (vap->va_vaflags & VA_UTIMES_NULL)
1129 			fsai->valid |= FATTR_ATIME_NOW;
1130 	}
1131 	if (vap->va_mtime.tv_sec != VNOVAL) {
1132 		fsai->mtime = vap->va_mtime.tv_sec;
1133 		fsai->mtimensec = vap->va_mtime.tv_nsec;
1134 		fsai->valid |= FATTR_MTIME;
1135 		if (vap->va_vaflags & VA_UTIMES_NULL)
1136 			fsai->valid |= FATTR_MTIME_NOW;
1137 	} else if (fvdat->flag & FN_MTIMECHANGE) {
1138 		fsai->mtime = fvdat->cached_attrs.va_mtime.tv_sec;
1139 		fsai->mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec;
1140 		fsai->valid |= FATTR_MTIME;
1141 	}
1142 	if (fuse_libabi_geq(data, 7, 23) && fvdat->flag & FN_CTIMECHANGE) {
1143 		fsai->ctime = fvdat->cached_attrs.va_ctime.tv_sec;
1144 		fsai->ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec;
1145 		fsai->valid |= FATTR_CTIME;
1146 	}
1147 	if (vap->va_mode != (mode_t)VNOVAL) {
1148 		fsai->mode = vap->va_mode & ALLPERMS;
1149 		fsai->valid |= FATTR_MODE;
1150 	}
1151 	if (!fsai->valid) {
1152 		goto out;
1153 	}
1154 
1155 	if ((err = fdisp_wait_answ(&fdi)))
1156 		goto out;
1157 	vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode);
1158 
1159 	if (vnode_vtype(vp) != vtyp) {
1160 		if (vnode_vtype(vp) == VNON && vtyp != VNON) {
1161 			SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! "
1162 				"vnode_vtype is VNON and vtype isn't.");
1163 		} else {
1164 			/*
1165 	                 * STALE vnode, ditch
1166 	                 *
1167 			 * The vnode has changed its type "behind our back".
1168 			 * There's nothing really we can do, so let us just
1169 			 * force an internal revocation and tell the caller to
1170 			 * try again, if interested.
1171 	                 */
1172 			fuse_internal_vnode_disappear(vp);
1173 			err = EAGAIN;
1174 		}
1175 	}
1176 	if (err == 0) {
1177 		struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ;
1178 		fuse_vnode_undirty_cached_timestamps(vp);
1179 		fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
1180 			fao->attr_valid_nsec, NULL);
1181 	}
1182 
1183 out:
1184 	fdisp_destroy(&fdi);
1185 	return err;
1186 }
1187 
1188 #ifdef ZERO_PAD_INCOMPLETE_BUFS
/*
 * Return 1 if the first len bytes of buf are all zero (trivially so when
 * len is 0), otherwise 0.
 *
 * The index is a size_t so that it matches len: a signed int index would be
 * compared against an unsigned length and could overflow on large buffers.
 */
static int
isbzero(void *buf, size_t len)
{
	const unsigned char *p = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		if (p[i] != 0)
			return (0);
	}

	return (1);
}
1201 
1202 #endif
1203 
1204 void
1205 fuse_internal_init(void)
1206 {
1207 	fuse_lookup_cache_misses = counter_u64_alloc(M_WAITOK);
1208 	fuse_lookup_cache_hits = counter_u64_alloc(M_WAITOK);
1209 }
1210 
1211 void
1212 fuse_internal_destroy(void)
1213 {
1214 	counter_u64_free(fuse_lookup_cache_hits);
1215 	counter_u64_free(fuse_lookup_cache_misses);
1216 }
1217