xref: /freebsd/sys/fs/fuse/fuse_internal.c (revision 87ff949a)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  * * Redistributions in binary form must reproduce the above
14  *   copyright notice, this list of conditions and the following disclaimer
15  *   in the documentation and/or other materials provided with the
16  *   distribution.
17  * * Neither the name of Google Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Copyright (C) 2005 Csaba Henk.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55  * SUCH DAMAGE.
56  */
57 
58 #include <sys/cdefs.h>
59 __FBSDID("$FreeBSD$");
60 
61 #include <sys/param.h>
62 #include <sys/module.h>
63 #include <sys/systm.h>
64 #include <sys/errno.h>
65 #include <sys/kernel.h>
66 #include <sys/conf.h>
67 #include <sys/uio.h>
68 #include <sys/malloc.h>
69 #include <sys/queue.h>
70 #include <sys/lock.h>
71 #include <sys/mutex.h>
72 #include <sys/sdt.h>
73 #include <sys/sx.h>
74 #include <sys/proc.h>
75 #include <sys/mount.h>
76 #include <sys/vnode.h>
77 #include <sys/namei.h>
78 #include <sys/stat.h>
79 #include <sys/unistd.h>
80 #include <sys/filedesc.h>
81 #include <sys/file.h>
82 #include <sys/fcntl.h>
83 #include <sys/dirent.h>
84 #include <sys/bio.h>
85 #include <sys/buf.h>
86 #include <sys/sysctl.h>
87 #include <sys/priv.h>
88 
89 #include "fuse.h"
90 #include "fuse_file.h"
91 #include "fuse_internal.h"
92 #include "fuse_io.h"
93 #include "fuse_ipc.h"
94 #include "fuse_node.h"
95 #include "fuse_file.h"
96 
/*
 * DTrace SDT hooks used by this file.  The "fusefs" provider is only
 * declared here; presumably it is defined in another fusefs source file.
 */
97 SDT_PROVIDER_DECLARE(fusefs);
98 /*
99  * Fuse trace probe:
100  * arg0: verbosity.  Higher numbers give more verbose messages
101  * arg1: Textual message
102  */
103 SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*");
104 
/*
 * isbzero() is compiled, and used by fuse_internal_readdir_processdata(),
 * only when ZERO_PAD_INCOMPLETE_BUFS is defined.
 */
105 #ifdef ZERO_PAD_INCOMPLETE_BUFS
106 static int isbzero(void *buf, size_t len);
107 
108 #endif
109 
110 int
111 fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags,
112 	struct vnode **vpp)
113 {
114 	struct bintime now;
115 	struct thread *td = curthread;
116 	uint64_t nodeid = ino;
117 	int error;
118 
119 	*vpp = NULL;
120 
121 	error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, td, vpp,
122 	    fuse_vnode_cmp, &nodeid);
123 	if (error)
124 		return error;
125 	/*
126 	 * Check the entry cache timeout.  We have to do this within fusefs
127 	 * instead of by using cache_enter_time/cache_lookup because those
128 	 * routines are only intended to work with pathnames, not inodes
129 	 */
130 	if (*vpp != NULL) {
131 		getbinuptime(&now);
132 		if (bintime_cmp(&(VTOFUD(*vpp)->entry_cache_timeout), &now, >)){
133 			atomic_add_acq_long(&fuse_lookup_cache_hits, 1);
134 			return 0;
135 		} else {
136 			/* Entry cache timeout */
137 			atomic_add_acq_long(&fuse_lookup_cache_misses, 1);
138 			cache_purge(*vpp);
139 			vput(*vpp);
140 			*vpp = NULL;
141 		}
142 	}
143 	return 0;
144 }
145 
146 /* Synchronously send a FUSE_ACCESS operation */
147 int
148 fuse_internal_access(struct vnode *vp,
149     accmode_t mode,
150     struct thread *td,
151     struct ucred *cred)
152 {
153 	int err = 0;
154 	uint32_t mask = F_OK;
155 	int dataflags;
156 	int vtype;
157 	struct mount *mp;
158 	struct fuse_dispatcher fdi;
159 	struct fuse_access_in *fai;
160 	struct fuse_data *data;
161 
162 	mp = vnode_mount(vp);
163 	vtype = vnode_vtype(vp);
164 
165 	data = fuse_get_mpdata(mp);
166 	dataflags = data->dataflags;
167 
168 	if (mode == 0)
169 		return 0;
170 
171 	if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) {
172 		switch (vp->v_type) {
173 		case VDIR:
174 			/* FALLTHROUGH */
175 		case VLNK:
176 			/* FALLTHROUGH */
177 		case VREG:
178 			return EROFS;
179 		default:
180 			break;
181 		}
182 	}
183 
184 	/* Unless explicitly permitted, deny everyone except the fs owner. */
185 	if (!(dataflags & FSESS_DAEMON_CAN_SPY)) {
186 		if (fuse_match_cred(data->daemoncred, cred))
187 			return EPERM;
188 	}
189 
190 	if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
191 		struct vattr va;
192 
193 		fuse_internal_getattr(vp, &va, cred, td);
194 		return vaccess(vp->v_type, va.va_mode, va.va_uid,
195 		    va.va_gid, mode, cred, NULL);
196 	}
197 
198 	if (!fsess_isimpl(mp, FUSE_ACCESS))
199 		return 0;
200 
201 	if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0)
202 		mask |= W_OK;
203 	if ((mode & VREAD) != 0)
204 		mask |= R_OK;
205 	if ((mode & VEXEC) != 0)
206 		mask |= X_OK;
207 
208 	fdisp_init(&fdi, sizeof(*fai));
209 	fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred);
210 
211 	fai = fdi.indata;
212 	fai->mask = mask;
213 
214 	err = fdisp_wait_answ(&fdi);
215 	fdisp_destroy(&fdi);
216 
217 	if (err == ENOSYS) {
218 		fsess_set_notimpl(mp, FUSE_ACCESS);
219 		err = 0;
220 	}
221 	return err;
222 }
223 
224 /*
225  * Cache FUSE attributes from attr, in attribute cache associated with vnode
226  * 'vp'.  Optionally, if argument 'vap' is not NULL, store a copy of the
227  * converted attributes there as well.
228  *
229  * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do
230  * return the result to the caller).
231  */
232 void
233 fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
234 	uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap)
235 {
236 	struct mount *mp;
237 	struct fuse_vnode_data *fvdat;
238 	struct fuse_data *data;
239 	struct vattr *vp_cache_at;
240 
241 	mp = vnode_mount(vp);
242 	fvdat = VTOFUD(vp);
243 	data = fuse_get_mpdata(mp);
244 
245 	ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");
246 
247 	fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
248 		&fvdat->attr_cache_timeout);
249 
250 	/* Fix our buffers if the filesize changed without us knowing */
251 	if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) {
252 		(void)fuse_vnode_setsize(vp, attr->size);
253 		fvdat->cached_attrs.va_size = attr->size;
254 	}
255 
256 	if (attr_valid > 0 || attr_valid_nsec > 0)
257 		vp_cache_at = &(fvdat->cached_attrs);
258 	else if (vap != NULL)
259 		vp_cache_at = vap;
260 	else
261 		return;
262 
263 	vattr_null(vp_cache_at);
264 	vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0];
265 	vp_cache_at->va_fileid = attr->ino;
266 	vp_cache_at->va_mode = attr->mode & ~S_IFMT;
267 	vp_cache_at->va_nlink     = attr->nlink;
268 	vp_cache_at->va_uid       = attr->uid;
269 	vp_cache_at->va_gid       = attr->gid;
270 	vp_cache_at->va_rdev      = attr->rdev;
271 	vp_cache_at->va_size      = attr->size;
272 	/* XXX on i386, seconds are truncated to 32 bits */
273 	vp_cache_at->va_atime.tv_sec  = attr->atime;
274 	vp_cache_at->va_atime.tv_nsec = attr->atimensec;
275 	vp_cache_at->va_mtime.tv_sec  = attr->mtime;
276 	vp_cache_at->va_mtime.tv_nsec = attr->mtimensec;
277 	vp_cache_at->va_ctime.tv_sec  = attr->ctime;
278 	vp_cache_at->va_ctime.tv_nsec = attr->ctimensec;
279 	if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0)
280 		vp_cache_at->va_blocksize = attr->blksize;
281 	else
282 		vp_cache_at->va_blocksize = PAGE_SIZE;
283 	vp_cache_at->va_type = IFTOVT(attr->mode);
284 	vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE;
285 	vp_cache_at->va_flags = 0;
286 
287 	if (vap != vp_cache_at && vap != NULL)
288 		memcpy(vap, vp_cache_at, sizeof(*vap));
289 }
290 
291 
292 /* fsync */
293 
294 int
295 fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio)
296 {
297 	if (tick->tk_aw_ohead.error == ENOSYS) {
298 		fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick));
299 	}
300 	return 0;
301 }
302 
303 int
304 fuse_internal_fsync(struct vnode *vp,
305     struct thread *td,
306     int waitfor,
307     bool datasync)
308 {
309 	struct fuse_fsync_in *ffsi = NULL;
310 	struct fuse_dispatcher fdi;
311 	struct fuse_filehandle *fufh;
312 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
313 	struct mount *mp = vnode_mount(vp);
314 	int op = FUSE_FSYNC;
315 	int err = 0;
316 
317 	if (!fsess_isimpl(vnode_mount(vp),
318 	    (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) {
319 		return 0;
320 	}
321 	if (vnode_isdir(vp))
322 		op = FUSE_FSYNCDIR;
323 
324 	if (!fsess_isimpl(mp, op))
325 		return 0;
326 
327 	fdisp_init(&fdi, sizeof(*ffsi));
328 	/*
329 	 * fsync every open file handle for this file, because we can't be sure
330 	 * which file handle the caller is really referring to.
331 	 */
332 	LIST_FOREACH(fufh, &fvdat->handles, next) {
333 		if (ffsi == NULL)
334 			fdisp_make_vp(&fdi, op, vp, td, NULL);
335 		else
336 			fdisp_refresh_vp(&fdi, op, vp, td, NULL);
337 		ffsi = fdi.indata;
338 		ffsi->fh = fufh->fh_id;
339 		ffsi->fsync_flags = 0;
340 
341 		if (datasync)
342 			ffsi->fsync_flags = 1;
343 
344 		if (waitfor == MNT_WAIT) {
345 			err = fdisp_wait_answ(&fdi);
346 		} else {
347 			fuse_insert_callback(fdi.tick,
348 				fuse_internal_fsync_callback);
349 			fuse_insert_message(fdi.tick, false);
350 		}
351 		if (err == ENOSYS) {
352 			/* ENOSYS means "success, and don't call again" */
353 			fsess_set_notimpl(mp, op);
354 			err = 0;
355 			break;
356 		}
357 	}
358 	fdisp_destroy(&fdi);
359 
360 	return err;
361 }
362 
363 /* Asynchronous invalidation */
364 SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_cache_hit,
365 	"struct vnode*", "struct vnode*");
366 int
367 fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
368 {
369 	struct fuse_notify_inval_entry_out fnieo;
370 	struct componentname cn;
371 	struct vnode *dvp, *vp;
372 	char name[PATH_MAX];
373 	int err;
374 
375 	if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0)
376 		return (err);
377 
378 	if ((err = uiomove(name, fnieo.namelen, uio)) != 0)
379 		return (err);
380 	name[fnieo.namelen] = '\0';
381 	/* fusefs does not cache "." or ".." entries */
382 	if (strncmp(name, ".", sizeof(".")) == 0 ||
383 	    strncmp(name, "..", sizeof("..")) == 0)
384 		return (0);
385 
386 	if (fnieo.parent == FUSE_ROOT_ID)
387 		err = VFS_ROOT(mp, LK_SHARED, &dvp);
388 	else
389 		err = fuse_internal_get_cached_vnode( mp, fnieo.parent,
390 			LK_SHARED, &dvp);
391 	/*
392 	 * If dvp is not in the cache, then it must've been reclaimed.  And
393 	 * since fuse_vnop_reclaim does a cache_purge, name's entry must've
394 	 * been invalidated already.  So we can safely return if dvp == NULL
395 	 */
396 	if (err != 0 || dvp == NULL)
397 		return (err);
398 	/*
399 	 * XXX we can't check dvp's generation because the FUSE invalidate
400 	 * entry message doesn't include it.  Worse case is that we invalidate
401 	 * an entry that didn't need to be invalidated.
402 	 */
403 
404 	cn.cn_nameiop = LOOKUP;
405 	cn.cn_flags = 0;	/* !MAKEENTRY means free cached entry */
406 	cn.cn_thread = curthread;
407 	cn.cn_cred = curthread->td_ucred;
408 	cn.cn_lkflags = LK_SHARED;
409 	cn.cn_pnbuf = NULL;
410 	cn.cn_nameptr = name;
411 	cn.cn_namelen = fnieo.namelen;
412 	err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
413 	MPASS(err == 0);
414 	fuse_vnode_clear_attr_cache(dvp);
415 	vput(dvp);
416 	return (0);
417 }
418 
419 int
420 fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
421 {
422 	struct fuse_notify_inval_inode_out fniio;
423 	struct vnode *vp;
424 	int err;
425 
426 	if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0)
427 		return (err);
428 
429 	if (fniio.ino == FUSE_ROOT_ID)
430 		err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
431 	else
432 		err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
433 			&vp);
434 	if (err != 0 || vp == NULL)
435 		return (err);
436 	/*
437 	 * XXX we can't check vp's generation because the FUSE invalidate
438 	 * entry message doesn't include it.  Worse case is that we invalidate
439 	 * an inode that didn't need to be invalidated.
440 	 */
441 
442 	/*
443 	 * Flush and invalidate buffers if off >= 0.  Technically we only need
444 	 * to flush and invalidate the range of offsets [off, off + len), but
445 	 * for simplicity's sake we do everything.
446 	 */
447 	if (fniio.off >= 0)
448 		fuse_io_invalbuf(vp, curthread);
449 	fuse_vnode_clear_attr_cache(vp);
450 	vput(vp);
451 	return (0);
452 }
453 
454 /* mknod */
455 int
456 fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
457 	struct componentname *cnp, struct vattr *vap)
458 {
459 	struct fuse_data *data;
460 	struct fuse_mknod_in fmni;
461 	size_t insize;
462 
463 	data = fuse_get_mpdata(dvp->v_mount);
464 
465 	fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode);
466 	fmni.rdev = vap->va_rdev;
467 	if (fuse_libabi_geq(data, 7, 12)) {
468 		insize = sizeof(fmni);
469 		fmni.umask = curthread->td_proc->p_fd->fd_cmask;
470 	} else {
471 		insize = FUSE_COMPAT_MKNOD_IN_SIZE;
472 	}
473 	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni,
474 	    insize, vap->va_type));
475 }
476 
477 /* readdir */
478 
479 int
480 fuse_internal_readdir(struct vnode *vp,
481     struct uio *uio,
482     off_t startoff,
483     struct fuse_filehandle *fufh,
484     struct fuse_iov *cookediov,
485     int *ncookies,
486     u_long *cookies)
487 {
488 	int err = 0;
489 	struct fuse_dispatcher fdi;
490 	struct fuse_read_in *fri = NULL;
491 	int fnd_start;
492 
493 	if (uio_resid(uio) == 0)
494 		return 0;
495 	fdisp_init(&fdi, 0);
496 
497 	/*
498 	 * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p
499 	 * I/O).
500 	 */
501 
502 	/*
503 	 * fnd_start is set non-zero once the offset in the directory gets
504 	 * to the startoff.  This is done because directories must be read
505 	 * from the beginning (offset == 0) when fuse_vnop_readdir() needs
506 	 * to do an open of the directory.
507 	 * If it is not set non-zero here, it will be set non-zero in
508 	 * fuse_internal_readdir_processdata() when uio_offset == startoff.
509 	 */
510 	fnd_start = 0;
511 	if (uio->uio_offset == startoff)
512 		fnd_start = 1;
513 	while (uio_resid(uio) > 0) {
514 		fdi.iosize = sizeof(*fri);
515 		if (fri == NULL)
516 			fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
517 		else
518 			fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
519 
520 		fri = fdi.indata;
521 		fri->fh = fufh->fh_id;
522 		fri->offset = uio_offset(uio);
523 		fri->size = MIN(uio->uio_resid,
524 		    fuse_get_mpdata(vp->v_mount)->max_read);
525 
526 		if ((err = fdisp_wait_answ(&fdi)))
527 			break;
528 		if ((err = fuse_internal_readdir_processdata(uio, startoff,
529 		    &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov,
530 		    ncookies, &cookies)))
531 			break;
532 	}
533 
534 	fdisp_destroy(&fdi);
535 	return ((err == -1) ? 0 : err);
536 }
537 
538 /*
539  * Return -1 to indicate that this readdir is finished, 0 if it copied
540  * all the directory data read in and it may be possible to read more
541  * and greater than 0 for a failure.
542  */
543 int
544 fuse_internal_readdir_processdata(struct uio *uio,
545     off_t startoff,
546     int *fnd_start,
547     size_t reqsize,
548     void *buf,
549     size_t bufsize,
550     struct fuse_iov *cookediov,
551     int *ncookies,
552     u_long **cookiesp)
553 {
554 	int err = 0;
555 	int bytesavail;
556 	size_t freclen;
557 
558 	struct dirent *de;
559 	struct fuse_dirent *fudge;
560 	u_long *cookies;
561 
562 	cookies = *cookiesp;
563 	if (bufsize < FUSE_NAME_OFFSET)
564 		return -1;
565 	for (;;) {
566 		if (bufsize < FUSE_NAME_OFFSET) {
567 			err = -1;
568 			break;
569 		}
570 		fudge = (struct fuse_dirent *)buf;
571 		freclen = FUSE_DIRENT_SIZE(fudge);
572 
573 		if (bufsize < freclen) {
574 			/*
575 			 * This indicates a partial directory entry at the
576 			 * end of the directory data.
577 			 */
578 			err = -1;
579 			break;
580 		}
581 #ifdef ZERO_PAD_INCOMPLETE_BUFS
582 		if (isbzero(buf, FUSE_NAME_OFFSET)) {
583 			err = -1;
584 			break;
585 		}
586 #endif
587 
588 		if (!fudge->namelen || fudge->namelen > MAXNAMLEN) {
589 			err = EINVAL;
590 			break;
591 		}
592 		bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *)
593 					    &fudge->namelen);
594 
595 		if (bytesavail > uio_resid(uio)) {
596 			/* Out of space for the dir so we are done. */
597 			err = -1;
598 			break;
599 		}
600 		/*
601 		 * Don't start to copy the directory entries out until
602 		 * the requested offset in the directory is found.
603 		 */
604 		if (*fnd_start != 0) {
605 			fiov_adjust(cookediov, bytesavail);
606 			bzero(cookediov->base, bytesavail);
607 
608 			de = (struct dirent *)cookediov->base;
609 			de->d_fileno = fudge->ino;
610 			de->d_reclen = bytesavail;
611 			de->d_type = fudge->type;
612 			de->d_namlen = fudge->namelen;
613 			memcpy((char *)cookediov->base + sizeof(struct dirent) -
614 			       MAXNAMLEN - 1,
615 			       (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
616 			dirent_terminate(de);
617 
618 			err = uiomove(cookediov->base, cookediov->len, uio);
619 			if (err)
620 				break;
621 			if (cookies != NULL) {
622 				if (*ncookies == 0) {
623 					err = -1;
624 					break;
625 				}
626 				*cookies = fudge->off;
627 				cookies++;
628 				(*ncookies)--;
629 			}
630 		} else if (startoff == fudge->off)
631 			*fnd_start = 1;
632 		buf = (char *)buf + freclen;
633 		bufsize -= freclen;
634 		uio_setoffset(uio, fudge->off);
635 	}
636 	*cookiesp = cookies;
637 
638 	return err;
639 }
640 
641 /* remove */
642 
643 int
644 fuse_internal_remove(struct vnode *dvp,
645     struct vnode *vp,
646     struct componentname *cnp,
647     enum fuse_opcode op)
648 {
649 	struct fuse_dispatcher fdi;
650 	int err = 0;
651 
652 	fdisp_init(&fdi, cnp->cn_namelen + 1);
653 	fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred);
654 
655 	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
656 	((char *)fdi.indata)[cnp->cn_namelen] = '\0';
657 
658 	err = fdisp_wait_answ(&fdi);
659 	fdisp_destroy(&fdi);
660 	return err;
661 }
662 
663 /* rename */
664 
665 int
666 fuse_internal_rename(struct vnode *fdvp,
667     struct componentname *fcnp,
668     struct vnode *tdvp,
669     struct componentname *tcnp)
670 {
671 	struct fuse_dispatcher fdi;
672 	struct fuse_rename_in *fri;
673 	int err = 0;
674 
675 	fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2);
676 	fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred);
677 
678 	fri = fdi.indata;
679 	fri->newdir = VTOI(tdvp);
680 	memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr,
681 	    fcnp->cn_namelen);
682 	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0';
683 	memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1,
684 	    tcnp->cn_nameptr, tcnp->cn_namelen);
685 	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen +
686 	    tcnp->cn_namelen + 1] = '\0';
687 
688 	err = fdisp_wait_answ(&fdi);
689 	fdisp_destroy(&fdi);
690 	return err;
691 }
692 
693 /* strategy */
694 
695 /* entity creation */
696 
697 void
698 fuse_internal_newentry_makerequest(struct mount *mp,
699     uint64_t dnid,
700     struct componentname *cnp,
701     enum fuse_opcode op,
702     void *buf,
703     size_t bufsize,
704     struct fuse_dispatcher *fdip)
705 {
706 	fdip->iosize = bufsize + cnp->cn_namelen + 1;
707 
708 	fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred);
709 	memcpy(fdip->indata, buf, bufsize);
710 	memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr, cnp->cn_namelen);
711 	((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0';
712 }
713 
714 int
715 fuse_internal_newentry_core(struct vnode *dvp,
716     struct vnode **vpp,
717     struct componentname *cnp,
718     enum vtype vtyp,
719     struct fuse_dispatcher *fdip)
720 {
721 	int err = 0;
722 	struct fuse_entry_out *feo;
723 	struct mount *mp = vnode_mount(dvp);
724 
725 	if ((err = fdisp_wait_answ(fdip))) {
726 		return err;
727 	}
728 	feo = fdip->answ;
729 
730 	if ((err = fuse_internal_checkentry(feo, vtyp))) {
731 		return err;
732 	}
733 	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp);
734 	if (err) {
735 		fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred,
736 		    feo->nodeid, 1);
737 		return err;
738 	}
739 
740 	/*
741 	 * Purge the parent's attribute cache because the daemon should've
742 	 * updated its mtime and ctime
743 	 */
744 	fuse_vnode_clear_attr_cache(dvp);
745 
746 	fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
747 		feo->attr_valid_nsec, NULL);
748 
749 	return err;
750 }
751 
752 int
753 fuse_internal_newentry(struct vnode *dvp,
754     struct vnode **vpp,
755     struct componentname *cnp,
756     enum fuse_opcode op,
757     void *buf,
758     size_t bufsize,
759     enum vtype vtype)
760 {
761 	int err;
762 	struct fuse_dispatcher fdi;
763 	struct mount *mp = vnode_mount(dvp);
764 
765 	fdisp_init(&fdi, 0);
766 	fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf,
767 	    bufsize, &fdi);
768 	err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi);
769 	fdisp_destroy(&fdi);
770 
771 	return err;
772 }
773 
774 /* entity destruction */
775 
776 int
777 fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio)
778 {
779 	fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL,
780 	    ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1);
781 
782 	return 0;
783 }
784 
785 void
786 fuse_internal_forget_send(struct mount *mp,
787     struct thread *td,
788     struct ucred *cred,
789     uint64_t nodeid,
790     uint64_t nlookup)
791 {
792 
793 	struct fuse_dispatcher fdi;
794 	struct fuse_forget_in *ffi;
795 
796 	/*
797          * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu",
798          *         (long long unsigned) nodeid));
799          */
800 
801 	fdisp_init(&fdi, sizeof(*ffi));
802 	fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred);
803 
804 	ffi = fdi.indata;
805 	ffi->nlookup = nlookup;
806 
807 	fuse_insert_message(fdi.tick, false);
808 	fdisp_destroy(&fdi);
809 }
810 
811 /* Fetch the vnode's attributes from the daemon*/
812 int
813 fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
814 	struct ucred *cred, struct thread *td)
815 {
816 	struct fuse_dispatcher fdi;
817 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
818 	struct fuse_getattr_in *fgai;
819 	struct fuse_attr_out *fao;
820 	off_t old_filesize = fvdat->cached_attrs.va_size;
821 	enum vtype vtyp;
822 	int err;
823 
824 	fdisp_init(&fdi, 0);
825 	fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
826 	fgai = fdi.indata;
827 	/*
828 	 * We could look up a file handle and set it in fgai->fh, but that
829 	 * involves extra runtime work and I'm unaware of any file systems that
830 	 * care.
831 	 */
832 	fgai->getattr_flags = 0;
833 	if ((err = fdisp_simple_putget_vp(&fdi, FUSE_GETATTR, vp, td, cred))) {
834 		if (err == ENOENT)
835 			fuse_internal_vnode_disappear(vp);
836 		goto out;
837 	}
838 
839 	fao = (struct fuse_attr_out *)fdi.answ;
840 	vtyp = IFTOVT(fao->attr.mode);
841 	if (fvdat->flag & FN_SIZECHANGE)
842 		fao->attr.size = old_filesize;
843 	fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
844 		fao->attr_valid_nsec, vap);
845 	if (vtyp != vnode_vtype(vp)) {
846 		fuse_internal_vnode_disappear(vp);
847 		err = ENOENT;
848 	}
849 
850 out:
851 	fdisp_destroy(&fdi);
852 	return err;
853 }
854 
855 /* Read a vnode's attributes from cache or fetch them from the fuse daemon */
856 int
857 fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred,
858 	struct thread *td)
859 {
860 	struct vattr *attrs;
861 
862 	if ((attrs = VTOVA(vp)) != NULL) {
863 		*vap = *attrs;	/* struct copy */
864 		return 0;
865 	}
866 
867 	return fuse_internal_do_getattr(vp, vap, cred, td);
868 }
869 
870 void
871 fuse_internal_vnode_disappear(struct vnode *vp)
872 {
873 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
874 
875 	ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear");
876 	fvdat->flag |= FN_REVOKED;
877 	bintime_clear(&fvdat->attr_cache_timeout);
878 	bintime_clear(&fvdat->entry_cache_timeout);
879 	cache_purge(vp);
880 }
881 
882 /* fuse start/stop */
883 
884 int
885 fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
886 {
887 	int err = 0;
888 	struct fuse_data *data = tick->tk_data;
889 	struct fuse_init_out *fiio;
890 
891 	if ((err = tick->tk_aw_ohead.error)) {
892 		goto out;
893 	}
894 	if ((err = fticket_pull(tick, uio))) {
895 		goto out;
896 	}
897 	fiio = fticket_resp(tick)->base;
898 
899 	data->fuse_libabi_major = fiio->major;
900 	data->fuse_libabi_minor = fiio->minor;
901 	if (!fuse_libabi_geq(data, 7, 4)) {
902 		/*
903 		 * With a little work we could support servers as old as 7.1.
904 		 * But there would be little payoff.
905 		 */
906 		SDT_PROBE2(fusefs, , internal, trace, 1,
907 			"userpace version too low");
908 		err = EPROTONOSUPPORT;
909 		goto out;
910 	}
911 
912 	if (fuse_libabi_geq(data, 7, 5)) {
913 		if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) ||
914 		    fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) {
915 			data->max_write = fiio->max_write;
916 			if (fiio->flags & FUSE_ASYNC_READ)
917 				data->dataflags |= FSESS_ASYNC_READ;
918 			if (fiio->flags & FUSE_POSIX_LOCKS)
919 				data->dataflags |= FSESS_POSIX_LOCKS;
920 			if (fiio->flags & FUSE_EXPORT_SUPPORT)
921 				data->dataflags |= FSESS_EXPORT_SUPPORT;
922 			/*
923 			 * Don't bother to check FUSE_BIG_WRITES, because it's
924 			 * redundant with max_write
925 			 */
926 			/*
927 			 * max_background, congestion_threshold, and time_gran
928 			 * are not implemented
929 			 */
930 		} else {
931 			err = EINVAL;
932 		}
933 	} else {
934 		/* Old fixed values */
935 		data->max_write = 4096;
936 	}
937 
938 	if (fuse_libabi_geq(data, 7, 6))
939 		data->max_readahead_blocks = fiio->max_readahead / maxbcachebuf;
940 
941 	if (!fuse_libabi_geq(data, 7, 7))
942 		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
943 
944 	if (!fuse_libabi_geq(data, 7, 8)) {
945 		fsess_set_notimpl(data->mp, FUSE_BMAP);
946 		fsess_set_notimpl(data->mp, FUSE_DESTROY);
947 	}
948 
949 out:
950 	if (err) {
951 		fdata_set_dead(data);
952 	}
953 	FUSE_LOCK();
954 	data->dataflags |= FSESS_INITED;
955 	wakeup(&data->ticketer);
956 	FUSE_UNLOCK();
957 
958 	return 0;
959 }
960 
961 void
962 fuse_internal_send_init(struct fuse_data *data, struct thread *td)
963 {
964 	struct fuse_init_in *fiii;
965 	struct fuse_dispatcher fdi;
966 
967 	fdisp_init(&fdi, sizeof(*fiii));
968 	fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL);
969 	fiii = fdi.indata;
970 	fiii->major = FUSE_KERNEL_VERSION;
971 	fiii->minor = FUSE_KERNEL_MINOR_VERSION;
972 	/*
973 	 * fusefs currently reads ahead no more than one cache block at a time.
974 	 * See fuse_read_biobackend
975 	 */
976 	fiii->max_readahead = maxbcachebuf;
977 	/*
978 	 * Unsupported features:
979 	 * FUSE_FILE_OPS: No known FUSE server or client supports it
980 	 * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it
981 	 * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even
982 	 *	when default ACLs are in use.
983 	 */
984 	fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT
985 		| FUSE_BIG_WRITES;
986 
987 	fuse_insert_callback(fdi.tick, fuse_internal_init_callback);
988 	fuse_insert_message(fdi.tick, false);
989 	fdisp_destroy(&fdi);
990 }
991 
992 /*
993  * Send a FUSE_SETATTR operation with no permissions checks.  If cred is NULL,
994  * send the request with root credentials
995  */
996 int fuse_internal_setattr(struct vnode *vp, struct vattr *vap,
997 	struct thread *td, struct ucred *cred)
998 {
999 	struct fuse_dispatcher fdi;
1000 	struct fuse_setattr_in *fsai;
1001 	struct mount *mp;
1002 	pid_t pid = td->td_proc->p_pid;
1003 	struct fuse_data *data;
1004 	int dataflags;
1005 	int err = 0;
1006 	enum vtype vtyp;
1007 	int sizechanged = -1;
1008 	uint64_t newsize = 0;
1009 
1010 	mp = vnode_mount(vp);
1011 	data = fuse_get_mpdata(mp);
1012 	dataflags = data->dataflags;
1013 
1014 	fdisp_init(&fdi, sizeof(*fsai));
1015 	fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred);
1016 	if (!cred) {
1017 		fdi.finh->uid = 0;
1018 		fdi.finh->gid = 0;
1019 	}
1020 	fsai = fdi.indata;
1021 	fsai->valid = 0;
1022 
1023 	if (vap->va_uid != (uid_t)VNOVAL) {
1024 		fsai->uid = vap->va_uid;
1025 		fsai->valid |= FATTR_UID;
1026 	}
1027 	if (vap->va_gid != (gid_t)VNOVAL) {
1028 		fsai->gid = vap->va_gid;
1029 		fsai->valid |= FATTR_GID;
1030 	}
1031 	if (vap->va_size != VNOVAL) {
1032 		struct fuse_filehandle *fufh = NULL;
1033 
1034 		/*Truncate to a new value. */
1035 		fsai->size = vap->va_size;
1036 		sizechanged = 1;
1037 		newsize = vap->va_size;
1038 		fsai->valid |= FATTR_SIZE;
1039 
1040 		fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
1041 		if (fufh) {
1042 			fsai->fh = fufh->fh_id;
1043 			fsai->valid |= FATTR_FH;
1044 		}
1045 		VTOFUD(vp)->flag &= ~FN_SIZECHANGE;
1046 	}
1047 	if (vap->va_atime.tv_sec != VNOVAL) {
1048 		fsai->atime = vap->va_atime.tv_sec;
1049 		fsai->atimensec = vap->va_atime.tv_nsec;
1050 		fsai->valid |= FATTR_ATIME;
1051 		if (vap->va_vaflags & VA_UTIMES_NULL)
1052 			fsai->valid |= FATTR_ATIME_NOW;
1053 	}
1054 	if (vap->va_mtime.tv_sec != VNOVAL) {
1055 		fsai->mtime = vap->va_mtime.tv_sec;
1056 		fsai->mtimensec = vap->va_mtime.tv_nsec;
1057 		fsai->valid |= FATTR_MTIME;
1058 		if (vap->va_vaflags & VA_UTIMES_NULL)
1059 			fsai->valid |= FATTR_MTIME_NOW;
1060 	}
1061 	if (vap->va_mode != (mode_t)VNOVAL) {
1062 		fsai->mode = vap->va_mode & ALLPERMS;
1063 		fsai->valid |= FATTR_MODE;
1064 	}
1065 	if (!fsai->valid) {
1066 		goto out;
1067 	}
1068 
1069 	if ((err = fdisp_wait_answ(&fdi)))
1070 		goto out;
1071 	vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode);
1072 
1073 	if (vnode_vtype(vp) != vtyp) {
1074 		if (vnode_vtype(vp) == VNON && vtyp != VNON) {
1075 			SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! "
1076 				"vnode_vtype is VNON and vtype isn't.");
1077 		} else {
1078 			/*
1079 	                 * STALE vnode, ditch
1080 	                 *
1081 			 * The vnode has changed its type "behind our back".
1082 			 * There's nothing really we can do, so let us just
1083 			 * force an internal revocation and tell the caller to
1084 			 * try again, if interested.
1085 	                 */
1086 			fuse_internal_vnode_disappear(vp);
1087 			err = EAGAIN;
1088 		}
1089 	}
1090 	if (err == 0) {
1091 		struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ;
1092 		fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
1093 			fao->attr_valid_nsec, NULL);
1094 	}
1095 
1096 out:
1097 	fdisp_destroy(&fdi);
1098 	return err;
1099 }
1100 
1101 #ifdef ZERO_PAD_INCOMPLETE_BUFS
/*
 * Return 1 if the first 'len' bytes at 'buf' are all zero (which is
 * trivially the case when 'len' is 0), otherwise 0.
 */
static int
isbzero(void *buf, size_t len)
{
	const unsigned char *bytes = buf;
	size_t i;

	/* size_t index: matches 'len' and cannot overflow for large buffers. */
	for (i = 0; i < len; i++) {
		if (bytes[i] != 0)
			return (0);
	}

	return (1);
}
1114 
1115 #endif
1116