xref: /freebsd/sys/fs/fuse/fuse_internal.c (revision 8758bf0a)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * * Redistributions of source code must retain the above copyright
12  *   notice, this list of conditions and the following disclaimer.
13  * * Redistributions in binary form must reproduce the above
14  *   copyright notice, this list of conditions and the following disclaimer
15  *   in the documentation and/or other materials provided with the
16  *   distribution.
17  * * Neither the name of Google Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Copyright (C) 2005 Csaba Henk.
34  * All rights reserved.
35  *
36  * Copyright (c) 2019 The FreeBSD Foundation
37  *
38  * Portions of this software were developed by BFF Storage Systems, LLC under
39  * sponsorship from the FreeBSD Foundation.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  *
50  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  */
62 
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/counter.h>
66 #include <sys/module.h>
67 #include <sys/errno.h>
68 #include <sys/kernel.h>
69 #include <sys/conf.h>
70 #include <sys/uio.h>
71 #include <sys/malloc.h>
72 #include <sys/queue.h>
73 #include <sys/lock.h>
74 #include <sys/mutex.h>
75 #include <sys/sdt.h>
76 #include <sys/sx.h>
77 #include <sys/proc.h>
78 #include <sys/mount.h>
79 #include <sys/vnode.h>
80 #include <sys/namei.h>
81 #include <sys/stat.h>
82 #include <sys/unistd.h>
83 #include <sys/filedesc.h>
84 #include <sys/file.h>
85 #include <sys/fcntl.h>
86 #include <sys/dirent.h>
87 #include <sys/bio.h>
88 #include <sys/buf.h>
89 #include <sys/sysctl.h>
90 #include <sys/priv.h>
91 
92 #include "fuse.h"
93 #include "fuse_file.h"
94 #include "fuse_internal.h"
95 #include "fuse_io.h"
96 #include "fuse_ipc.h"
97 #include "fuse_node.h"
98 #include "fuse_file.h"
99 
100 SDT_PROVIDER_DECLARE(fusefs);
101 /*
102  * Fuse trace probe:
103  * arg0: verbosity.  Higher numbers give more verbose messages
104  * arg1: Textual message
105  */
106 SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*");
107 
108 #ifdef ZERO_PAD_INCOMPLETE_BUFS
109 static int isbzero(void *buf, size_t len);
110 
111 #endif
112 
113 counter_u64_t fuse_lookup_cache_hits;
114 counter_u64_t fuse_lookup_cache_misses;
115 
116 SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,
117     &fuse_lookup_cache_hits, "number of positive cache hits in lookup");
118 
119 SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,
120     &fuse_lookup_cache_misses, "number of cache misses in lookup");
121 
122 int
123 fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags,
124 	struct vnode **vpp)
125 {
126 	struct bintime now;
127 	struct thread *td = curthread;
128 	uint64_t nodeid = ino;
129 	int error;
130 
131 	*vpp = NULL;
132 
133 	error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, td, vpp,
134 	    fuse_vnode_cmp, &nodeid);
135 	if (error)
136 		return error;
137 	/*
138 	 * Check the entry cache timeout.  We have to do this within fusefs
139 	 * instead of by using cache_enter_time/cache_lookup because those
140 	 * routines are only intended to work with pathnames, not inodes
141 	 */
142 	if (*vpp != NULL) {
143 		getbinuptime(&now);
144 		if (bintime_cmp(&(VTOFUD(*vpp)->entry_cache_timeout), &now, >)) {
145 			counter_u64_add(fuse_lookup_cache_hits, 1);
146 			return 0;
147 		} else {
148 			/* Entry cache timeout */
149 			counter_u64_add(fuse_lookup_cache_misses, 1);
150 			cache_purge(*vpp);
151 			vput(*vpp);
152 			*vpp = NULL;
153 		}
154 	}
155 	return 0;
156 }
157 
158 SDT_PROBE_DEFINE0(fusefs, , internal, access_vadmin);
159 /* Synchronously send a FUSE_ACCESS operation */
160 int
161 fuse_internal_access(struct vnode *vp,
162     accmode_t mode,
163     struct thread *td,
164     struct ucred *cred)
165 {
166 	int err = 0;
167 	uint32_t mask = F_OK;
168 	int dataflags;
169 	struct mount *mp;
170 	struct fuse_dispatcher fdi;
171 	struct fuse_access_in *fai;
172 	struct fuse_data *data;
173 
174 	mp = vnode_mount(vp);
175 
176 	data = fuse_get_mpdata(mp);
177 	dataflags = data->dataflags;
178 
179 	if (mode == 0)
180 		return 0;
181 
182 	if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) {
183 		switch (vp->v_type) {
184 		case VDIR:
185 			/* FALLTHROUGH */
186 		case VLNK:
187 			/* FALLTHROUGH */
188 		case VREG:
189 			return EROFS;
190 		default:
191 			break;
192 		}
193 	}
194 
195 	/* Unless explicitly permitted, deny everyone except the fs owner. */
196 	if (!(dataflags & FSESS_DAEMON_CAN_SPY)) {
197 		if (fuse_match_cred(data->daemoncred, cred))
198 			return EPERM;
199 	}
200 
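	/*
	 * With default_permissions, the kernel checks access itself using
	 * cached attributes and vaccess(); otherwise the check is left to
	 * the server via FUSE_ACCESS below.
	 */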
201 	if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
202 		struct vattr va;
203 
204 		fuse_internal_getattr(vp, &va, cred, td);
205 		return vaccess(vp->v_type, va.va_mode, va.va_uid,
206 		    va.va_gid, mode, cred);
207 	}
208 
209 	if (mode & VADMIN) {
210 		/*
211 		 * The FUSE protocol doesn't have an equivalent of VADMIN, so
212 		 * it's a bug if we ever reach this point with that bit set.
213 		 */
214 		SDT_PROBE0(fusefs, , internal, access_vadmin);
215 	}
216 
217 	if (fsess_not_impl(mp, FUSE_ACCESS))
218 		return 0;
219 
220 	if ((mode & (VWRITE | VAPPEND)) != 0)
221 		mask |= W_OK;
222 	if ((mode & VREAD) != 0)
223 		mask |= R_OK;
224 	if ((mode & VEXEC) != 0)
225 		mask |= X_OK;
226 
227 	fdisp_init(&fdi, sizeof(*fai));
228 	fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred);
229 
230 	fai = fdi.indata;
231 	fai->mask = mask;
232 
233 	err = fdisp_wait_answ(&fdi);
234 	fdisp_destroy(&fdi);
235 
236 	if (err == ENOSYS) {
237 		fsess_set_notimpl(mp, FUSE_ACCESS);
238 		err = 0;
239 	}
240 	return err;
241 }
242 
243 /*
244  * Cache FUSE attributes from attr, in attribute cache associated with vnode
245  * 'vp'.  Optionally, if argument 'vap' is not NULL, store a copy of the
246  * converted attributes there as well.
247  *
248  * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do
249  * return the result to the caller).
250  */
251 void
252 fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
253 	uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap,
254 	bool from_server)
255 {
256 	struct mount *mp;
257 	struct fuse_vnode_data *fvdat;
258 	struct fuse_data *data;
259 	struct vattr *vp_cache_at;
260 
261 	mp = vnode_mount(vp);
262 	fvdat = VTOFUD(vp);
263 	data = fuse_get_mpdata(mp);
264 
265 	ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");
266 
267 	fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
268 		&fvdat->attr_cache_timeout);
269 
270 	if (vnode_isreg(vp) &&
271 	    fvdat->cached_attrs.va_size != VNOVAL &&
272 	    fvdat->flag & FN_SIZECHANGE &&
273 	    attr->size != fvdat->cached_attrs.va_size)
274 	{
275 		if (data->cache_mode == FUSE_CACHE_WB)
276 		{
277 			const char *msg;
278 
279 			/*
280 			 * The server changed the file's size even though we're
281 			 * using writeback cacheing and and we have outstanding
282 			 * using writeback caching and we have outstanding
283 			 */
284 			if (fuse_libabi_geq(data, 7, 23)) {
285 				msg = "writeback cache incoherent!  "
286 				    "To prevent data corruption, disable "
287 				    "the writeback cache according to your "
288 				    "FUSE server's documentation.";
289 			} else {
290 				msg = "writeback cache incoherent!  "
291 				    "To prevent data corruption, disable "
292 				    "the writeback cache by setting "
293 				    "vfs.fusefs.data_cache_mode to 0 or 1.";
294 			}
295 			fuse_warn(data, FSESS_WARN_WB_CACHE_INCOHERENT, msg);
296 		}
297 		if (fuse_vnode_attr_cache_valid(vp) &&
298 		    data->cache_mode != FUSE_CACHE_UC)
299 		{
300 			/*
301 			 * The server changed the file's size even though we
302 			 * have it cached and our cache has not yet expired.
303 			 * That's a bug.
304 			 */
305 			fuse_warn(data, FSESS_WARN_CACHE_INCOHERENT,
306 			    "cache incoherent!  "
307 			    "To prevent "
308 			    "data corruption, disable the data cache "
309 			    "by mounting with -o direct_io, or as "
310 			    "directed otherwise by your FUSE server's "
311 			    "documentation.");
312 		}
313 	}
314 
315 	/* Fix our buffers if the filesize changed without us knowing */
316 	if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) {
317 		(void)fuse_vnode_setsize(vp, attr->size, from_server);
318 		fvdat->cached_attrs.va_size = attr->size;
319 	}
320 
321 	if (attr_valid > 0 || attr_valid_nsec > 0)
322 		vp_cache_at = &(fvdat->cached_attrs);
323 	else if (vap != NULL)
324 		vp_cache_at = vap;
325 	else
326 		return;
327 
328 	vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0];
329 	vp_cache_at->va_fileid = attr->ino;
330 	vp_cache_at->va_mode = attr->mode & ~S_IFMT;
331 	vp_cache_at->va_nlink     = attr->nlink;
332 	vp_cache_at->va_uid       = attr->uid;
333 	vp_cache_at->va_gid       = attr->gid;
334 	vp_cache_at->va_rdev      = attr->rdev;
335 	vp_cache_at->va_size      = attr->size;
336 	/* XXX on i386, seconds are truncated to 32 bits */
337 	vp_cache_at->va_atime.tv_sec  = attr->atime;
338 	vp_cache_at->va_atime.tv_nsec = attr->atimensec;
339 	vp_cache_at->va_mtime.tv_sec  = attr->mtime;
340 	vp_cache_at->va_mtime.tv_nsec = attr->mtimensec;
341 	vp_cache_at->va_ctime.tv_sec  = attr->ctime;
342 	vp_cache_at->va_ctime.tv_nsec = attr->ctimensec;
343 	if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0)
344 		vp_cache_at->va_blocksize = attr->blksize;
345 	else
346 		vp_cache_at->va_blocksize = PAGE_SIZE;
347 	vp_cache_at->va_type = IFTOVT(attr->mode);
348 	vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE;
349 	vp_cache_at->va_flags = 0;
350 
351 	if (vap != vp_cache_at && vap != NULL)
352 		memcpy(vap, vp_cache_at, sizeof(*vap));
353 }
354 
355 /* fsync */
356 
357 int
358 fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio)
359 {
360 	if (tick->tk_aw_ohead.error == ENOSYS) {
361 		fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick));
362 	}
363 	return 0;
364 }
365 
366 int
367 fuse_internal_fsync(struct vnode *vp,
368     struct thread *td,
369     int waitfor,
370     bool datasync)
371 {
372 	struct fuse_fsync_in *ffsi = NULL;
373 	struct fuse_dispatcher fdi;
374 	struct fuse_filehandle *fufh;
375 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
376 	struct mount *mp = vnode_mount(vp);
377 	int op = FUSE_FSYNC;
378 	int err = 0;
379 
380 	if (fsess_not_impl(vnode_mount(vp),
381 	    (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) {
382 		return 0;
383 	}
384 	if (vnode_isdir(vp))
385 		op = FUSE_FSYNCDIR;
386 
387 	if (fsess_not_impl(mp, op))
388 		return 0;
389 
390 	fdisp_init(&fdi, sizeof(*ffsi));
391 	/*
392 	 * fsync every open file handle for this file, because we can't be sure
393 	 * which file handle the caller is really referring to.
394 	 */
395 	LIST_FOREACH(fufh, &fvdat->handles, next) {
396 		fdi.iosize = sizeof(*ffsi);
397 		if (ffsi == NULL)
398 			fdisp_make_vp(&fdi, op, vp, td, NULL);
399 		else
400 			fdisp_refresh_vp(&fdi, op, vp, td, NULL);
401 		ffsi = fdi.indata;
402 		ffsi->fh = fufh->fh_id;
403 		ffsi->fsync_flags = 0;
404 
405 		if (datasync)
406 			ffsi->fsync_flags = FUSE_FSYNC_FDATASYNC;
407 
408 		if (waitfor == MNT_WAIT) {
409 			err = fdisp_wait_answ(&fdi);
410 		} else {
411 			fuse_insert_callback(fdi.tick,
412 				fuse_internal_fsync_callback);
413 			fuse_insert_message(fdi.tick, false);
414 		}
415 		if (err == ENOSYS) {
416 			/* ENOSYS means "success, and don't call again" */
417 			fsess_set_notimpl(mp, op);
418 			err = 0;
419 			break;
420 		}
421 	}
422 	fdisp_destroy(&fdi);
423 
424 	return err;
425 }
426 
427 /* Asynchronous invalidation */
428 SDT_PROBE_DEFINE3(fusefs, , internal, invalidate_entry,
429 	"struct vnode*", "struct fuse_notify_inval_entry_out*", "char*");
430 int
431 fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
432 {
433 	struct fuse_notify_inval_entry_out fnieo;
434 	struct componentname cn;
435 	struct vnode *dvp, *vp;
436 	char name[PATH_MAX];
437 	int err;
438 
439 	if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0)
440 		return (err);
441 
442 	if (fnieo.namelen >= sizeof(name))
443 		return (EINVAL);
444 
445 	if ((err = uiomove(name, fnieo.namelen, uio)) != 0)
446 		return (err);
447 	name[fnieo.namelen] = '\0';
448 	/* fusefs does not cache "." or ".." entries */
449 	if (strncmp(name, ".", sizeof(".")) == 0 ||
450 	    strncmp(name, "..", sizeof("..")) == 0)
451 		return (0);
452 
453 	if (fnieo.parent == FUSE_ROOT_ID)
454 		err = VFS_ROOT(mp, LK_SHARED, &dvp);
455 	else
456 		err = fuse_internal_get_cached_vnode( mp, fnieo.parent,
457 			LK_SHARED, &dvp);
458 	SDT_PROBE3(fusefs, , internal, invalidate_entry, dvp, &fnieo, name);
459 	/*
460 	 * If dvp is not in the cache, then it must've been reclaimed.  And
461 	 * since fuse_vnop_reclaim does a cache_purge, name's entry must've
462 	 * been invalidated already.  So we can safely return if dvp == NULL
463 	 */
464 	if (err != 0 || dvp == NULL)
465 		return (err);
466 	/*
467 	 * XXX we can't check dvp's generation because the FUSE invalidate
468 	 * entry message doesn't include it.  Worst case is that we invalidate
469 	 * an entry that didn't need to be invalidated.
470 	 */
471 
472 	cn.cn_nameiop = LOOKUP;
473 	cn.cn_flags = 0;	/* !MAKEENTRY means free cached entry */
474 	cn.cn_cred = curthread->td_ucred;
475 	cn.cn_lkflags = LK_SHARED;
476 	cn.cn_pnbuf = NULL;
477 	cn.cn_nameptr = name;
478 	cn.cn_namelen = fnieo.namelen;
479 	err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
480 	MPASS(err == 0);
481 	fuse_vnode_clear_attr_cache(dvp);
482 	vput(dvp);
483 	return (0);
484 }
485 
486 SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_inode,
487 	"struct vnode*", "struct fuse_notify_inval_inode_out *");
488 int
489 fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
490 {
491 	struct fuse_notify_inval_inode_out fniio;
492 	struct vnode *vp;
493 	int err;
494 
495 	if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0)
496 		return (err);
497 
498 	if (fniio.ino == FUSE_ROOT_ID)
499 		err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
500 	else
501 		err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
502 			&vp);
503 	SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio);
504 	if (err != 0 || vp == NULL)
505 		return (err);
506 	/*
507 	 * XXX we can't check vp's generation because the FUSE invalidate
508 	 * inode message doesn't include it.  Worst case is that we invalidate
509 	 * an inode that didn't need to be invalidated.
510 	 */
511 
512 	/*
513 	 * Flush and invalidate buffers if off >= 0.  Technically we only need
514 	 * to flush and invalidate the range of offsets [off, off + len), but
515 	 * for simplicity's sake we do everything.
516 	 */
517 	if (fniio.off >= 0)
518 		fuse_io_invalbuf(vp, curthread);
519 	fuse_vnode_clear_attr_cache(vp);
520 	vput(vp);
521 	return (0);
522 }
523 
524 /* mknod */
525 int
526 fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
527 	struct componentname *cnp, struct vattr *vap)
528 {
529 	struct fuse_data *data;
530 	struct fuse_mknod_in fmni;
531 	size_t insize;
532 
533 	data = fuse_get_mpdata(dvp->v_mount);
534 
535 	fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode);
536 	fmni.rdev = vap->va_rdev;
537 	if (fuse_libabi_geq(data, 7, 12)) {
538 		insize = sizeof(fmni);
539 		fmni.umask = curthread->td_proc->p_pd->pd_cmask;
540 		fmni.padding = 0;
541 	} else {
542 		insize = FUSE_COMPAT_MKNOD_IN_SIZE;
543 	}
544 	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni,
545 	    insize, vap->va_type));
546 }
547 
548 /* readdir */
549 
550 int
551 fuse_internal_readdir(struct vnode *vp,
552     struct uio *uio,
553     struct fuse_filehandle *fufh,
554     struct fuse_iov *cookediov,
555     int *ncookies,
556     uint64_t *cookies)
557 {
558 	int err = 0;
559 	struct fuse_dispatcher fdi;
560 	struct fuse_read_in *fri = NULL;
561 
562 	if (uio_resid(uio) == 0)
563 		return 0;
564 	fdisp_init(&fdi, 0);
565 
566 	/*
567 	 * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p
568 	 * I/O).
569 	 */
570 	while (uio_resid(uio) > 0) {
571 		fdi.iosize = sizeof(*fri);
572 		fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
573 		fri = fdi.indata;
574 		fri->fh = fufh->fh_id;
575 		fri->offset = uio_offset(uio);
576 		fri->size = MIN(uio->uio_resid,
577 		    fuse_get_mpdata(vp->v_mount)->max_read);
578 
579 		if ((err = fdisp_wait_answ(&fdi)))
580 			break;
581 		if ((err = fuse_internal_readdir_processdata(uio, fri->size,
582 			fdi.answ, fdi.iosize, cookediov, ncookies, &cookies)))
583 			break;
584 	}
585 
586 	fdisp_destroy(&fdi);
587 	return ((err == -1) ? 0 : err);
588 }
589 
590 /*
591  * Return -1 to indicate that this readdir is finished, 0 if it copied
592  * all the directory data read in (and it may be possible to read more),
593  * or greater than 0 on failure.
594  */
595 int
596 fuse_internal_readdir_processdata(struct uio *uio,
597     size_t reqsize,
598     void *buf,
599     size_t bufsize,
600     struct fuse_iov *cookediov,
601     int *ncookies,
602     uint64_t **cookiesp)
603 {
604 	int err = 0;
605 	int oreclen;
606 	size_t freclen;
607 
608 	struct dirent *de;
609 	struct fuse_dirent *fudge;
610 	uint64_t *cookies;
611 
612 	cookies = *cookiesp;
613 	if (bufsize < FUSE_NAME_OFFSET)
614 		return -1;
615 	for (;;) {
616 		if (bufsize < FUSE_NAME_OFFSET) {
617 			err = -1;
618 			break;
619 		}
620 		fudge = (struct fuse_dirent *)buf;
621 		freclen = FUSE_DIRENT_SIZE(fudge);
622 
623 		if (bufsize < freclen) {
624 			/*
625 			 * This indicates a partial directory entry at the
626 			 * end of the directory data.
627 			 */
628 			err = -1;
629 			break;
630 		}
631 #ifdef ZERO_PAD_INCOMPLETE_BUFS
632 		if (isbzero(buf, FUSE_NAME_OFFSET)) {
633 			err = -1;
634 			break;
635 		}
636 #endif
637 
638 		if (!fudge->namelen || fudge->namelen > MAXNAMLEN) {
639 			err = EINVAL;
640 			break;
641 		}
642 		oreclen = GENERIC_DIRSIZ((struct pseudo_dirent *)
643 					    &fudge->namelen);
644 
645 		if (oreclen > uio_resid(uio)) {
646 			/* Out of space for the dir so we are done. */
647 			err = -1;
648 			break;
649 		}
650 		fiov_adjust(cookediov, oreclen);
651 		bzero(cookediov->base, oreclen);
652 
653 		de = (struct dirent *)cookediov->base;
654 		de->d_fileno = fudge->ino;
655 		de->d_off = fudge->off;
656 		de->d_reclen = oreclen;
657 		de->d_type = fudge->type;
658 		de->d_namlen = fudge->namelen;
659 		memcpy((char *)cookediov->base + sizeof(struct dirent) -
660 		       MAXNAMLEN - 1,
661 		       (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
662 		dirent_terminate(de);
663 
664 		err = uiomove(cookediov->base, cookediov->len, uio);
665 		if (err)
666 			break;
667 		if (cookies != NULL) {
668 			if (*ncookies == 0) {
669 				err = -1;
670 				break;
671 			}
672 			*cookies = fudge->off;
673 			cookies++;
674 			(*ncookies)--;
675 		}
676 		buf = (char *)buf + freclen;
677 		bufsize -= freclen;
678 		uio_setoffset(uio, fudge->off);
679 	}
680 	*cookiesp = cookies;
681 
682 	return err;
683 }
684 
685 /* remove */
686 
687 int
688 fuse_internal_remove(struct vnode *dvp,
689     struct vnode *vp,
690     struct componentname *cnp,
691     enum fuse_opcode op)
692 {
693 	struct fuse_dispatcher fdi;
694 	nlink_t nlink;
695 	int err = 0;
696 
697 	fdisp_init(&fdi, cnp->cn_namelen + 1);
698 	fdisp_make_vp(&fdi, op, dvp, curthread, cnp->cn_cred);
699 
700 	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
701 	((char *)fdi.indata)[cnp->cn_namelen] = '\0';
702 
703 	err = fdisp_wait_answ(&fdi);
704 	fdisp_destroy(&fdi);
705 
706 	if (err)
707 		return (err);
708 
709 	/*
710 	 * Access the cached nlink even if the attr cache has expired.  If
711 	 * it's inaccurate, the worst that will happen is:
712 	 * 1) We'll recycle the vnode even though the file has another link we
713 	 *    don't know about, costing a bit of cpu time, or
714 	 * 2) We won't recycle the vnode even though all of its links are gone.
715 	 *    It will linger around until vnlru reclaims it, costing a bit of
716 	 *    temporary memory.
717 	 */
718 	nlink = VTOFUD(vp)->cached_attrs.va_nlink--;
719 
720 	/*
721 	 * Purge the parent's attribute cache because the daemon
722 	 * should've updated its mtime and ctime.
723 	 */
724 	fuse_vnode_clear_attr_cache(dvp);
725 
726 	/* NB: nlink could be zero if it was never cached */
727 	if (nlink <= 1 || vnode_vtype(vp) == VDIR) {
728 		fuse_internal_vnode_disappear(vp);
729 	} else {
730 		cache_purge(vp);
731 		fuse_vnode_update(vp, FN_CTIMECHANGE);
732 	}
733 
734 	return err;
735 }
736 
737 /* rename */
738 
739 int
740 fuse_internal_rename(struct vnode *fdvp,
741     struct componentname *fcnp,
742     struct vnode *tdvp,
743     struct componentname *tcnp)
744 {
745 	struct fuse_dispatcher fdi;
746 	struct fuse_rename_in *fri;
747 	int err = 0;
748 
749 	fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2);
750 	fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, curthread, tcnp->cn_cred);
751 
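	/*
	 * The request body is a struct fuse_rename_in followed by the source
	 * name and the target name, each NUL-terminated.
	 */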
752 	fri = fdi.indata;
753 	fri->newdir = VTOI(tdvp);
754 	memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr,
755 	    fcnp->cn_namelen);
756 	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0';
757 	memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1,
758 	    tcnp->cn_nameptr, tcnp->cn_namelen);
759 	((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen +
760 	    tcnp->cn_namelen + 1] = '\0';
761 
762 	err = fdisp_wait_answ(&fdi);
763 	fdisp_destroy(&fdi);
764 	return err;
765 }
766 
767 /* strategy */
768 
769 /* entity creation */
770 
771 void
772 fuse_internal_newentry_makerequest(struct mount *mp,
773     uint64_t dnid,
774     struct componentname *cnp,
775     enum fuse_opcode op,
776     void *buf,
777     size_t bufsize,
778     struct fuse_dispatcher *fdip)
779 {
780 	fdip->iosize = bufsize + cnp->cn_namelen + 1;
781 
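	/*
	 * The request body is the opcode-specific structure in 'buf' followed
	 * by the NUL-terminated entry name.
	 */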
782 	fdisp_make(fdip, op, mp, dnid, curthread, cnp->cn_cred);
783 	memcpy(fdip->indata, buf, bufsize);
784 	memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr, cnp->cn_namelen);
785 	((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0';
786 }
787 
788 int
789 fuse_internal_newentry_core(struct vnode *dvp,
790     struct vnode **vpp,
791     struct componentname *cnp,
792     __enum_uint8(vtype) vtyp,
793     struct fuse_dispatcher *fdip)
794 {
795 	int err = 0;
796 	struct fuse_entry_out *feo;
797 	struct mount *mp = vnode_mount(dvp);
798 
799 	if ((err = fdisp_wait_answ(fdip))) {
800 		return err;
801 	}
802 	feo = fdip->answ;
803 
804 	if ((err = fuse_internal_checkentry(feo, vtyp))) {
805 		return err;
806 	}
807 	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp);
808 	if (err) {
809 		fuse_internal_forget_send(mp, curthread, cnp->cn_cred,
810 		    feo->nodeid, 1);
811 		return err;
812 	}
813 
814 	/*
815 	 * Purge the parent's attribute cache because the daemon should've
816 	 * updated its mtime and ctime
817 	 */
818 	fuse_vnode_clear_attr_cache(dvp);
819 
820 	fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
821 		feo->attr_valid_nsec, NULL, true);
822 
823 	return err;
824 }
825 
826 int
827 fuse_internal_newentry(struct vnode *dvp,
828     struct vnode **vpp,
829     struct componentname *cnp,
830     enum fuse_opcode op,
831     void *buf,
832     size_t bufsize,
833     __enum_uint8(vtype) vtype)
834 {
835 	int err;
836 	struct fuse_dispatcher fdi;
837 	struct mount *mp = vnode_mount(dvp);
838 
839 	fdisp_init(&fdi, 0);
840 	fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf,
841 	    bufsize, &fdi);
842 	err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi);
843 	fdisp_destroy(&fdi);
844 
845 	return err;
846 }
847 
848 /* entity destruction */
849 
850 int
851 fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio)
852 {
853 	fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL,
854 	    ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1);
855 
856 	return 0;
857 }
858 
859 void
860 fuse_internal_forget_send(struct mount *mp,
861     struct thread *td,
862     struct ucred *cred,
863     uint64_t nodeid,
864     uint64_t nlookup)
865 {
866 
867 	struct fuse_dispatcher fdi;
868 	struct fuse_forget_in *ffi;
869 
870 	/*
871 	 * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu",
872 	 *         (long long unsigned) nodeid));
873 	 */
874 
875 	fdisp_init(&fdi, sizeof(*ffi));
876 	fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred);
877 
878 	ffi = fdi.indata;
879 	ffi->nlookup = nlookup;
880 
881 	fuse_insert_message(fdi.tick, false);
882 	fdisp_destroy(&fdi);
883 }
884 
885 /* Fetch the vnode's attributes from the daemon */
886 int
887 fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
888 	struct ucred *cred, struct thread *td)
889 {
890 	struct fuse_dispatcher fdi;
891 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
892 	struct fuse_getattr_in *fgai;
893 	struct fuse_attr_out *fao;
894 	off_t old_filesize = fvdat->cached_attrs.va_size;
895 	struct timespec old_atime = fvdat->cached_attrs.va_atime;
896 	struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
897 	struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
898 	__enum_uint8(vtype) vtyp;
899 	int err;
900 
901 	ASSERT_VOP_LOCKED(vp, __func__);
902 
903 	fdisp_init(&fdi, sizeof(*fgai));
904 	fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
905 	fgai = fdi.indata;
906 	/*
907 	 * We could look up a file handle and set it in fgai->fh, but that
908 	 * involves extra runtime work and I'm unaware of any file systems that
909 	 * care.
910 	 */
911 	fgai->getattr_flags = 0;
912 	if ((err = fdisp_wait_answ(&fdi))) {
913 		if (err == ENOENT)
914 			fuse_internal_vnode_disappear(vp);
915 		goto out;
916 	}
917 
918 	fao = (struct fuse_attr_out *)fdi.answ;
919 	vtyp = IFTOVT(fao->attr.mode);
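	/*
	 * If the size or timestamps were changed locally and not yet flushed
	 * to the server, keep our cached values instead of the server's.
	 */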
920 	if (fvdat->flag & FN_SIZECHANGE)
921 		fao->attr.size = old_filesize;
922 	if (fvdat->flag & FN_ATIMECHANGE) {
923 		fao->attr.atime = old_atime.tv_sec;
924 		fao->attr.atimensec = old_atime.tv_nsec;
925 	}
926 	if (fvdat->flag & FN_CTIMECHANGE) {
927 		fao->attr.ctime = old_ctime.tv_sec;
928 		fao->attr.ctimensec = old_ctime.tv_nsec;
929 	}
930 	if (fvdat->flag & FN_MTIMECHANGE) {
931 		fao->attr.mtime = old_mtime.tv_sec;
932 		fao->attr.mtimensec = old_mtime.tv_nsec;
933 	}
934 	fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
935 		fao->attr_valid_nsec, vap, true);
936 	if (vtyp != vnode_vtype(vp)) {
937 		fuse_internal_vnode_disappear(vp);
938 		err = ENOENT;
939 	}
940 
941 out:
942 	fdisp_destroy(&fdi);
943 	return err;
944 }
945 
946 /* Read a vnode's attributes from cache or fetch them from the fuse daemon */
947 int
948 fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred,
949 	struct thread *td)
950 {
951 	struct vattr *attrs;
952 
953 	if ((attrs = VTOVA(vp)) != NULL) {
954 		*vap = *attrs;	/* struct copy */
955 		return 0;
956 	}
957 
958 	return fuse_internal_do_getattr(vp, vap, cred, td);
959 }
960 
961 void
962 fuse_internal_vnode_disappear(struct vnode *vp)
963 {
964 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
965 
966 	ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear");
967 	fvdat->flag |= FN_REVOKED;
968 	cache_purge(vp);
969 }
970 
971 /* fuse start/stop */
972 
973 SDT_PROBE_DEFINE2(fusefs, , internal, init_done,
974 	"struct fuse_data*", "struct fuse_init_out*");
975 int
976 fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
977 {
978 	int err = 0;
979 	struct fuse_data *data = tick->tk_data;
980 	struct fuse_init_out *fiio = NULL;
981 
982 	if ((err = tick->tk_aw_ohead.error)) {
983 		goto out;
984 	}
985 	if ((err = fticket_pull(tick, uio))) {
986 		goto out;
987 	}
988 	fiio = fticket_resp(tick)->base;
989 
990 	data->fuse_libabi_major = fiio->major;
991 	data->fuse_libabi_minor = fiio->minor;
992 	if (!fuse_libabi_geq(data, 7, 4)) {
993 		/*
994 		 * With a little work we could support servers as old as 7.1.
995 		 * But there would be little payoff.
996 		 */
997 		SDT_PROBE2(fusefs, , internal, trace, 1,
998 			"userspace version too low");
999 		err = EPROTONOSUPPORT;
1000 		goto out;
1001 	}
1002 
1003 	if (fuse_libabi_geq(data, 7, 5)) {
1004 		if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) ||
1005 		    fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) {
1006 			data->max_write = fiio->max_write;
1007 			if (fiio->flags & FUSE_ASYNC_READ)
1008 				data->dataflags |= FSESS_ASYNC_READ;
1009 			if (fiio->flags & FUSE_POSIX_LOCKS)
1010 				data->dataflags |= FSESS_POSIX_LOCKS;
1011 			if (fiio->flags & FUSE_EXPORT_SUPPORT)
1012 				data->dataflags |= FSESS_EXPORT_SUPPORT;
1013 			if (fiio->flags & FUSE_NO_OPEN_SUPPORT)
1014 				data->dataflags |= FSESS_NO_OPEN_SUPPORT;
1015 			if (fiio->flags & FUSE_NO_OPENDIR_SUPPORT)
1016 				data->dataflags |= FSESS_NO_OPENDIR_SUPPORT;
1017 			/*
1018 			 * Don't bother to check FUSE_BIG_WRITES, because it's
1019 			 * redundant with max_write
1020 			 */
1021 			/*
1022 			 * max_background and congestion_threshold are not
1023 			 * implemented
1024 			 */
1025 		} else {
1026 			err = EINVAL;
1027 		}
1028 	} else {
1029 		/* Old fixed values */
1030 		data->max_write = 4096;
1031 	}
1032 
1033 	if (fuse_libabi_geq(data, 7, 6))
1034 		data->max_readahead_blocks = fiio->max_readahead / maxbcachebuf;
1035 
1036 	if (!fuse_libabi_geq(data, 7, 7))
1037 		fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
1038 
1039 	if (!fuse_libabi_geq(data, 7, 8)) {
1040 		fsess_set_notimpl(data->mp, FUSE_BMAP);
1041 		fsess_set_notimpl(data->mp, FUSE_DESTROY);
1042 	}
1043 
1044 	if (!fuse_libabi_geq(data, 7, 19)) {
1045 		fsess_set_notimpl(data->mp, FUSE_FALLOCATE);
1046 	}
1047 
1048 	if (fuse_libabi_geq(data, 7, 23) && fiio->time_gran >= 1 &&
1049 	    fiio->time_gran <= 1000000000)
1050 		data->time_gran = fiio->time_gran;
1051 	else
1052 		data->time_gran = 1;
1053 
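	/*
	 * Select the data cache mode.  For protocol versions older than 7.23
	 * use the vfs.fusefs.data_cache_mode sysctl; otherwise use writeback
	 * caching if the server requested FUSE_WRITEBACK_CACHE, else
	 * writethrough.
	 */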
1054 	if (!fuse_libabi_geq(data, 7, 23))
1055 		data->cache_mode = fuse_data_cache_mode;
1056 	else if (fiio->flags & FUSE_WRITEBACK_CACHE)
1057 		data->cache_mode = FUSE_CACHE_WB;
1058 	else
1059 		data->cache_mode = FUSE_CACHE_WT;
1060 
1061 	if (!fuse_libabi_geq(data, 7, 24))
1062 		fsess_set_notimpl(data->mp, FUSE_LSEEK);
1063 
1064 	if (!fuse_libabi_geq(data, 7, 28))
1065 		fsess_set_notimpl(data->mp, FUSE_COPY_FILE_RANGE);
1066 
1067 out:
1068 	if (err) {
1069 		fdata_set_dead(data);
1070 	}
1071 	FUSE_LOCK();
1072 	data->dataflags |= FSESS_INITED;
1073 	SDT_PROBE2(fusefs, , internal, init_done, data, fiio);
1074 	wakeup(&data->ticketer);
1075 	FUSE_UNLOCK();
1076 
1077 	return 0;
1078 }
1079 
1080 void
1081 fuse_internal_send_init(struct fuse_data *data, struct thread *td)
1082 {
1083 	struct fuse_init_in *fiii;
1084 	struct fuse_dispatcher fdi;
1085 
1086 	fdisp_init(&fdi, sizeof(*fiii));
1087 	fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL);
1088 	fiii = fdi.indata;
1089 	fiii->major = FUSE_KERNEL_VERSION;
1090 	fiii->minor = FUSE_KERNEL_MINOR_VERSION;
1091 	/*
1092 	 * fusefs currently reads ahead no more than one cache block at a time.
1093 	 * See fuse_read_biobackend
1094 	 */
1095 	fiii->max_readahead = maxbcachebuf;
1096 	/*
1097 	 * Unsupported features:
1098 	 * FUSE_FILE_OPS: No known FUSE server or client supports it
1099 	 * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it
1100 	 * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even
1101 	 *	when default ACLs are in use.
1102 	 * FUSE_SPLICE_WRITE, FUSE_SPLICE_MOVE, FUSE_SPLICE_READ: FreeBSD
1103 	 *	doesn't have splice(2).
1104 	 * FUSE_FLOCK_LOCKS: not yet implemented
1105 	 * FUSE_HAS_IOCTL_DIR: not yet implemented
1106 	 * FUSE_AUTO_INVAL_DATA: not yet implemented
1107 	 * FUSE_DO_READDIRPLUS: not yet implemented
1108 	 * FUSE_READDIRPLUS_AUTO: not yet implemented
1109 	 * FUSE_ASYNC_DIO: not yet implemented
1110 	 * FUSE_PARALLEL_DIROPS: not yet implemented
1111 	 * FUSE_HANDLE_KILLPRIV: not yet implemented
1112 	 * FUSE_POSIX_ACL: not yet implemented
1113 	 * FUSE_ABORT_ERROR: not yet implemented
1114 	 * FUSE_CACHE_SYMLINKS: not yet implemented
1115 	 * FUSE_MAX_PAGES: not yet implemented
1116 	 */
1117 	fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT
1118 		| FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE
1119 		| FUSE_NO_OPEN_SUPPORT | FUSE_NO_OPENDIR_SUPPORT;
1120 
1121 	fuse_insert_callback(fdi.tick, fuse_internal_init_callback);
1122 	fuse_insert_message(fdi.tick, false);
1123 	fdisp_destroy(&fdi);
1124 }
1125 
1126 /*
1127  * Send a FUSE_SETATTR operation with no permissions checks.  If cred is NULL,
1128  * send the request with root credentials
1129  */
1130 int fuse_internal_setattr(struct vnode *vp, struct vattr *vap,
1131 	struct thread *td, struct ucred *cred)
1132 {
1133 	struct fuse_vnode_data *fvdat;
1134 	struct fuse_dispatcher fdi;
1135 	struct fuse_setattr_in *fsai;
1136 	struct mount *mp;
1137 	pid_t pid = td->td_proc->p_pid;
1138 	struct fuse_data *data;
1139 	int err = 0;
1140 	__enum_uint8(vtype) vtyp;
1141 
1142 	ASSERT_VOP_ELOCKED(vp, __func__);
1143 
1144 	mp = vnode_mount(vp);
1145 	fvdat = VTOFUD(vp);
1146 	data = fuse_get_mpdata(mp);
1147 
1148 	fdisp_init(&fdi, sizeof(*fsai));
1149 	fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred);
1150 	if (!cred) {
1151 		fdi.finh->uid = 0;
1152 		fdi.finh->gid = 0;
1153 	}
1154 	fsai = fdi.indata;
1155 	fsai->valid = 0;
1156 
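
	/*
	 * Translate each vattr field the caller set into the corresponding
	 * FATTR_* bit.  Locally dirtied cached timestamps (FN_*TIMECHANGE)
	 * are also flushed to the server here.
	 */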
1157 	if (vap->va_uid != (uid_t)VNOVAL) {
1158 		fsai->uid = vap->va_uid;
1159 		fsai->valid |= FATTR_UID;
1160 	}
1161 	if (vap->va_gid != (gid_t)VNOVAL) {
1162 		fsai->gid = vap->va_gid;
1163 		fsai->valid |= FATTR_GID;
1164 	}
1165 	if (vap->va_size != VNOVAL) {
1166 		struct fuse_filehandle *fufh = NULL;
1167 
1168 		/* Truncate to a new value. */
1169 		fsai->size = vap->va_size;
1170 		fsai->valid |= FATTR_SIZE;
1171 
1172 		fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
1173 		if (fufh) {
1174 			fsai->fh = fufh->fh_id;
1175 			fsai->valid |= FATTR_FH;
1176 		}
1177 		VTOFUD(vp)->flag &= ~FN_SIZECHANGE;
1178 	}
1179 	if (vap->va_atime.tv_sec != VNOVAL) {
1180 		fsai->atime = vap->va_atime.tv_sec;
1181 		fsai->atimensec = vap->va_atime.tv_nsec;
1182 		fsai->valid |= FATTR_ATIME;
1183 		if (vap->va_vaflags & VA_UTIMES_NULL)
1184 			fsai->valid |= FATTR_ATIME_NOW;
1185 	} else if (fvdat->flag & FN_ATIMECHANGE) {
1186 		fsai->atime = fvdat->cached_attrs.va_atime.tv_sec;
1187 		fsai->atimensec = fvdat->cached_attrs.va_atime.tv_nsec;
1188 		fsai->valid |= FATTR_ATIME;
1189 	}
1190 	if (vap->va_mtime.tv_sec != VNOVAL) {
1191 		fsai->mtime = vap->va_mtime.tv_sec;
1192 		fsai->mtimensec = vap->va_mtime.tv_nsec;
1193 		fsai->valid |= FATTR_MTIME;
1194 		if (vap->va_vaflags & VA_UTIMES_NULL)
1195 			fsai->valid |= FATTR_MTIME_NOW;
1196 	} else if (fvdat->flag & FN_MTIMECHANGE) {
1197 		fsai->mtime = fvdat->cached_attrs.va_mtime.tv_sec;
1198 		fsai->mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec;
1199 		fsai->valid |= FATTR_MTIME;
1200 	}
1201 	if (fuse_libabi_geq(data, 7, 23) && fvdat->flag & FN_CTIMECHANGE) {
1202 		fsai->ctime = fvdat->cached_attrs.va_ctime.tv_sec;
1203 		fsai->ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec;
1204 		fsai->valid |= FATTR_CTIME;
1205 	}
1206 	if (vap->va_mode != (mode_t)VNOVAL) {
1207 		fsai->mode = vap->va_mode & ALLPERMS;
1208 		fsai->valid |= FATTR_MODE;
1209 	}
1210 	if (!fsai->valid) {
1211 		goto out;
1212 	}
1213 
1214 	if ((err = fdisp_wait_answ(&fdi)))
1215 		goto out;
1216 	vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode);
1217 
1218 	if (vnode_vtype(vp) != vtyp) {
1219 		if (vnode_vtype(vp) == VNON && vtyp != VNON) {
1220 			SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! "
1221 				"vnode_vtype is VNON and vtype isn't.");
1222 		} else {
1223 			/*
1224 	                 * STALE vnode, ditch
1225 			 * STALE vnode, ditch
1226 			 *
1227 			 * This probably means that the file got deleted and
1228 			 * recreated on the server, with the same inode.
1229 			 * There's nothing really we can do, so let us just
1230 			 * return ENOENT.  After all, the entry must not have
1231 			 * existed in the recent past.  If the user tries
1232 			 * again, it will work.
1233 			 */
1234 			fuse_internal_vnode_disappear(vp);
1235 			err = ENOENT;
1236 		}
1237 	}
1238 	if (err == 0) {
1239 		struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ;
1240 		fuse_vnode_undirty_cached_timestamps(vp, true);
1241 		fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
1242 			fao->attr_valid_nsec, NULL, false);
1243 		getnanouptime(&fvdat->last_local_modify);
1244 	}
1245 
1246 out:
1247 	fdisp_destroy(&fdi);
1248 	return err;
1249 }
1250 
1251 /*
1252  * FreeBSD clears the SUID and SGID bits on any write by a non-root user.
1253  */
1254 void
1255 fuse_internal_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
1256 	struct thread *td)
1257 {
1258 	struct fuse_data *data;
1259 	struct mount *mp;
1260 	struct vattr va;
1261 	int dataflags;
1262 
1263 	mp = vnode_mount(vp);
1264 	data = fuse_get_mpdata(mp);
1265 	dataflags = data->dataflags;
1266 
1267 	ASSERT_VOP_LOCKED(vp, __func__);
1268 
1269 	if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
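		/*
		 * priv_check_cred() returns non-zero when the credential lacks
		 * PRIV_VFS_RETAINSUGID, i.e. for unprivileged users, so the
		 * SUID/SGID bits must be cleared after the write.
		 */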
1270 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
1271 			fuse_internal_getattr(vp, &va, cred, td);
1272 			if (va.va_mode & (S_ISUID | S_ISGID)) {
1273 				mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID);
1274 				/* Clear all vattr fields except mode */
1275 				vattr_null(&va);
1276 				va.va_mode = mode;
1277 
1278 				/*
1279 				 * Ignore fuse_internal_setattr's return value,
1280 				 * because at this point the write operation has
1281 				 * already succeeded and we don't want to return
1282 				 * failing status for that.
1283 				 */
1284 				(void)fuse_internal_setattr(vp, &va, td, NULL);
1285 			}
1286 		}
1287 	}
1288 }
1289 
1290 #ifdef ZERO_PAD_INCOMPLETE_BUFS
1291 static int
1292 isbzero(void *buf, size_t len)
1293 {
1294 	int i;
1295 
1296 	for (i = 0; i < len; i++) {
1297 		if (((char *)buf)[i])
1298 			return (0);
1299 	}
1300 
1301 	return (1);
1302 }
1303 
1304 #endif
1305 
1306 void
1307 fuse_internal_init(void)
1308 {
1309 	fuse_lookup_cache_misses = counter_u64_alloc(M_WAITOK);
1310 	fuse_lookup_cache_hits = counter_u64_alloc(M_WAITOK);
1311 }
1312 
1313 void
1314 fuse_internal_destroy(void)
1315 {
1316 	counter_u64_free(fuse_lookup_cache_hits);
1317 	counter_u64_free(fuse_lookup_cache_misses);
1318 }
1319