1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Google Inc. and Amit Singh 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Google Inc. nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Copyright (C) 2005 Csaba Henk. 34 * All rights reserved. 35 * 36 * Copyright (c) 2019 The FreeBSD Foundation 37 * 38 * Portions of this software were developed by BFF Storage Systems, LLC under 39 * sponsorship from the FreeBSD Foundation. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 50 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/module.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/fcntl.h>
#include <sys/priv.h>
#include <sys/buf.h>
#include <security/mac/mac_framework.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>

#include "fuse.h"
#include "fuse_node.h"
#include "fuse_internal.h"
#include "fuse_io.h"
#include "fuse_ipc.h"

SDT_PROVIDER_DECLARE(fusefs);
/*
 * Fuse trace probe:
 * arg0: verbosity.  Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(fusefs, , node, trace, "int", "char*");

/* Malloc type used for every struct fuse_vnode_data allocated in this file. */
MALLOC_DEFINE(M_FUSEVN, "fuse_vnode", "fuse vnode private data");

/* Forward declaration; the handler is referenced by SYSCTL_PROC below. */
static int sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS);

/*
 * Number of live fuse vnodes.  Incremented by fuse_vnode_init(), decremented
 * by fuse_vnode_destroy(), allocated in fuse_node_init() and released in
 * fuse_node_destroy().  Exported read-only through the node_count sysctl.
 */
static counter_u64_t fuse_node_count;

SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, node_count, CTLFLAG_RD,
    &fuse_node_count, "Count of FUSE vnodes");

/*
 * Global data cache mode; defaults to write-through.  Only changed through
 * the data_cache_mode sysctl, whose handler restricts it to
 * FUSE_CACHE_UC, FUSE_CACHE_WT or FUSE_CACHE_WB.
 */
int fuse_data_cache_mode = FUSE_CACHE_WT;

/*
 * DEPRECATED
 * This sysctl is no longer needed as of fuse protocol 7.23.  Individual
 * servers can select the cache behavior they need for each mountpoint:
 * - writethrough: the default
 * - writeback: set FUSE_WRITEBACK_CACHE in fuse_init_out.flags
 * - uncached: set FOPEN_DIRECT_IO for every file
 * The sysctl is retained primarily for use by jails supporting older FUSE
 * protocols.  It may be removed entirely once FreeBSD 11.3 and 12.0 are EOL.
123 */ 124 SYSCTL_PROC(_vfs_fusefs, OID_AUTO, data_cache_mode, 125 CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 126 &fuse_data_cache_mode, 0, sysctl_fuse_cache_mode, "I", 127 "Zero: disable caching of FUSE file data; One: write-through caching " 128 "(default); Two: write-back caching (generally unsafe)"); 129 130 static int 131 sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS) 132 { 133 int val, error; 134 135 val = *(int *)arg1; 136 error = sysctl_handle_int(oidp, &val, 0, req); 137 if (error || !req->newptr) 138 return (error); 139 140 switch (val) { 141 case FUSE_CACHE_UC: 142 case FUSE_CACHE_WT: 143 case FUSE_CACHE_WB: 144 *(int *)arg1 = val; 145 break; 146 default: 147 return (EDOM); 148 } 149 return (0); 150 } 151 152 static void 153 fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat, 154 uint64_t nodeid, __enum_uint8(vtype) vtyp) 155 { 156 fvdat->nid = nodeid; 157 LIST_INIT(&fvdat->handles); 158 159 vattr_null(&fvdat->cached_attrs); 160 fvdat->cached_attrs.va_birthtime.tv_sec = -1; 161 fvdat->cached_attrs.va_birthtime.tv_nsec = 0; 162 fvdat->cached_attrs.va_fsid = VNOVAL; 163 fvdat->cached_attrs.va_gen = 0; 164 fvdat->cached_attrs.va_rdev = NODEV; 165 166 if (nodeid == FUSE_ROOT_ID) { 167 vp->v_vflag |= VV_ROOT; 168 } 169 vp->v_type = vtyp; 170 vp->v_data = fvdat; 171 cluster_init_vn(&fvdat->clusterw); 172 timespecclear(&fvdat->last_local_modify); 173 174 counter_u64_add(fuse_node_count, 1); 175 } 176 177 void 178 fuse_vnode_destroy(struct vnode *vp) 179 { 180 struct fuse_vnode_data *fvdat = vp->v_data; 181 182 vp->v_data = NULL; 183 KASSERT(LIST_EMPTY(&fvdat->handles), 184 ("Destroying fuse vnode with open files!")); 185 free(fvdat, M_FUSEVN); 186 187 counter_u64_add(fuse_node_count, -1); 188 } 189 190 int 191 fuse_vnode_cmp(struct vnode *vp, void *nidp) 192 { 193 return (VTOI(vp) != *((uint64_t *)nidp)); 194 } 195 196 SDT_PROBE_DEFINE3(fusefs, , node, stale_vnode, "struct vnode*", "uint8_t", 197 "uint64_t"); 198 static int 199 fuse_vnode_alloc(struct 
mount *mp, 200 struct thread *td, 201 uint64_t nodeid, 202 __enum_uint8(vtype) vtyp, 203 struct vnode **vpp) 204 { 205 struct fuse_data *data; 206 struct fuse_vnode_data *fvdat; 207 struct vnode *vp2; 208 int err = 0; 209 210 data = fuse_get_mpdata(mp); 211 if (vtyp == VNON) { 212 return EINVAL; 213 } 214 *vpp = NULL; 215 err = vfs_hash_get(mp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE, td, vpp, 216 fuse_vnode_cmp, &nodeid); 217 if (err) 218 return (err); 219 220 if (*vpp) { 221 if ((*vpp)->v_type == vtyp) { 222 /* Reuse a vnode that hasn't yet been reclaimed */ 223 MPASS((*vpp)->v_data != NULL); 224 MPASS(VTOFUD(*vpp)->nid == nodeid); 225 SDT_PROBE2(fusefs, , node, trace, 1, 226 "vnode taken from hash"); 227 return (0); 228 } else { 229 /* 230 * The inode changed types! If we get here, we can't 231 * tell whether the inode's entry cache had expired 232 * yet. So this could be the result of a buggy server, 233 * but more likely the server just reused an inode 234 * number following an entry cache expiration. 235 */ 236 SDT_PROBE3(fusefs, , node, stale_vnode, *vpp, vtyp, 237 nodeid); 238 fuse_internal_vnode_disappear(*vpp); 239 vgone(*vpp); 240 lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); 241 } 242 } 243 fvdat = malloc(sizeof(*fvdat), M_FUSEVN, M_WAITOK | M_ZERO); 244 switch (vtyp) { 245 case VFIFO: 246 err = getnewvnode("fuse", mp, &fuse_fifoops, vpp); 247 break; 248 default: 249 err = getnewvnode("fuse", mp, &fuse_vnops, vpp); 250 break; 251 } 252 if (err) { 253 free(fvdat, M_FUSEVN); 254 return (err); 255 } 256 lockmgr((*vpp)->v_vnlock, LK_EXCLUSIVE, NULL); 257 fuse_vnode_init(*vpp, fvdat, nodeid, vtyp); 258 err = insmntque(*vpp, mp); 259 ASSERT_VOP_ELOCKED(*vpp, "fuse_vnode_alloc"); 260 if (err) { 261 lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); 262 free(fvdat, M_FUSEVN); 263 *vpp = NULL; 264 return (err); 265 } 266 /* Disallow async reads for fifos because UFS does. 
I don't know why */ 267 if (data->dataflags & FSESS_ASYNC_READ && vtyp != VFIFO) 268 VN_LOCK_ASHARE(*vpp); 269 270 vn_set_state(*vpp, VSTATE_CONSTRUCTED); 271 err = vfs_hash_insert(*vpp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE, 272 td, &vp2, fuse_vnode_cmp, &nodeid); 273 if (err) { 274 lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); 275 free(fvdat, M_FUSEVN); 276 *vpp = NULL; 277 return (err); 278 } 279 if (vp2 != NULL) { 280 *vpp = vp2; 281 return (0); 282 } 283 284 ASSERT_VOP_ELOCKED(*vpp, "fuse_vnode_alloc"); 285 286 return (0); 287 } 288 289 int 290 fuse_vnode_get(struct mount *mp, 291 struct fuse_entry_out *feo, 292 uint64_t nodeid, 293 struct vnode *dvp, 294 struct vnode **vpp, 295 struct componentname *cnp, 296 __enum_uint8(vtype) vtyp) 297 { 298 struct thread *td = curthread; 299 /* 300 * feo should only be NULL for the root directory, which (when libfuse 301 * is used) always has generation 0 302 */ 303 uint64_t generation = feo ? feo->generation : 0; 304 int err = 0; 305 306 if (dvp != NULL && VTOFUD(dvp)->nid == nodeid) { 307 fuse_warn(fuse_get_mpdata(mp), FSESS_WARN_ILLEGAL_INODE, 308 "Assigned same inode to both parent and child."); 309 return EIO; 310 } 311 312 err = fuse_vnode_alloc(mp, td, nodeid, vtyp, vpp); 313 if (err) { 314 return err; 315 } 316 if (dvp != NULL) { 317 MPASS(cnp && (cnp->cn_flags & ISDOTDOT) == 0); 318 MPASS(cnp && 319 !(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')); 320 fuse_vnode_setparent(*vpp, dvp); 321 } 322 if (dvp != NULL && cnp != NULL && (cnp->cn_flags & MAKEENTRY) != 0 && 323 feo != NULL && 324 (feo->entry_valid != 0 || feo->entry_valid_nsec != 0)) { 325 struct timespec timeout; 326 327 ASSERT_VOP_LOCKED(*vpp, "fuse_vnode_get"); 328 ASSERT_VOP_LOCKED(dvp, "fuse_vnode_get"); 329 330 fuse_validity_2_timespec(feo, &timeout); 331 cache_enter_time(dvp, *vpp, cnp, &timeout, NULL); 332 } 333 334 VTOFUD(*vpp)->generation = generation; 335 /* 336 * In userland, libfuse uses cached lookups for dot and dotdot entries, 337 * thus 
it does not really bump the nlookup counter for forget. 338 * Follow the same semantic and avoid the bump in order to keep 339 * nlookup counters consistent. 340 */ 341 if (cnp == NULL || ((cnp->cn_flags & ISDOTDOT) == 0 && 342 (cnp->cn_namelen != 1 || cnp->cn_nameptr[0] != '.'))) 343 VTOFUD(*vpp)->nlookup++; 344 345 return 0; 346 } 347 348 /* 349 * Called for every fusefs vnode open to initialize the vnode (not 350 * fuse_filehandle) for use 351 */ 352 void 353 fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags, struct thread *td) 354 { 355 if (vnode_vtype(vp) == VREG) 356 vnode_create_vobject(vp, 0, td); 357 } 358 359 int 360 fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid) 361 { 362 struct fuse_vnode_data *fvdat = VTOFUD(vp); 363 struct thread *td = curthread; 364 struct fuse_filehandle *fufh = NULL; 365 struct fuse_dispatcher fdi; 366 struct fuse_setattr_in *fsai; 367 int err = 0; 368 369 ASSERT_VOP_ELOCKED(vp, "fuse_io_extend"); 370 371 if (fuse_isdeadfs(vp)) { 372 return EBADF; 373 } 374 if (vnode_vtype(vp) == VDIR) { 375 return EISDIR; 376 } 377 if (vfs_isrdonly(vnode_mount(vp))) { 378 return EROFS; 379 } 380 if (cred == NULL) { 381 cred = td->td_ucred; 382 } 383 fdisp_init(&fdi, sizeof(*fsai)); 384 fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); 385 fsai = fdi.indata; 386 fsai->valid = 0; 387 388 /* Truncate to a new value. */ 389 MPASS((fvdat->flag & FN_SIZECHANGE) != 0); 390 fsai->size = fvdat->cached_attrs.va_size; 391 fsai->valid |= FATTR_SIZE; 392 393 fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); 394 if (fufh) { 395 fsai->fh = fufh->fh_id; 396 fsai->valid |= FATTR_FH; 397 } 398 err = fdisp_wait_answ(&fdi); 399 fdisp_destroy(&fdi); 400 if (err == 0) { 401 getnanouptime(&fvdat->last_local_modify); 402 fvdat->flag &= ~FN_SIZECHANGE; 403 } 404 405 return err; 406 } 407 408 /* 409 * Adjust the vnode's size to a new value. 
410 * 411 * If the new value came from the server, such as from a FUSE_GETATTR 412 * operation, set `from_server` true. But if it came from a local operation, 413 * such as write(2) or truncate(2), set `from_server` false. 414 */ 415 int 416 fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server) 417 { 418 struct fuse_vnode_data *fvdat = VTOFUD(vp); 419 struct vattr *attrs; 420 off_t oldsize; 421 size_t iosize; 422 struct buf *bp = NULL; 423 int err = 0; 424 425 ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize"); 426 427 iosize = fuse_iosize(vp); 428 oldsize = fvdat->cached_attrs.va_size; 429 fvdat->cached_attrs.va_size = newsize; 430 if ((attrs = VTOVA(vp)) != NULL) 431 attrs->va_size = newsize; 432 433 if (newsize < oldsize) { 434 daddr_t lbn; 435 436 err = vtruncbuf(vp, newsize, fuse_iosize(vp)); 437 if (err) 438 goto out; 439 if (newsize % iosize == 0) 440 goto out; 441 /* 442 * Zero the contents of the last partial block. 443 * Sure seems like vtruncbuf should do this for us. 444 */ 445 446 lbn = newsize / iosize; 447 bp = getblk(vp, lbn, iosize, PCATCH, 0, 0); 448 if (!bp) { 449 err = EINTR; 450 goto out; 451 } 452 if (!(bp->b_flags & B_CACHE)) 453 goto out; /* Nothing to do */ 454 MPASS(bp->b_flags & B_VMIO); 455 vfs_bio_clrbuf(bp); 456 bp->b_dirtyend = MIN(bp->b_dirtyend, newsize - lbn * iosize); 457 } else if (from_server && newsize > oldsize && oldsize != VNOVAL) { 458 /* 459 * The FUSE server changed the file size behind our back. We 460 * should invalidate the entire cache. 
461 */ 462 daddr_t end_lbn; 463 464 end_lbn = howmany(newsize, iosize); 465 v_inval_buf_range(vp, 0, end_lbn, iosize); 466 } 467 out: 468 if (bp) 469 brelse(bp); 470 vnode_pager_setsize(vp, newsize); 471 return err; 472 } 473 474 /* Get the current, possibly dirty, size of the file */ 475 int 476 fuse_vnode_size(struct vnode *vp, off_t *filesize, struct ucred *cred, 477 struct thread *td) 478 { 479 struct fuse_vnode_data *fvdat = VTOFUD(vp); 480 int error = 0; 481 482 if (!(fvdat->flag & FN_SIZECHANGE) && 483 (!fuse_vnode_attr_cache_valid(vp) || 484 fvdat->cached_attrs.va_size == VNOVAL)) 485 error = fuse_internal_do_getattr(vp, NULL, cred, td); 486 487 if (!error) 488 *filesize = fvdat->cached_attrs.va_size; 489 490 return error; 491 } 492 493 void 494 fuse_vnode_undirty_cached_timestamps(struct vnode *vp, bool atime) 495 { 496 struct fuse_vnode_data *fvdat = VTOFUD(vp); 497 498 fvdat->flag &= ~(FN_MTIMECHANGE | FN_CTIMECHANGE); 499 if (atime) 500 fvdat->flag &= ~FN_ATIMECHANGE; 501 } 502 503 /* Update a fuse file's cached timestamps */ 504 void 505 fuse_vnode_update(struct vnode *vp, int flags) 506 { 507 struct fuse_vnode_data *fvdat = VTOFUD(vp); 508 struct mount *mp = vnode_mount(vp); 509 struct fuse_data *data = fuse_get_mpdata(mp); 510 struct timespec ts; 511 512 vfs_timestamp(&ts); 513 514 if (data->time_gran > 1) 515 ts.tv_nsec = rounddown(ts.tv_nsec, data->time_gran); 516 517 if (mp->mnt_flag & MNT_NOATIME) 518 flags &= ~FN_ATIMECHANGE; 519 520 if (flags & FN_ATIMECHANGE) 521 fvdat->cached_attrs.va_atime = ts; 522 if (flags & FN_MTIMECHANGE) 523 fvdat->cached_attrs.va_mtime = ts; 524 if (flags & FN_CTIMECHANGE) 525 fvdat->cached_attrs.va_ctime = ts; 526 527 fvdat->flag |= flags; 528 } 529 530 void 531 fuse_node_init(void) 532 { 533 fuse_node_count = counter_u64_alloc(M_WAITOK); 534 } 535 536 void 537 fuse_node_destroy(void) 538 { 539 counter_u64_free(fuse_node_count); 540 } 541