1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 25 * Copyright (c) 2014 Integros [integros.com] 26 * Copyright 2017 Nexenta Systems, Inc. 
27 */ 28 29 /* Portions Copyright 2007 Jeremy Teo */ 30 /* Portions Copyright 2010 Robert Milkowski */ 31 32 33 #include <sys/types.h> 34 #include <sys/param.h> 35 #include <sys/time.h> 36 #include <sys/systm.h> 37 #include <sys/sysmacros.h> 38 #include <sys/resource.h> 39 #include <sys/vfs.h> 40 #include <sys/endian.h> 41 #include <sys/vm.h> 42 #include <sys/vnode.h> 43 #if __FreeBSD_version >= 1300102 44 #include <sys/smr.h> 45 #endif 46 #include <sys/dirent.h> 47 #include <sys/file.h> 48 #include <sys/stat.h> 49 #include <sys/kmem.h> 50 #include <sys/taskq.h> 51 #include <sys/uio.h> 52 #include <sys/atomic.h> 53 #include <sys/namei.h> 54 #include <sys/mman.h> 55 #include <sys/cmn_err.h> 56 #include <sys/kdb.h> 57 #include <sys/sysproto.h> 58 #include <sys/errno.h> 59 #include <sys/unistd.h> 60 #include <sys/zfs_dir.h> 61 #include <sys/zfs_ioctl.h> 62 #include <sys/fs/zfs.h> 63 #include <sys/dmu.h> 64 #include <sys/dmu_objset.h> 65 #include <sys/spa.h> 66 #include <sys/txg.h> 67 #include <sys/dbuf.h> 68 #include <sys/zap.h> 69 #include <sys/sa.h> 70 #include <sys/policy.h> 71 #include <sys/sunddi.h> 72 #include <sys/filio.h> 73 #include <sys/sid.h> 74 #include <sys/zfs_ctldir.h> 75 #include <sys/zfs_fuid.h> 76 #include <sys/zfs_quota.h> 77 #include <sys/zfs_sa.h> 78 #include <sys/zfs_rlock.h> 79 #include <sys/extdirent.h> 80 #include <sys/bio.h> 81 #include <sys/buf.h> 82 #include <sys/sched.h> 83 #include <sys/acl.h> 84 #include <sys/vmmeter.h> 85 #include <vm/vm_param.h> 86 #include <sys/zil.h> 87 #include <sys/zfs_vnops.h> 88 89 #include <vm/vm_object.h> 90 91 #include <sys/extattr.h> 92 #include <sys/priv.h> 93 94 #ifndef VN_OPEN_INVFS 95 #define VN_OPEN_INVFS 0x0 96 #endif 97 98 VFS_SMR_DECLARE; 99 100 #if __FreeBSD_version >= 1300047 101 #define vm_page_wire_lock(pp) 102 #define vm_page_wire_unlock(pp) 103 #else 104 #define vm_page_wire_lock(pp) vm_page_lock(pp) 105 #define vm_page_wire_unlock(pp) vm_page_unlock(pp) 106 #endif 107 108 #ifdef DEBUG_VFS_LOCKS 
109 #define VNCHECKREF(vp) \ 110 VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp, \ 111 ("%s: wrong ref counts", __func__)); 112 #else 113 #define VNCHECKREF(vp) 114 #endif 115 116 #if __FreeBSD_version >= 1400045 117 typedef uint64_t cookie_t; 118 #else 119 typedef ulong_t cookie_t; 120 #endif 121 122 /* 123 * Programming rules. 124 * 125 * Each vnode op performs some logical unit of work. To do this, the ZPL must 126 * properly lock its in-core state, create a DMU transaction, do the work, 127 * record this work in the intent log (ZIL), commit the DMU transaction, 128 * and wait for the intent log to commit if it is a synchronous operation. 129 * Moreover, the vnode ops must work in both normal and log replay context. 130 * The ordering of events is important to avoid deadlocks and references 131 * to freed memory. The example below illustrates the following Big Rules: 132 * 133 * (1) A check must be made in each zfs thread for a mounted file system. 134 * This is done avoiding races using ZFS_ENTER(zfsvfs). 135 * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 136 * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 137 * can return EIO from the calling function. 138 * 139 * (2) VN_RELE() should always be the last thing except for zil_commit() 140 * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 141 * First, if it's the last reference, the vnode/znode 142 * can be freed, so the zp may point to freed memory. Second, the last 143 * reference will call zfs_zinactive(), which may induce a lot of work -- 144 * pushing cached pages (which acquires range locks) and syncing out 145 * cached atime changes. Third, zfs_zinactive() may require a new tx, 146 * which could deadlock the system if you were already holding one. 147 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 148 * 149 * (3) All range locks must be grabbed before calling dmu_tx_assign(), 150 * as they can span dmu_tx_assign() calls. 
151 * 152 * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 153 * dmu_tx_assign(). This is critical because we don't want to block 154 * while holding locks. 155 * 156 * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 157 * reduces lock contention and CPU usage when we must wait (note that if 158 * throughput is constrained by the storage, nearly every transaction 159 * must wait). 160 * 161 * Note, in particular, that if a lock is sometimes acquired before 162 * the tx assigns, and sometimes after (e.g. z_lock), then failing 163 * to use a non-blocking assign can deadlock the system. The scenario: 164 * 165 * Thread A has grabbed a lock before calling dmu_tx_assign(). 166 * Thread B is in an already-assigned tx, and blocks for this lock. 167 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 168 * forever, because the previous txg can't quiesce until B's tx commits. 169 * 170 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 171 * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 172 * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT, 173 * to indicate that this operation has already called dmu_tx_wait(). 174 * This will ensure that we don't retry forever, waiting a short bit 175 * each time. 176 * 177 * (5) If the operation succeeded, generate the intent log entry for it 178 * before dropping locks. This ensures that the ordering of events 179 * in the intent log matches the order in which they actually occurred. 180 * During ZIL replay the zfs_log_* functions will update the sequence 181 * number to indicate the zil transaction has replayed. 182 * 183 * (6) At the end of each vnode op, the DMU tx must always commit, 184 * regardless of whether there were any errors. 185 * 186 * (7) After dropping all locks, invoke zil_commit(zilog, foid) 187 * to ensure that synchronous semantics are provided when necessary. 
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	ZFS_ENTER(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
 *	rw_enter(...);			// grab any other locks you need
 *	tx = dmu_tx_create(...);	// get DMU tx
 *	dmu_tx_hold_*();		// hold each object you might modify
 *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 *	if (error) {
 *		rw_exit(...);		// drop locks
 *		zfs_dirent_unlock(dl);	// unlock directory entry
 *		VN_RELE(...);		// release held vnodes
 *		if (error == ERESTART) {
 *			waited = B_TRUE;
 *			dmu_tx_wait(tx);
 *			dmu_tx_abort(tx);
 *			goto top;
 *		}
 *		dmu_tx_abort(tx);	// abort DMU tx
 *		ZFS_EXIT(zfsvfs);	// finished in zfs
 *		return (error);		// really out of space
 *	}
 *	error = do_real_work();		// do whatever this VOP does
 *	if (error == 0)
 *		zfs_log_*(...);		// on success, make ZIL entry
 *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
 *	rw_exit(...);			// drop locks
 *	zfs_dirent_unlock(dl);		// unlock directory entry
 *	VN_RELE(...);			// release held vnodes
 *	zil_commit(zilog, foid);	// synchronous when necessary
 *	ZFS_EXIT(zfsvfs);		// finished in zfs
 *	return (error);			// done, report error
 */

/*
 * Open a ZFS file.
 *
 * Rejects a write open of an append-only file unless O_APPEND was also
 * requested, and counts synchronous (FSYNC/FDSYNC) opens in the znode so
 * other code can tell whether any synchronous opener exists.
 */
static int
zfs_open(vnode_t **vpp, int flag, cred_t *cr)
{
	(void) cr;
	znode_t *zp = VTOZ(*vpp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Append-only files may only be opened for write with O_APPEND. */
	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
	    ((flag & FAPPEND) == 0)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* Keep a count of the synchronous opens in the znode */
	if (flag & (FSYNC | FDSYNC))
		atomic_inc_32(&zp->z_sync_cnt);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Close a ZFS file.
 *
 * Undoes the synchronous-open accounting done in zfs_open() when the
 * last close (count == 1) of a FSYNC/FDSYNC open arrives.
 */
static int
zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
{
	(void) offset, (void) cr;
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Decrement the synchronous opens in the znode */
	if ((flag & (FSYNC | FDSYNC)) && (count == 1))
		atomic_dec_32(&zp->z_sync_cnt);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Dispatch the ioctls ZFS handles directly: _FIOFFS/_FIOGDIO/_FIOSDIO
 * are accepted as no-ops, and F_SEEK_DATA/F_SEEK_HOLE are forwarded to
 * zfs_holey() with the offset passed in and out through 'data'.
 * Everything else returns ENOTTY.
 */
static int
zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
    int *rvalp)
{
	(void) flag, (void) cred, (void) rvalp;
	loff_t off;
	int error;

	switch (com) {
	case _FIOFFS:
	{
		return (0);

		/*
		 * The following two ioctls are used by bfu.  Faking out,
		 * necessary to avoid bfu errors.
		 */
	}
	case _FIOGDIO:
	case _FIOSDIO:
	{
		return (0);
	}

	case F_SEEK_DATA:
	case F_SEEK_HOLE:
	{
		off = *(offset_t *)data;
		/* offset parameter is in/out */
		error = zfs_holey(VTOZ(vp), com, &off);
		if (error)
			return (error);
		*(offset_t *)data = off;
		return (0);
	}
	}
	return (SET_ERROR(ENOTTY));
}

/*
 * Shared-busy the resident, fully valid page cache page backing byte
 * offset 'start' of the vnode, if any, and clear the dirty bits of the
 * DEV_BSIZE-aligned portion of [off, off + nbytes) within it.
 * Returns the busied page, or NULL when no resident valid page exists.
 */
static vm_page_t
page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
{
	vm_object_t obj;
	vm_page_t pp;
	int64_t end;

	/*
	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
	 * aligned boundaries, if the range is not aligned.  As a result a
	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
	 * the whole page would be considered clean despite having some
	 * dirty data.
	 * For this reason we should shrink the range to DEV_BSIZE aligned
	 * boundaries before calling vm_page_clear_dirty.
	 */
	end = rounddown2(off + nbytes, DEV_BSIZE);
	off = roundup2(off, DEV_BSIZE);
	nbytes = end - off;

	obj = vp->v_object;
	zfs_vmobject_assert_wlocked_12(obj);
#if __FreeBSD_version < 1300050
	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    pp->valid) {
			if (vm_page_xbusied(pp)) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_reference(pp);
				vm_page_lock(pp);
				zfs_vmobject_wunlock(obj);
				vm_page_busy_sleep(pp, "zfsmwb", true);
				zfs_vmobject_wlock(obj);
				continue;
			}
			vm_page_sbusy(pp);
		} else if (pp != NULL) {
			ASSERT(!pp->valid);
			pp = NULL;
		}
		if (pp != NULL) {
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_object_pip_add(obj, 1);
			pmap_remove_write(pp);
			if (nbytes != 0)
				vm_page_clear_dirty(pp, off, nbytes);
		}
		break;
	}
#else
	/* Newer VM API: lockless lookup + shared-busy in one call. */
	vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
	    VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
	    VM_ALLOC_IGN_SBUSY);
	if (pp != NULL) {
		ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
		vm_object_pip_add(obj, 1);
		pmap_remove_write(pp);
		if (nbytes != 0)
			vm_page_clear_dirty(pp, off, nbytes);
	}
#endif
	return (pp);
}

/*
 * Drop the shared-busy state taken by page_busy() and release the
 * paging-in-progress count it added to the page's object.
 */
static void
page_unbusy(vm_page_t pp)
{

	vm_page_sunbusy(pp);
#if __FreeBSD_version >= 1300041
	vm_object_pip_wakeup(pp->object);
#else
	vm_object_pip_subtract(pp->object, 1);
#endif
}

#if __FreeBSD_version > 1300051
/*
 * Return the resident, fully valid page backing byte offset 'start' of
 * the vnode, wired so it cannot be reclaimed; NULL if none.  Release
 * with page_unhold().
 */
static vm_page_t
page_hold(vnode_t *vp, int64_t start)
{
	vm_object_t obj;
	vm_page_t m;

	obj = vp->v_object;
	vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
	    VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
	    VM_ALLOC_NOBUSY);
	return (m);
}
#else
/*
 * Pre-1300051 variant of the above: same contract, but takes a page
 * hold (vm_page_hold) instead of wiring, and requires the object
 * write-locked.  Sleeps and retries if the page is exclusively busied.
 */
static vm_page_t
page_hold(vnode_t *vp, int64_t start)
{
	vm_object_t obj;
	vm_page_t pp;

	obj = vp->v_object;
	zfs_vmobject_assert_wlocked(obj);

	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    pp->valid) {
			if (vm_page_xbusied(pp)) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_reference(pp);
				vm_page_lock(pp);
				zfs_vmobject_wunlock(obj);
				vm_page_busy_sleep(pp, "zfsmwb", true);
				zfs_vmobject_wlock(obj);
				continue;
			}

			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_wire_lock(pp);
			vm_page_hold(pp);
			vm_page_wire_unlock(pp);

		} else
			pp = NULL;
		break;
	}
	return (pp);
}
#endif

/*
 * Release the wire (or, on older FreeBSD, the hold) taken by
 * page_hold().
 */
static void
page_unhold(vm_page_t pp)
{

	vm_page_wire_lock(pp);
#if __FreeBSD_version >= 1300035
	vm_page_unwire(pp, PQ_ACTIVE);
#else
	vm_page_unhold(pp);
#endif
	vm_page_wire_unlock(pp);
}

/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Write:	If we find a memory mapped page, we write to *both*
 *		the page and the dmu buffer.
 */

/*
 * Propagate the data just written to the DMU for the file range
 * [start, start + len) into any resident page cache pages (by reading
 * it back out of the DMU into each busied page), so mmap'ed views stay
 * coherent.  Non-resident pages are skipped.
 */
void
update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
{
	vm_object_t obj;
	struct sf_buf *sf;
	vnode_t *vp = ZTOV(zp);
	caddr_t va;
	int off;

	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);

	off = start & PAGEOFFSET;
	zfs_vmobject_wlock_12(obj);
#if __FreeBSD_version >= 1300041
	vm_object_pip_add(obj, 1);
#endif
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		int nbytes = imin(PAGESIZE - off, len);

		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
			/* Drop the object lock around the sf_buf mapping. */
			zfs_vmobject_wunlock_12(obj);

			va = zfs_map_page(pp, &sf);
			(void) dmu_read(os, zp->z_id, start + off, nbytes,
			    va + off, DMU_READ_PREFETCH);
			zfs_unmap_page(sf);

			zfs_vmobject_wlock_12(obj);
			page_unbusy(pp);
		}
		len -= nbytes;
		off = 0;	/* only the first page can start mid-page */
	}
#if __FreeBSD_version >= 1300041
	vm_object_pip_wakeup(obj);
#else
	vm_object_pip_wakeupn(obj, 0);
#endif
	zfs_vmobject_wunlock_12(obj);
}

/*
 * Read with UIO_NOCOPY flag means that sendfile(2) requests
 * ZFS to populate a range of page cache pages with data.
 *
 * NOTE: this function could be optimized to pre-allocate
 * all pages in advance, drain exclusive busy on all of them,
 * map them into contiguous KVA region and populate them
 * in one single dmu_read() call.
 */
int
mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = zp->z_zfsvfs->z_os;
	struct sf_buf *sf;
	vm_object_t obj;
	vm_page_t pp;
	int64_t start;
	caddr_t va;
	int len = nbytes;
	int error = 0;

	ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);
	/* Only page-aligned starting offsets are supported. */
	ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);

	zfs_vmobject_wlock_12(obj);
	for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
		int bytes = MIN(PAGESIZE, len);

		pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
		    VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
		if (vm_page_none_valid(pp)) {
			/* Page not yet valid: fill it from the DMU. */
			zfs_vmobject_wunlock_12(obj);
			va = zfs_map_page(pp, &sf);
			error = dmu_read(os, zp->z_id, start, bytes, va,
			    DMU_READ_PREFETCH);
			/* Zero-fill the tail of a short final page. */
			if (bytes != PAGESIZE && error == 0)
				bzero(va + bytes, PAGESIZE - bytes);
			zfs_unmap_page(sf);
			zfs_vmobject_wlock_12(obj);
#if __FreeBSD_version >= 1300081
			if (error == 0) {
				vm_page_valid(pp);
				vm_page_activate(pp);
				vm_page_do_sunbusy(pp);
			} else {
				/* Free the page if nobody else holds it. */
				zfs_vmobject_wlock(obj);
				if (!vm_page_wired(pp) && pp->valid == 0 &&
				    vm_page_busy_tryupgrade(pp))
					vm_page_free(pp);
				else
					vm_page_sunbusy(pp);
				zfs_vmobject_wunlock(obj);
			}
#else
			vm_page_do_sunbusy(pp);
			vm_page_lock(pp);
			if (error) {
				if (pp->wire_count == 0 && pp->valid == 0 &&
				    !vm_page_busied(pp))
					vm_page_free(pp);
			} else {
				pp->valid = VM_PAGE_BITS_ALL;
				vm_page_activate(pp);
			}
			vm_page_unlock(pp);
#endif
		} else {
			/* Already valid: nothing to read. */
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_do_sunbusy(pp);
		}
		if (error)
			break;
		zfs_uio_advance(uio, bytes);
		len -= bytes;
	}
	zfs_vmobject_wunlock_12(obj);
	return (error);
}

/*
 * When a file is memory mapped, we must keep the IO data
 * synchronized between the DMU cache and the memory mapped pages.
 * What this means:
 *
 * On Read:	We "read" preferentially from memory mapped pages,
 *		else we default from the dmu buffer.
 *
 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
 *	the file is memory mapped.
 */
int
mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	vm_object_t obj;
	int64_t start;
	int len = nbytes;
	int off;
	int error = 0;

	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);

	start = zfs_uio_offset(uio);
	off = start & PAGEOFFSET;
	zfs_vmobject_wlock_12(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		uint64_t bytes = MIN(PAGESIZE - off, len);

		if ((pp = page_hold(vp, start))) {
			/* Resident page: copy straight from the page cache. */
			struct sf_buf *sf;
			caddr_t va;

			zfs_vmobject_wunlock_12(obj);
			va = zfs_map_page(pp, &sf);
			error = vn_io_fault_uiomove(va + off, bytes,
			    GET_UIO_STRUCT(uio));
			zfs_unmap_page(sf);
			zfs_vmobject_wlock_12(obj);
			page_unhold(pp);
		} else {
			/* Not resident: fall back to reading from the DMU. */
			zfs_vmobject_wunlock_12(obj);
			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, bytes);
			zfs_vmobject_wlock_12(obj);
		}
		len -= bytes;
		off = 0;	/* only the first page can start mid-page */
		if (error)
			break;
	}
	zfs_vmobject_wunlock_12(obj);
	return (error);
}

/*
 * Synchronously write 'len' bytes from 'data' at file offset 'pos'.
 *
 * On success, *presid (if non-NULL) receives the residual (unwritten)
 * byte count; when presid is NULL, any short write is reported as EIO.
 */
int
zfs_write_simple(znode_t *zp, const void *data, size_t len,
    loff_t pos, size_t *presid)
{
	int error = 0;
	ssize_t resid;

	error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
	    UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);

	if (error) {
		return (SET_ERROR(error));
	} else if (presid == NULL) {
		if (resid != 0) {
			error = SET_ERROR(EIO);
		}
	} else {
		*presid = resid;
	}
	return (error);
}

/*
 * Release a znode's vnode asynchronously on the pool's zrele taskq so
 * that possibly heavyweight last-reference teardown does not run in
 * the caller's context (see Big Rule (2) above).
 */
void
zfs_zrele_async(znode_t *zp)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = ITOZSB(vp)->z_os;

	VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
}

/*
 * vn_vget_ino_gen() callback: lock the vnode passed via 'arg', return
 * it in *vpp, and drop its reference if the lock cannot be obtained.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	int error;

	*vpp = arg;
	error = vn_lock(*vpp, lkflags);
	if (error != 0)
		vrele(*vpp);
	return (error);
}

/*
 * Acquire the vnode lock for 'vp', which was just looked up as 'name'
 * in directory 'dvp', handling the lock-order hazards of the "." and
 * ".." special cases.  Consumes vp's reference on failure.
 */
static int
zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
{
	znode_t *zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
	int error;
	int ltype;

	if (zfsvfs->z_replay == B_FALSE)
		ASSERT_VOP_LOCKED(dvp, __func__);

	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
		/* "." — same vnode as the directory; adjust lock type only. */
		ASSERT3P(dvp, ==, vp);
		vref(dvp);
		ltype = lkflags & LK_TYPE_MASK;
		if (ltype != VOP_ISLOCKED(dvp)) {
			if (ltype == LK_EXCLUSIVE)
				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
			else /* if (ltype == LK_SHARED) */
				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);

			/*
			 * Relock for the "." case could leave us with
			 * reclaimed vnode.
			 */
			if (VN_IS_DOOMED(dvp)) {
				vrele(dvp);
				return (SET_ERROR(ENOENT));
			}
		}
		return (0);
	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
		/*
		 * Note that in this case, dvp is the child vnode, and we
		 * are looking up the parent vnode - exactly reverse from
		 * normal operation.  Unlocking dvp requires some rather
		 * tricky unlock/relock dance to prevent mp from being freed;
		 * use vn_vget_ino_gen() which takes care of all that.
		 *
		 * XXX Note that there is a time window when both vnodes are
		 * unlocked.  It is possible, although highly unlikely, that
		 * during that window the parent-child relationship between
		 * the vnodes may change, for example, get reversed.
		 * In that case we would have a wrong lock order for the vnodes.
		 * All other filesystems seem to ignore this problem, so we
		 * do the same here.
		 * A potential solution could be implemented as follows:
		 * - using LK_NOWAIT when locking the second vnode and retrying
		 *   if necessary
		 * - checking that the parent-child relationship still holds
		 *   after locking both vnodes and retrying if it doesn't
		 */
		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
		return (error);
	} else {
		/* Ordinary child: just lock it. */
		error = vn_lock(vp, lkflags);
		if (error != 0)
			vrele(vp);
		return (error);
	}
}

/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 * IN:	dvp	- vnode of directory to search.
 *	nm	- name of entry to lookup.
 *	pnp	- full pathname to lookup [UNUSED].
 *	flags	- LOOKUP_XATTR set if looking for an attribute.
 *	rdir	- root directory vnode [UNUSED].
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * OUT:	vpp	- vnode of located entry, NULL if not found.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	NA
 */
static int
zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
    struct componentname *cnp, int nameiop, cred_t *cr, int flags,
    boolean_t cached)
{
	znode_t *zdp = VTOZ(dvp);
	znode_t *zp;
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
#if __FreeBSD_version > 1300124
	seqc_t dvp_seqc;
#endif
	int error = 0;

	/*
	 * Fast path lookup, however we must skip DNLC lookup
	 * for case folding or normalizing lookups because the
	 * DNLC code only stores the passed in name.  This means
	 * creating 'a' and removing 'A' on a case insensitive
	 * file system would work, but DNLC still thinks 'a'
	 * exists and won't let you create it again on the next
	 * pass through fast path.
	 */
	if (!(flags & LOOKUP_XATTR)) {
		if (dvp->v_type != VDIR) {
			return (SET_ERROR(ENOTDIR));
		} else if (zdp->z_sa_hdl == NULL) {
			return (SET_ERROR(EIO));
		}
	}

	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
	    const char *, nm);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zdp);

#if __FreeBSD_version > 1300124
	/* Sample dvp's seqc now; used below to detect concurrent changes. */
	dvp_seqc = vn_seqc_read_notmodify(dvp);
#endif

	*vpp = NULL;

	if (flags & LOOKUP_XATTR) {
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EOPNOTSUPP));
		}

		/*
		 * We don't allow recursive attributes..
		 * Maybe someday we will.
		 */
		if (zdp->z_pflags & ZFS_XATTR) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EINVAL));
		}

		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		*vpp = ZTOV(zp);

		/*
		 * Do we have permission to get into attribute directory?
		 */
		error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr);
		if (error) {
			vrele(ZTOV(zp));
		}

		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Check accessibility of directory if we're not coming in via
	 * VOP_CACHEDLOOKUP.
	 */
	if (!cached) {
#ifdef NOEXECCHECK
		if ((cnp->cn_flags & NOEXECCHECK) != 0) {
			cnp->cn_flags &= ~NOEXECCHECK;
		} else
#endif
		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/* Reject names that are not valid UTF-8 on utf8only datasets. */
	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}


	/*
	 * First handle the special cases.
	 */
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * If we are a snapshot mounted under .zfs, return
		 * the vp for the snapshot directory.
		 */
		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
			struct componentname cn;
			vnode_t *zfsctl_vp;
			int ltype;

			ZFS_EXIT(zfsvfs);
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK1(dvp);
			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
			    &zfsctl_vp);
			if (error == 0) {
				cn.cn_nameptr = "snapshot";
				cn.cn_namelen = strlen(cn.cn_nameptr);
				cn.cn_nameiop = cnp->cn_nameiop;
				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
				cn.cn_lkflags = cnp->cn_lkflags;
				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
				vput(zfsctl_vp);
			}
			vn_lock(dvp, ltype | LK_RETRY);
			return (error);
		}
	}
	/* Lookup of ".zfs" itself goes to the control directory. */
	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
		ZFS_EXIT(zfsvfs);
		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
			return (SET_ERROR(ENOTSUP));
		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
		return (error);
	}

	/*
	 * The loop retries the lookup if the parent-child relationship
	 * changes during the dot-dot locking complexities.
	 */
	for (;;) {
		uint64_t parent;

		error = zfs_dirlook(zdp, nm, &zp);
		if (error == 0)
			*vpp = ZTOV(zp);

		ZFS_EXIT(zfsvfs);
		if (error != 0)
			break;

		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
		if (error != 0) {
			/*
			 * If we've got a locking error, then the vnode
			 * got reclaimed because of a force unmount.
			 * We never enter doomed vnodes into the name cache.
			 */
			*vpp = NULL;
			return (error);
		}

		if ((cnp->cn_flags & ISDOTDOT) == 0)
			break;

		/* ".." case: verify the parent link is still the same. */
		ZFS_ENTER(zfsvfs);
		if (zdp->z_sa_hdl == NULL) {
			error = SET_ERROR(EIO);
		} else {
			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
			    &parent, sizeof (parent));
		}
		if (error != 0) {
			ZFS_EXIT(zfsvfs);
			vput(ZTOV(zp));
			break;
		}
		if (zp->z_id == parent) {
			ZFS_EXIT(zfsvfs);
			break;
		}
		vput(ZTOV(zp));
	}

	if (error != 0)
		*vpp = NULL;

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			if (error == ENOENT) {
				error = EJUSTRETURN;
				cnp->cn_flags |= SAVENAME;
				break;
			}
			zfs_fallthrough;
		case DELETE:
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
			break;
		}
	}

#if __FreeBSD_version > 1300124
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
		 * handle races.  In particular different callers may end up
		 * with different vnodes and will try to add conflicting
		 * entries to the namecache.
		 *
		 * While finding different result may be acceptable in face
		 * of concurrent modification, adding conflicting entries
		 * trips over an assert in the namecache.
		 *
		 * Ultimately let an entry through once everything settles.
		 */
		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
			cnp->cn_flags &= ~MAKEENTRY;
		}
	}
#endif

	/* Insert name into cache (as non-existent) if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(dvp, NULL, cnp);

	/* Insert name into cache if appropriate.
	 */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
		if (!(cnp->cn_flags & ISLASTCN) ||
		    (nameiop != DELETE && nameiop != RENAME)) {
			cache_enter(dvp, *vpp, cnp);
		}
	}

	return (error);
}

/*
 * Attempt to create a new entry in a directory.  If the entry
 * already exists, truncate the file if permissible, else return
 * an error.  Return the vp of the created or trunc'd file.
 *
 * IN:	dvp	- vnode of directory to put new file entry in.
 *	name	- name of new file entry.
 *	vap	- attributes of new file.
 *	excl	- flag indicating exclusive or non-exclusive mode.
 *	mode	- mode to open file with.
 *	cr	- credentials of caller.
 *	flag	- large file flag [UNUSED].
 *	ct	- caller context
 *	vsecp	- ACL to be set
 *
 * OUT:	vpp	- vnode of created or trunc'd entry.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated if new entry created
 *	 vp - ctime|mtime always, atime if new
 */
int
zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
    znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
{
	(void) excl, (void) mode, (void) flag;
	znode_t *zp;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	objset_t *os;
	dmu_tx_t *tx;
	int error;
	ksid_t *ksid;
	uid_t uid;
	gid_t gid = crgetgid(cr);
	uint64_t projid = ZFS_DEFAULT_PROJID;
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;
	uint64_t txtype;
#ifdef DEBUG_VFS_LOCKS
	vnode_t *dvp = ZTOV(dzp);
#endif

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	/* Reject names that are not valid UTF-8 on utf8only datasets. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	*zpp = NULL;

	/* Strip the sticky bit if the caller may not set it. */
	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~S_ISVTX;

	/* ZNEW: fail if the name already exists; zp must come back NULL. */
	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ASSERT3P(zp, ==, NULL);

	/*
	 * Create a new file object and update the directory
	 * to reference it.
	 */
	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		goto out;
	}

	/*
	 * We only support the creation of regular files in
	 * extended attribute directories.
	 */

	if ((dzp->z_pflags & ZFS_XATTR) &&
	    (vap->va_type != VREG)) {
		error = SET_ERROR(EINVAL);
		goto out;
	}

	if ((error = zfs_acl_ids_create(dzp, 0, vap,
	    cr, vsecp, &acl_ids)) != 0)
		goto out;

	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
		projid = zfs_inherit_projid(dzp);
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
		zfs_acl_ids_free(&acl_ids);
		error = SET_ERROR(EDQUOT);
		goto out;
	}

	/* Reserve a vnode up front; dropped again on every path below. */
	getnewvnode_reserve_();

	tx = dmu_tx_create(os);

	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);

	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa &&
	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
		    0, acl_ids.z_aclp->z_acl_bytes);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
	/* Log the create in the ZIL before dropping locks (Big Rule (5)). */
	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
	    vsecp, acl_ids.z_fuidp, vap);
	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

out:
	VNCHECKREF(dvp);
	if (error == 0) {
		*zpp = zp;
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Remove an entry from a directory.
 *
 * IN:	dvp	- vnode of directory to remove entry from.
 *		name	- name of entry to remove.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime
 *	 vp - ctime (if nlink > 0)
 */
static int
zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp;
	znode_t		*xzp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	xattr_obj;
	uint64_t	obj = 0;
	dmu_tx_t	*tx;
	boolean_t	unlinked;
	uint64_t	txtype;
	int		error;


	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zp = VTOZ(vp);
	ZFS_VERIFY_ZP(zp);
	zilog = zfsvfs->z_log;

	xattr_obj = 0;
	xzp = NULL;

	/* The caller must have delete permission on zp within dzp. */
	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (vp->v_type == VDIR) {
		error = SET_ERROR(EPERM);
		goto out;
	}

	/*
	 * NOTE(review): "ct" is not declared in this function; this can only
	 * compile if vnevent_remove() is a no-op macro on this platform that
	 * never evaluates its last argument -- confirm against the compat
	 * headers.
	 */
	vnevent_remove(vp, dvp, name, ct);

	obj = zp->z_id;

	/* are there any extended attributes? */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
		ASSERT0(error);
	}

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);

	/* If there is an xattr directory, its SA may be modified as well. */
	if (xzp) {
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	/*
	 * Mark this transaction as typically resulting in a net free of space
	 */
	dmu_tx_mark_netfree(tx);

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/* Assign failed: abort (not commit) and drop holds. */
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);

	if (error) {
		dmu_tx_commit(tx);
		goto out;
	}

	if (unlinked) {
		/*
		 * Last link dropped: queue the znode on the unlinked set for
		 * deferred reclaim and stop syncing the doomed vnode.
		 */
		zfs_unlinked_add(zp, tx);
		vp->v_vflag |= VV_NOSYNC;
	}
	/* XXX check changes to linux vnops */
	txtype = TX_REMOVE;
	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);

	dmu_tx_commit(tx);
out:

	/* Drop the hold taken by zfs_zget() on the xattr directory, if any. */
	if (xzp)
		vrele(ZTOV(xzp));

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);


	ZFS_EXIT(zfsvfs);
	return (error);
}


/*
 * Look up "name" in directory dzp, initializing *cnp for nameiop
 * (e.g. DELETE) and returning the resulting locked vnode in *vpp.
 * Goes through the FreeBSD name cache (vfs_cache_lookup()) when the
 * namecache is enabled and we are not replaying the ZIL; otherwise
 * calls zfs_lookup() directly.
 *
 * RETURN:	0 on success, error code on failure.
 */
static int
zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
    struct componentname *cnp, int nameiop)
{
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	int error;

	cnp->cn_nameptr = __DECONST(char *, name);
	cnp->cn_namelen = strlen(name);
	cnp->cn_nameiop = nameiop;
	cnp->cn_flags = ISLASTCN | SAVENAME;
	cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
	cnp->cn_cred = kcred;
#if __FreeBSD_version < 1400037
	cnp->cn_thread = curthread;
#endif

	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
		struct vop_lookup_args a;

		a.a_gen.a_desc = &vop_lookup_desc;
		a.a_dvp = ZTOV(dzp);
		a.a_vpp = vpp;
		a.a_cnp = cnp;
		error = vfs_cache_lookup(&a);
	} else {
		error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
		    B_FALSE);
	}
#ifdef ZFS_DEBUG
	if (error) {
		printf("got error %d on name %s on op %d\n", error, name,
		    nameiop);
		kdb_backtrace();
	}
#endif
	return (error);
}

/*
 * Entry point: remove file "name" from directory dzp.
 * Looks the name up with DELETE intent, delegates to zfs_remove_(),
 * then unlocks and releases the looked-up vnode.  "flags" is part of
 * the common interface and is not used by this implementation.
 */
int
zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
{
	vnode_t	*vp;
	int error;
	struct componentname cn;

	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
		return (error);

	error = zfs_remove_(ZTOV(dzp), vp, name, cr);
	vput(vp);
	return (error);
}
/*
 * Create a new directory and insert it into dvp using the name
 * provided.  Return a pointer to the inserted directory.
 *
 * IN:	dvp	- vnode of directory to add subdir to.
 *	dirname	- name of new directory.
 *	vap	- attributes of new directory.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *	vsecp	- ACL to be set
 *
 * OUT:	vpp	- vnode of created directory.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 *	 vp - ctime|mtime|atime updated
 */
int
zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
    cred_t *cr, int flags, vsecattr_t *vsecp)
{
	/* flags and vsecp are part of the common interface; unused here. */
	(void) flags, (void) vsecp;
	znode_t		*zp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	txtype;
	dmu_tx_t	*tx;
	int		error;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t   acl_ids;
	boolean_t	fuid_dirtied;

	ASSERT3U(vap->va_type, ==, VDIR);

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    ((vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* Directories may not be created inside xattr directories. */
	if (dzp->z_pflags & ZFS_XATTR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/* Reject names that are not valid UTF-8 when utf8only is set. */
	if (zfsvfs->z_utf8 && u8_validate(dirname,
	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * acl_ids must be freed on every path below once this succeeds.
	 */
	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
	    NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * First make sure the new directory doesn't exist.
	 *
	 * Existence is checked first to make sure we don't return
	 * EACCES instead of EEXIST which can cause some applications
	 * to fail.
	 */
	*zpp = NULL;

	if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ASSERT3P(zp, ==, NULL);

	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * Add a new entry to the directory.
	 */
	/* Paired with getnewvnode_drop_reserve() on all paths below. */
	getnewvnode_reserve_();
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}

	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create new node.
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/*
	 * Now put new name in parent dir.
	 */
	(void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);

	*zpp = zp;

	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
	    acl_ids.z_fuidp, vap);

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (0);
}

#if __FreeBSD_version < 1300124
/*
 * Compat shim for older FreeBSD without cache_vop_rmdir(): purge the
 * name cache for both the parent and the removed directory.
 */
static void
cache_vop_rmdir(struct vnode *dvp, struct vnode *vp)
{

	cache_purge(dvp);
	cache_purge(vp);
}
#endif

/*
 * Remove a directory subdir entry.  If the current working
 * directory is the same as the subdir to be removed, the
 * remove will fail.
 *
 * IN:	dvp	- vnode of directory to remove from.
 *	name	- name of directory to be removed.
 *	cwd	- vnode of current working directory.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
static int
zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	ZFS_VERIFY_ZP(zp);
	zilog = zfsvfs->z_log;


	/* The caller must have delete permission on zp within dzp. */
	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
		goto out;
	}

	/* Only directories may be removed with rmdir. */
	if (vp->v_type != VDIR) {
		error = SET_ERROR(ENOTDIR);
		goto out;
	}

	/*
	 * NOTE(review): "ct" is not declared in this function; this can only
	 * compile if vnevent_rmdir() is a no-op macro here that never
	 * evaluates its last argument -- confirm against the compat headers.
	 */
	vnevent_rmdir(vp, dvp, name, ct);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);
	/* Removing a directory typically frees space. */
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Destroy the directory entry; failures (presumably including a
	 * non-empty directory) are propagated to the caller unchanged.
	 */
	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);

	if (error == 0) {
		uint64_t txtype = TX_RMDIR;
		zfs_log_remove(zilog, tx, txtype, dzp, name,
		    ZFS_NO_OBJECT, B_FALSE);
	}

	dmu_tx_commit(tx);

	/* Purge stale name cache entries for both vnodes. */
	cache_vop_rmdir(dvp, vp);
out:
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Entry point: remove directory "name" from dzp.  Looks the name up
 * with DELETE intent, delegates to zfs_rmdir_(), then unlocks and
 * releases the looked-up vnode.  "cwd" and "flags" are part of the
 * common interface and are not used by this implementation.
 */
int
zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
{
	struct componentname cn;
	vnode_t	*vp;
	int error;

	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
		return (error);

	error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
	vput(vp);
	return (error);
}

/*
 * Read as many directory entries as will fit into the provided
 * buffer from the given directory
 * cursor position (specified in
 * the uio structure).
 *
 * IN:	vp	- vnode of directory to read.
 *	uio	- structure supplying read location, range info,
 *		  and return buffer.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * OUT:	uio	- updated offset and range, buffer filled.
 *	eofp	- set to true if end-of-file detected.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 *
 * Note that the low 4 bits of the cookie returned by zap is always zero.
 * This allows us to use the low range for "special" directory entries:
 * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
 * we use the offset 2 for the '.zfs' directory.
 */
static int
zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
    int *ncookies, cookie_t **cookies)
{
	znode_t		*zp = VTOZ(vp);
	iovec_t		*iovp;
	edirent_t	*eodp;
	dirent64_t	*odp;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os;
	caddr_t		outbuf;
	size_t		bufsize;
	zap_cursor_t	zc;
	zap_attribute_t	zap;
	uint_t		bytes_wanted;
	uint64_t	offset; /* must be unsigned; checks for < 1 */
	uint64_t	parent;
	int		local_eof;
	int		outcount;
	int		error;
	uint8_t		prefetch;
	boolean_t	check_sysattrs;
	uint8_t		type;
	int		ncooks;
	cookie_t	*cooks = NULL;
	/*
	 * flags stays 0 in this implementation, so the V_RDDIR_ENTFLAGS
	 * and V_RDDIR_ACCFILTER paths below are compiled in but not taken.
	 */
	int		flags = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Parent object id is needed to synthesize the ".." entry. */
	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (parent))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * If we are not given an eof variable,
	 * use a local one.
	 */
	if (eofp == NULL)
		eofp = &local_eof;

	/*
	 * Check for valid iov_len.
	 */
	if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Quit if directory has been removed (posix)
	 */
	if ((*eofp = zp->z_unlinked) != 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	error = 0;
	os = zfsvfs->z_os;
	offset = zfs_uio_offset(uio);
	prefetch = zp->z_zn_prefetch;

	/*
	 * Initialize the iterator cursor.
	 */
	if (offset <= 3) {
		/*
		 * Start iteration from the beginning of the directory.
		 */
		zap_cursor_init(&zc, os, zp->z_id);
	} else {
		/*
		 * The offset is a serialized cursor.
		 */
		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
	}

	/*
	 * Get space to change directory entries into fs independent format.
	 */
	iovp = GET_UIO_STRUCT(uio)->uio_iov;
	bytes_wanted = iovp->iov_len;
	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
		/* Stage into a kernel bounce buffer, copied out at the end. */
		bufsize = bytes_wanted;
		outbuf = kmem_alloc(bufsize, KM_SLEEP);
		odp = (struct dirent64 *)outbuf;
	} else {
		/* Single system-space iovec: write entries in place. */
		bufsize = bytes_wanted;
		outbuf = NULL;
		odp = (struct dirent64 *)iovp->iov_base;
	}
	eodp = (struct edirent *)odp;

	if (ncookies != NULL) {
		/*
		 * Minimum entry size is dirent size and 1 byte for a file name.
		 */
		ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
		    sizeof (((struct dirent *)NULL)->d_name) + 1);
		cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
		*cookies = cooks;
		*ncookies = ncooks;
	}
	/*
	 * If this VFS supports the system attribute view interface; and
	 * we're looking at an extended attribute directory; and we care
	 * about normalization conflicts on this vfs; then we must check
	 * for normalization conflicts with the sysattr name space.
	 */
#ifdef TODO
	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
	    (flags & V_RDDIR_ENTFLAGS);
#else
	check_sysattrs = 0;
#endif

	/*
	 * Transform to file-system independent format
	 */
	outcount = 0;
	while (outcount < bytes_wanted) {
		ino64_t objnum;
		ushort_t reclen;
		off64_t *next = NULL;

		/*
		 * Special case `.', `..', and `.zfs'.
		 */
		if (offset == 0) {
			(void) strcpy(zap.za_name, ".");
			zap.za_normalization_conflict = 0;
			objnum = zp->z_id;
			type = DT_DIR;
		} else if (offset == 1) {
			(void) strcpy(zap.za_name, "..");
			zap.za_normalization_conflict = 0;
			objnum = parent;
			type = DT_DIR;
		} else if (offset == 2 && zfs_show_ctldir(zp)) {
			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
			zap.za_normalization_conflict = 0;
			objnum = ZFSCTL_INO_ROOT;
			type = DT_DIR;
		} else {
			/*
			 * Grab next entry.
			 */
			if ((error = zap_cursor_retrieve(&zc, &zap))) {
				/* ENOENT from the cursor means end of dir. */
				if ((*eofp = (error == ENOENT)) != 0)
					break;
				else
					goto update;
			}

			if (zap.za_integer_length != 8 ||
			    zap.za_num_integers != 1) {
				cmn_err(CE_WARN, "zap_readdir: bad directory "
				    "entry, obj = %lld, offset = %lld\n",
				    (u_longlong_t)zp->z_id,
				    (u_longlong_t)offset);
				error = SET_ERROR(ENXIO);
				goto update;
			}

			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
			/*
			 * MacOS X can extract the object type here such as:
			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
			 */
			type = ZFS_DIRENT_TYPE(zap.za_first_integer);

			if (check_sysattrs && !zap.za_normalization_conflict) {
#ifdef TODO
				zap.za_normalization_conflict =
				    xattr_sysattr_casechk(zap.za_name);
#else
				panic("%s:%u: TODO", __func__, __LINE__);
#endif
			}
		}

		if (flags & V_RDDIR_ACCFILTER) {
			/*
			 * If we have no access at all, don't include
			 * this entry in the returned information
			 */
			znode_t	*ezp;
			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
				goto skip_entry;
			if (!zfs_has_access(ezp, cr)) {
				vrele(ZTOV(ezp));
				goto skip_entry;
			}
			vrele(ZTOV(ezp));
		}

		if (flags & V_RDDIR_ENTFLAGS)
			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
		else
			reclen = DIRENT64_RECLEN(strlen(zap.za_name));

		/*
		 * Will this entry fit in the buffer?
		 */
		if (outcount + reclen > bufsize) {
			/*
			 * Did we manage to fit anything in the buffer?
			 */
			if (!outcount) {
				error = SET_ERROR(EINVAL);
				goto update;
			}
			break;
		}
		if (flags & V_RDDIR_ENTFLAGS) {
			/*
			 * Add extended flag entry:
			 */
			eodp->ed_ino = objnum;
			eodp->ed_reclen = reclen;
			/* NOTE: ed_off is the offset for the *next* entry */
			next = &(eodp->ed_off);
			eodp->ed_eflags = zap.za_normalization_conflict ?
			    ED_CASE_CONFLICT : 0;
			(void) strncpy(eodp->ed_name, zap.za_name,
			    EDIRENT_NAMELEN(reclen));
			eodp = (edirent_t *)((intptr_t)eodp + reclen);
		} else {
			/*
			 * Add normal entry:
			 */
			odp->d_ino = objnum;
			odp->d_reclen = reclen;
			odp->d_namlen = strlen(zap.za_name);
			/* NOTE: d_off is the offset for the *next* entry. */
			next = &odp->d_off;
			strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
			odp->d_type = type;
			dirent_terminate(odp);
			odp = (dirent64_t *)((intptr_t)odp + reclen);
		}
		outcount += reclen;

		ASSERT3S(outcount, <=, bufsize);

		/* Prefetch znode */
		if (prefetch)
			dmu_prefetch(os, objnum, 0, 0, 0,
			    ZIO_PRIORITY_SYNC_READ);

	skip_entry:
		/*
		 * Move to the next entry, fill in the previous offset.
		 */
		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
			zap_cursor_advance(&zc);
			offset = zap_cursor_serialize(&zc);
		} else {
			/* Still in the synthetic '.'/'..'/'.zfs' range. */
			offset += 1;
		}

		/* Fill the offset right after advancing the cursor. */
		if (next != NULL)
			*next = offset;
		if (cooks != NULL) {
			*cooks++ = offset;
			ncooks--;
			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
		}
	}
	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */

	/* Subtract unused cookies */
	if (ncookies != NULL)
		*ncookies -= ncooks;

	if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
		/* Entries were written in place; just advance the uio. */
		iovp->iov_base += outcount;
		iovp->iov_len -= outcount;
		zfs_uio_resid(uio) -= outcount;
	} else if ((error =
	    zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
		/*
		 * Reset the pointer.
		 */
		offset = zfs_uio_offset(uio);
	}

update:
	zap_cursor_fini(&zc);
	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
		kmem_free(outbuf, bufsize);

	if (error == ENOENT)
		error = 0;

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	zfs_uio_setoffset(uio, offset);
	ZFS_EXIT(zfsvfs);
	/* On failure the cookie array must not be returned to the caller. */
	if (error != 0 && cookies != NULL) {
		free(*cookies, M_TEMP);
		*cookies = NULL;
		*ncookies = 0;
	}
	return (error);
}

/*
 * Get the requested file attributes and place them in the provided
 * vattr structure.
 *
 * IN:	vp	- vnode of file.
 *	vap	- va_mask identifies requested attributes.
 *		  If AT_XVATTR set, then optional attrs are requested
 *	flags	- ATTR_NOACLCHECK (CIFS server context)
 *	cr	- credentials of caller.
 *
 * OUT:	vap	- attribute values.
 *
 * RETURN:	0 on success, error code on failure (errors from the SA
 *		bulk lookup or the ACE_READ_ATTRIBUTES access check are
 *		propagated to the caller).
 */
static int
zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int	error = 0;
	uint32_t blksize;
	u_longlong_t nblocks;
	uint64_t mtime[2], ctime[2], crtime[2], rdev;
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t *xoap = NULL;
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	/* Up to 4 attrs are fetched: mtime, ctime, crtime, optional rdev. */
	sa_bulk_attr_t bulk[4];
	int count = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
		    &rdev, 8);

	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
	 * Also, if we are the owner don't bother, since owner should
	 * always be allowed to read basic attributes of file.
	 */
	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
	    (vap->va_uid != crgetuid(cr))) {
		if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
		    skipaclchk, cr))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * Return all attributes.  It's cheaper to provide the answer
	 * than to determine whether we were asked the question.
	 */

	vap->va_type = IFTOVT(zp->z_mode);
	vap->va_mode = zp->z_mode & ~S_IFMT;
	vn_fsid(vp, vap);
	vap->va_nodeid = zp->z_id;
	vap->va_nlink = zp->z_links;
	/* Account for the synthetic '.zfs' entry on the root directory. */
	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
	    zp->z_links < ZFS_LINK_MAX)
		vap->va_nlink++;
	vap->va_size = zp->z_size;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		vap->va_rdev = zfs_cmpldev(rdev);
	vap->va_gen = zp->z_gen;
	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */
	vap->va_filerev = zp->z_seq;

	/*
	 * Add in any requested optional attributes and the create time.
	 * Also set the corresponding bits in the returned attribute bitmap.
	 */
	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
			xoap->xoa_archive =
			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
			XVA_SET_RTN(xvap, XAT_ARCHIVE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
			xoap->xoa_readonly =
			    ((zp->z_pflags & ZFS_READONLY) != 0);
			XVA_SET_RTN(xvap, XAT_READONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
			xoap->xoa_system =
			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
			XVA_SET_RTN(xvap, XAT_SYSTEM);
		}

		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
			xoap->xoa_hidden =
			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
			XVA_SET_RTN(xvap, XAT_HIDDEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			xoap->xoa_nounlink =
			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
			XVA_SET_RTN(xvap, XAT_NOUNLINK);
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			xoap->xoa_immutable =
			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			xoap->xoa_appendonly =
			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
			XVA_SET_RTN(xvap, XAT_APPENDONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			xoap->xoa_nodump =
			    ((zp->z_pflags & ZFS_NODUMP) != 0);
			XVA_SET_RTN(xvap, XAT_NODUMP);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
			xoap->xoa_opaque =
			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
			XVA_SET_RTN(xvap, XAT_OPAQUE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			xoap->xoa_av_quarantined =
			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			xoap->xoa_av_modified =
			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
		    vp->v_type == VREG) {
			zfs_sa_get_scanstamp(zp, xvap);
		}

		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
			XVA_SET_RTN(xvap, XAT_REPARSE);
		}
		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
			xoap->xoa_generation = zp->z_gen;
			XVA_SET_RTN(xvap, XAT_GEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
			xoap->xoa_offline =
			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
			XVA_SET_RTN(xvap, XAT_OFFLINE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
			xoap->xoa_sparse =
			    ((zp->z_pflags & ZFS_SPARSE) != 0);
			XVA_SET_RTN(xvap, XAT_SPARSE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
			xoap->xoa_projinherit =
			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
			XVA_SET_RTN(xvap, XAT_PROJINHERIT);
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
			xoap->xoa_projid = zp->z_projid;
			XVA_SET_RTN(xvap, XAT_PROJID);
		}
	}

	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
	ZFS_TIME_DECODE(&vap->va_birthtime, crtime);


	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
	vap->va_blksize = blksize;
	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */

	if (zp->z_blksz == 0) {
		/*
		 * Block size hasn't been set; suggest maximal I/O transfers.
		 */
		vap->va_blksize = zfsvfs->z_max_blksz;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Set the file attributes to the values contained in the
 * vattr structure.
 *
 * IN:	zp	- znode of file to be modified.
 *	vap	- new attribute values.
 *		  If AT_XVATTR set, then optional attrs are being set
 *	flags	- ATTR_UTIME set if non-default time values provided.
 *		- ATTR_NOACLCHECK (CIFS context only).
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * RETURN:	0 on success, error code on failure.
2199 * 2200 * Timestamps: 2201 * vp - ctime updated, mtime updated if size changed. 2202 */ 2203 int 2204 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr) 2205 { 2206 vnode_t *vp = ZTOV(zp); 2207 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2208 objset_t *os; 2209 zilog_t *zilog; 2210 dmu_tx_t *tx; 2211 vattr_t oldva; 2212 xvattr_t tmpxvattr; 2213 uint_t mask = vap->va_mask; 2214 uint_t saved_mask = 0; 2215 uint64_t saved_mode; 2216 int trim_mask = 0; 2217 uint64_t new_mode; 2218 uint64_t new_uid, new_gid; 2219 uint64_t xattr_obj; 2220 uint64_t mtime[2], ctime[2]; 2221 uint64_t projid = ZFS_INVALID_PROJID; 2222 znode_t *attrzp; 2223 int need_policy = FALSE; 2224 int err, err2; 2225 zfs_fuid_info_t *fuidp = NULL; 2226 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2227 xoptattr_t *xoap; 2228 zfs_acl_t *aclp; 2229 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2230 boolean_t fuid_dirtied = B_FALSE; 2231 sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2232 int count = 0, xattr_count = 0; 2233 2234 if (mask == 0) 2235 return (0); 2236 2237 if (mask & AT_NOSET) 2238 return (SET_ERROR(EINVAL)); 2239 2240 ZFS_ENTER(zfsvfs); 2241 ZFS_VERIFY_ZP(zp); 2242 2243 os = zfsvfs->z_os; 2244 zilog = zfsvfs->z_log; 2245 2246 /* 2247 * Make sure that if we have ephemeral uid/gid or xvattr specified 2248 * that file system is at proper version level 2249 */ 2250 2251 if (zfsvfs->z_use_fuids == B_FALSE && 2252 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2253 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2254 (mask & AT_XVATTR))) { 2255 ZFS_EXIT(zfsvfs); 2256 return (SET_ERROR(EINVAL)); 2257 } 2258 2259 if (mask & AT_SIZE && vp->v_type == VDIR) { 2260 ZFS_EXIT(zfsvfs); 2261 return (SET_ERROR(EISDIR)); 2262 } 2263 2264 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2265 ZFS_EXIT(zfsvfs); 2266 return (SET_ERROR(EINVAL)); 2267 } 2268 2269 /* 2270 * If this is an xvattr_t, then get a pointer to the structure of 2271 * optional 
attributes. If this is NULL, then we have a vattr_t. 2272 */ 2273 xoap = xva_getxoptattr(xvap); 2274 2275 xva_init(&tmpxvattr); 2276 2277 /* 2278 * Immutable files can only alter immutable bit and atime 2279 */ 2280 if ((zp->z_pflags & ZFS_IMMUTABLE) && 2281 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2282 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2283 ZFS_EXIT(zfsvfs); 2284 return (SET_ERROR(EPERM)); 2285 } 2286 2287 /* 2288 * Note: ZFS_READONLY is handled in zfs_zaccess_common. 2289 */ 2290 2291 /* 2292 * Verify timestamps doesn't overflow 32 bits. 2293 * ZFS can handle large timestamps, but 32bit syscalls can't 2294 * handle times greater than 2039. This check should be removed 2295 * once large timestamps are fully supported. 2296 */ 2297 if (mask & (AT_ATIME | AT_MTIME)) { 2298 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2299 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2300 ZFS_EXIT(zfsvfs); 2301 return (SET_ERROR(EOVERFLOW)); 2302 } 2303 } 2304 if (xoap != NULL && (mask & AT_XVATTR)) { 2305 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 2306 TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 2307 ZFS_EXIT(zfsvfs); 2308 return (SET_ERROR(EOVERFLOW)); 2309 } 2310 2311 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) { 2312 if (!dmu_objset_projectquota_enabled(os) || 2313 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) { 2314 ZFS_EXIT(zfsvfs); 2315 return (SET_ERROR(EOPNOTSUPP)); 2316 } 2317 2318 projid = xoap->xoa_projid; 2319 if (unlikely(projid == ZFS_INVALID_PROJID)) { 2320 ZFS_EXIT(zfsvfs); 2321 return (SET_ERROR(EINVAL)); 2322 } 2323 2324 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID) 2325 projid = ZFS_INVALID_PROJID; 2326 else 2327 need_policy = TRUE; 2328 } 2329 2330 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) && 2331 (xoap->xoa_projinherit != 2332 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) && 2333 (!dmu_objset_projectquota_enabled(os) || 2334 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) { 2335 
ZFS_EXIT(zfsvfs); 2336 return (SET_ERROR(EOPNOTSUPP)); 2337 } 2338 } 2339 2340 attrzp = NULL; 2341 aclp = NULL; 2342 2343 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2344 ZFS_EXIT(zfsvfs); 2345 return (SET_ERROR(EROFS)); 2346 } 2347 2348 /* 2349 * First validate permissions 2350 */ 2351 2352 if (mask & AT_SIZE) { 2353 /* 2354 * XXX - Note, we are not providing any open 2355 * mode flags here (like FNDELAY), so we may 2356 * block if there are locks present... this 2357 * should be addressed in openat(). 2358 */ 2359 /* XXX - would it be OK to generate a log record here? */ 2360 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2361 if (err) { 2362 ZFS_EXIT(zfsvfs); 2363 return (err); 2364 } 2365 } 2366 2367 if (mask & (AT_ATIME|AT_MTIME) || 2368 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2369 XVA_ISSET_REQ(xvap, XAT_READONLY) || 2370 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2371 XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2372 XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2373 XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 2374 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2375 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2376 skipaclchk, cr); 2377 } 2378 2379 if (mask & (AT_UID|AT_GID)) { 2380 int idmask = (mask & (AT_UID|AT_GID)); 2381 int take_owner; 2382 int take_group; 2383 2384 /* 2385 * NOTE: even if a new mode is being set, 2386 * we may clear S_ISUID/S_ISGID bits. 2387 */ 2388 2389 if (!(mask & AT_MODE)) 2390 vap->va_mode = zp->z_mode; 2391 2392 /* 2393 * Take ownership or chgrp to group we are a member of 2394 */ 2395 2396 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2397 take_group = (mask & AT_GID) && 2398 zfs_groupmember(zfsvfs, vap->va_gid, cr); 2399 2400 /* 2401 * If both AT_UID and AT_GID are set then take_owner and 2402 * take_group must both be set in order to allow taking 2403 * ownership. 
2404 * 2405 * Otherwise, send the check through secpolicy_vnode_setattr() 2406 * 2407 */ 2408 2409 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2410 ((idmask == AT_UID) && take_owner) || 2411 ((idmask == AT_GID) && take_group)) { 2412 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2413 skipaclchk, cr) == 0) { 2414 /* 2415 * Remove setuid/setgid for non-privileged users 2416 */ 2417 secpolicy_setid_clear(vap, vp, cr); 2418 trim_mask = (mask & (AT_UID|AT_GID)); 2419 } else { 2420 need_policy = TRUE; 2421 } 2422 } else { 2423 need_policy = TRUE; 2424 } 2425 } 2426 2427 oldva.va_mode = zp->z_mode; 2428 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2429 if (mask & AT_XVATTR) { 2430 /* 2431 * Update xvattr mask to include only those attributes 2432 * that are actually changing. 2433 * 2434 * the bits will be restored prior to actually setting 2435 * the attributes so the caller thinks they were set. 2436 */ 2437 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2438 if (xoap->xoa_appendonly != 2439 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 2440 need_policy = TRUE; 2441 } else { 2442 XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2443 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2444 } 2445 } 2446 2447 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { 2448 if (xoap->xoa_projinherit != 2449 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) { 2450 need_policy = TRUE; 2451 } else { 2452 XVA_CLR_REQ(xvap, XAT_PROJINHERIT); 2453 XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT); 2454 } 2455 } 2456 2457 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2458 if (xoap->xoa_nounlink != 2459 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 2460 need_policy = TRUE; 2461 } else { 2462 XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2463 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2464 } 2465 } 2466 2467 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2468 if (xoap->xoa_immutable != 2469 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 2470 need_policy = TRUE; 2471 } else { 2472 XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2473 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 
2474 } 2475 } 2476 2477 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2478 if (xoap->xoa_nodump != 2479 ((zp->z_pflags & ZFS_NODUMP) != 0)) { 2480 need_policy = TRUE; 2481 } else { 2482 XVA_CLR_REQ(xvap, XAT_NODUMP); 2483 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 2484 } 2485 } 2486 2487 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2488 if (xoap->xoa_av_modified != 2489 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 2490 need_policy = TRUE; 2491 } else { 2492 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2493 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2494 } 2495 } 2496 2497 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2498 if ((vp->v_type != VREG && 2499 xoap->xoa_av_quarantined) || 2500 xoap->xoa_av_quarantined != 2501 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 2502 need_policy = TRUE; 2503 } else { 2504 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2505 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2506 } 2507 } 2508 2509 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2510 ZFS_EXIT(zfsvfs); 2511 return (SET_ERROR(EPERM)); 2512 } 2513 2514 if (need_policy == FALSE && 2515 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 2516 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2517 need_policy = TRUE; 2518 } 2519 } 2520 2521 if (mask & AT_MODE) { 2522 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 2523 err = secpolicy_setid_setsticky_clear(vp, vap, 2524 &oldva, cr); 2525 if (err) { 2526 ZFS_EXIT(zfsvfs); 2527 return (err); 2528 } 2529 trim_mask |= AT_MODE; 2530 } else { 2531 need_policy = TRUE; 2532 } 2533 } 2534 2535 if (need_policy) { 2536 /* 2537 * If trim_mask is set then take ownership 2538 * has been granted or write_acl is present and user 2539 * has the ability to modify mode. In that case remove 2540 * UID|GID and or MODE from mask so that 2541 * secpolicy_vnode_setattr() doesn't revoke it. 
2542 */ 2543 2544 if (trim_mask) { 2545 saved_mask = vap->va_mask; 2546 vap->va_mask &= ~trim_mask; 2547 if (trim_mask & AT_MODE) { 2548 /* 2549 * Save the mode, as secpolicy_vnode_setattr() 2550 * will overwrite it with ova.va_mode. 2551 */ 2552 saved_mode = vap->va_mode; 2553 } 2554 } 2555 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2556 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2557 if (err) { 2558 ZFS_EXIT(zfsvfs); 2559 return (err); 2560 } 2561 2562 if (trim_mask) { 2563 vap->va_mask |= saved_mask; 2564 if (trim_mask & AT_MODE) { 2565 /* 2566 * Recover the mode after 2567 * secpolicy_vnode_setattr(). 2568 */ 2569 vap->va_mode = saved_mode; 2570 } 2571 } 2572 } 2573 2574 /* 2575 * secpolicy_vnode_setattr, or take ownership may have 2576 * changed va_mask 2577 */ 2578 mask = vap->va_mask; 2579 2580 if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) { 2581 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 2582 &xattr_obj, sizeof (xattr_obj)); 2583 2584 if (err == 0 && xattr_obj) { 2585 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 2586 if (err == 0) { 2587 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 2588 if (err != 0) 2589 vrele(ZTOV(attrzp)); 2590 } 2591 if (err) 2592 goto out2; 2593 } 2594 if (mask & AT_UID) { 2595 new_uid = zfs_fuid_create(zfsvfs, 2596 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 2597 if (new_uid != zp->z_uid && 2598 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT, 2599 new_uid)) { 2600 if (attrzp) 2601 vput(ZTOV(attrzp)); 2602 err = SET_ERROR(EDQUOT); 2603 goto out2; 2604 } 2605 } 2606 2607 if (mask & AT_GID) { 2608 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 2609 cr, ZFS_GROUP, &fuidp); 2610 if (new_gid != zp->z_gid && 2611 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT, 2612 new_gid)) { 2613 if (attrzp) 2614 vput(ZTOV(attrzp)); 2615 err = SET_ERROR(EDQUOT); 2616 goto out2; 2617 } 2618 } 2619 2620 if (projid != ZFS_INVALID_PROJID && 2621 zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, 
projid)) { 2622 if (attrzp) 2623 vput(ZTOV(attrzp)); 2624 err = SET_ERROR(EDQUOT); 2625 goto out2; 2626 } 2627 } 2628 tx = dmu_tx_create(os); 2629 2630 if (mask & AT_MODE) { 2631 uint64_t pmode = zp->z_mode; 2632 uint64_t acl_obj; 2633 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2634 2635 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 2636 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 2637 err = SET_ERROR(EPERM); 2638 goto out; 2639 } 2640 2641 if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))) 2642 goto out; 2643 2644 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 2645 /* 2646 * Are we upgrading ACL from old V0 format 2647 * to V1 format? 2648 */ 2649 if (zfsvfs->z_version >= ZPL_VERSION_FUID && 2650 zfs_znode_acl_version(zp) == 2651 ZFS_ACL_VERSION_INITIAL) { 2652 dmu_tx_hold_free(tx, acl_obj, 0, 2653 DMU_OBJECT_END); 2654 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2655 0, aclp->z_acl_bytes); 2656 } else { 2657 dmu_tx_hold_write(tx, acl_obj, 0, 2658 aclp->z_acl_bytes); 2659 } 2660 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2661 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2662 0, aclp->z_acl_bytes); 2663 } 2664 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2665 } else { 2666 if (((mask & AT_XVATTR) && 2667 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) || 2668 (projid != ZFS_INVALID_PROJID && 2669 !(zp->z_pflags & ZFS_PROJID))) 2670 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2671 else 2672 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2673 } 2674 2675 if (attrzp) { 2676 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 2677 } 2678 2679 fuid_dirtied = zfsvfs->z_fuid_dirty; 2680 if (fuid_dirtied) 2681 zfs_fuid_txhold(zfsvfs, tx); 2682 2683 zfs_sa_upgrade_txholds(tx, zp); 2684 2685 err = dmu_tx_assign(tx, TXG_WAIT); 2686 if (err) 2687 goto out; 2688 2689 count = 0; 2690 /* 2691 * Set each attribute requested. 2692 * We group settings according to the locks they need to acquire. 
2693 * 2694 * Note: you cannot set ctime directly, although it will be 2695 * updated as a side-effect of calling this function. 2696 */ 2697 2698 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) { 2699 /* 2700 * For the existed object that is upgraded from old system, 2701 * its on-disk layout has no slot for the project ID attribute. 2702 * But quota accounting logic needs to access related slots by 2703 * offset directly. So we need to adjust old objects' layout 2704 * to make the project ID to some unified and fixed offset. 2705 */ 2706 if (attrzp) 2707 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid); 2708 if (err == 0) 2709 err = sa_add_projid(zp->z_sa_hdl, tx, projid); 2710 2711 if (unlikely(err == EEXIST)) 2712 err = 0; 2713 else if (err != 0) 2714 goto out; 2715 else 2716 projid = ZFS_INVALID_PROJID; 2717 } 2718 2719 if (mask & (AT_UID|AT_GID|AT_MODE)) 2720 mutex_enter(&zp->z_acl_lock); 2721 2722 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 2723 &zp->z_pflags, sizeof (zp->z_pflags)); 2724 2725 if (attrzp) { 2726 if (mask & (AT_UID|AT_GID|AT_MODE)) 2727 mutex_enter(&attrzp->z_acl_lock); 2728 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2729 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 2730 sizeof (attrzp->z_pflags)); 2731 if (projid != ZFS_INVALID_PROJID) { 2732 attrzp->z_projid = projid; 2733 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2734 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid, 2735 sizeof (attrzp->z_projid)); 2736 } 2737 } 2738 2739 if (mask & (AT_UID|AT_GID)) { 2740 2741 if (mask & AT_UID) { 2742 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 2743 &new_uid, sizeof (new_uid)); 2744 zp->z_uid = new_uid; 2745 if (attrzp) { 2746 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2747 SA_ZPL_UID(zfsvfs), NULL, &new_uid, 2748 sizeof (new_uid)); 2749 attrzp->z_uid = new_uid; 2750 } 2751 } 2752 2753 if (mask & AT_GID) { 2754 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 2755 NULL, &new_gid, sizeof (new_gid)); 2756 
zp->z_gid = new_gid; 2757 if (attrzp) { 2758 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2759 SA_ZPL_GID(zfsvfs), NULL, &new_gid, 2760 sizeof (new_gid)); 2761 attrzp->z_gid = new_gid; 2762 } 2763 } 2764 if (!(mask & AT_MODE)) { 2765 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 2766 NULL, &new_mode, sizeof (new_mode)); 2767 new_mode = zp->z_mode; 2768 } 2769 err = zfs_acl_chown_setattr(zp); 2770 ASSERT0(err); 2771 if (attrzp) { 2772 vn_seqc_write_begin(ZTOV(attrzp)); 2773 err = zfs_acl_chown_setattr(attrzp); 2774 vn_seqc_write_end(ZTOV(attrzp)); 2775 ASSERT0(err); 2776 } 2777 } 2778 2779 if (mask & AT_MODE) { 2780 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 2781 &new_mode, sizeof (new_mode)); 2782 zp->z_mode = new_mode; 2783 ASSERT3P(aclp, !=, NULL); 2784 err = zfs_aclset_common(zp, aclp, cr, tx); 2785 ASSERT0(err); 2786 if (zp->z_acl_cached) 2787 zfs_acl_free(zp->z_acl_cached); 2788 zp->z_acl_cached = aclp; 2789 aclp = NULL; 2790 } 2791 2792 2793 if (mask & AT_ATIME) { 2794 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 2795 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 2796 &zp->z_atime, sizeof (zp->z_atime)); 2797 } 2798 2799 if (mask & AT_MTIME) { 2800 ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 2801 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 2802 mtime, sizeof (mtime)); 2803 } 2804 2805 if (projid != ZFS_INVALID_PROJID) { 2806 zp->z_projid = projid; 2807 SA_ADD_BULK_ATTR(bulk, count, 2808 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid, 2809 sizeof (zp->z_projid)); 2810 } 2811 2812 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 2813 if (mask & AT_SIZE && !(mask & AT_MTIME)) { 2814 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 2815 NULL, mtime, sizeof (mtime)); 2816 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 2817 &ctime, sizeof (ctime)); 2818 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); 2819 } else if (mask != 0) { 2820 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 2821 &ctime, sizeof (ctime)); 2822 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime); 2823 if (attrzp) { 2824 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2825 SA_ZPL_CTIME(zfsvfs), NULL, 2826 &ctime, sizeof (ctime)); 2827 zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 2828 mtime, ctime); 2829 } 2830 } 2831 2832 /* 2833 * Do this after setting timestamps to prevent timestamp 2834 * update from toggling bit 2835 */ 2836 2837 if (xoap && (mask & AT_XVATTR)) { 2838 2839 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 2840 xoap->xoa_createtime = vap->va_birthtime; 2841 /* 2842 * restore trimmed off masks 2843 * so that return masks can be set for caller. 
2844 */ 2845 2846 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 2847 XVA_SET_REQ(xvap, XAT_APPENDONLY); 2848 } 2849 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 2850 XVA_SET_REQ(xvap, XAT_NOUNLINK); 2851 } 2852 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 2853 XVA_SET_REQ(xvap, XAT_IMMUTABLE); 2854 } 2855 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 2856 XVA_SET_REQ(xvap, XAT_NODUMP); 2857 } 2858 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 2859 XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 2860 } 2861 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 2862 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 2863 } 2864 if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) { 2865 XVA_SET_REQ(xvap, XAT_PROJINHERIT); 2866 } 2867 2868 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 2869 ASSERT3S(vp->v_type, ==, VREG); 2870 2871 zfs_xvattr_set(zp, xvap, tx); 2872 } 2873 2874 if (fuid_dirtied) 2875 zfs_fuid_sync(zfsvfs, tx); 2876 2877 if (mask != 0) 2878 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 2879 2880 if (mask & (AT_UID|AT_GID|AT_MODE)) 2881 mutex_exit(&zp->z_acl_lock); 2882 2883 if (attrzp) { 2884 if (mask & (AT_UID|AT_GID|AT_MODE)) 2885 mutex_exit(&attrzp->z_acl_lock); 2886 } 2887 out: 2888 if (err == 0 && attrzp) { 2889 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 2890 xattr_count, tx); 2891 ASSERT0(err2); 2892 } 2893 2894 if (attrzp) 2895 vput(ZTOV(attrzp)); 2896 2897 if (aclp) 2898 zfs_acl_free(aclp); 2899 2900 if (fuidp) { 2901 zfs_fuid_info_free(fuidp); 2902 fuidp = NULL; 2903 } 2904 2905 if (err) { 2906 dmu_tx_abort(tx); 2907 } else { 2908 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 2909 dmu_tx_commit(tx); 2910 } 2911 2912 out2: 2913 if (os->os_sync == ZFS_SYNC_ALWAYS) 2914 zil_commit(zilog, 0); 2915 2916 ZFS_EXIT(zfsvfs); 2917 return (err); 2918 } 2919 2920 /* 2921 * Look up the directory entries corresponding to the source and target 2922 * directory/name pairs. 
 */
static int
zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
    znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
    znode_t **tzpp)
{
	zfsvfs_t *zfsvfs;
	znode_t *szp, *tzp;
	int error;

	/*
	 * Before using sdzp and tdzp we must ensure that they are live.
	 * As a porting legacy from illumos we have two things to worry
	 * about. One is typical for FreeBSD and it is that the vnode is
	 * not reclaimed (doomed). The other is that the znode is live.
	 * The current code can invalidate the znode without acquiring the
	 * corresponding vnode lock if the object represented by the znode
	 * and vnode is no longer valid after a rollback or receive operation.
	 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
	 * that protects the znodes from the invalidation.
	 */
	zfsvfs = sdzp->z_zfsvfs;
	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(sdzp);
	ZFS_VERIFY_ZP(tdzp);

	/*
	 * Re-resolve svp to be certain it still exists and fetch the
	 * correct vnode.
	 */
	error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
	if (error != 0) {
		/*
		 * Source entry invalid or not there.  Renaming "." or ".."
		 * is rejected with EINVAL rather than the lookup error.
		 */
		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
			error = SET_ERROR(EINVAL);
		goto out;
	}
	*szpp = szp;

	/*
	 * Re-resolve tvp, if it disappeared we just carry on.
	 * On failure the source znode's vnode reference taken by the
	 * lookup above must be dropped before returning.
	 */
	error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
	if (error != 0) {
		vrele(ZTOV(szp));
		if ((tcnp->cn_flags & ISDOTDOT) != 0)
			error = SET_ERROR(EINVAL);
		goto out;
	}
	*tzpp = tzp;
out:
	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * We acquire all but fdvp locks using non-blocking acquisitions.  If we
 * fail to acquire any lock in the path we will drop all held locks,
 * acquire the new lock in a blocking fashion, and then release it and
 * restart the rename.  This acquire/release step ensures that we do not
 * spin on a lock waiting for release.  On error release all vnode locks
 * and decrement references the way tmpfs_rename() would do.
 *
 * On entry tdvp (and *tvpp, if set) are locked by the caller; on success
 * all four vnodes (sdvp, tdvp, *svpp, and *tvpp if non-NULL) are returned
 * exclusively locked.  On error no vnode locks are held.
 */
static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
    struct vnode *tdvp, struct vnode **tvpp,
    const struct componentname *scnp, const struct componentname *tcnp)
{
	struct vnode *nvp, *svp, *tvp;
	znode_t *sdzp, *tdzp, *szp, *tzp;
	int error;

	/* Drop the caller's locks so we can establish lock order ourselves. */
	VOP_UNLOCK1(tdvp);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK1(*tvpp);

relock:
	/* Source directory is always acquired first, blocking. */
	error = vn_lock(sdvp, LK_EXCLUSIVE);
	if (error)
		goto out;
	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		if (error != EBUSY)
			goto out;
		/*
		 * Blocking acquire-then-release: wait for the contended
		 * lock once, then restart so the full ordering is redone.
		 */
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto out;
		VOP_UNLOCK1(tdvp);
		goto relock;
	}
	tdzp = VTOZ(tdvp);
	sdzp = VTOZ(sdvp);

	/* Re-resolve source and target entries under the directory locks. */
	error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		goto out;
	}
	svp = ZTOV(szp);
	tvp = tzp != NULL ? ZTOV(tzp) : NULL;

	/*
	 * Now try acquire locks on svp and tvp.
	 */
	nvp = svp;
	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		if (tvp != NULL)
			vrele(tvp);
		if (error != EBUSY) {
			vrele(nvp);
			goto out;
		}
		error = vn_lock(nvp, LK_EXCLUSIVE);
		if (error != 0) {
			vrele(nvp);
			goto out;
		}
		VOP_UNLOCK1(nvp);
		/*
		 * Concurrent rename race.
		 * XXX ?
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = SET_ERROR(EINVAL);
			goto out;
		}
		/* The re-resolved source vnode replaces the caller's. */
		vrele(*svpp);
		*svpp = nvp;
		goto relock;
	}
	vrele(*svpp);
	*svpp = nvp;

	if (*tvpp != NULL)
		vrele(*tvpp);
	*tvpp = NULL;
	if (tvp != NULL) {
		nvp = tvp;
		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
		if (error != 0) {
			VOP_UNLOCK1(sdvp);
			VOP_UNLOCK1(tdvp);
			VOP_UNLOCK1(*svpp);
			if (error != EBUSY) {
				vrele(nvp);
				goto out;
			}
			error = vn_lock(nvp, LK_EXCLUSIVE);
			if (error != 0) {
				vrele(nvp);
				goto out;
			}
			/* vput drops both the lock and the reference. */
			vput(nvp);
			goto relock;
		}
		*tvpp = nvp;
	}

	return (0);

out:
	return (error);
}

/*
 * Verify that moving szp from sdzp into tdzp would not create a cycle,
 * i.e. that tdzp is not a descendant of szp (you cannot rename
 * /usr/a/b to /usr/a/b/c/d).  Walks from tdzp up the parent chain.
 *
 * Note that we must use VRELE_ASYNC in this function as it walks
 * up the directory tree and vrele may need to acquire an exclusive
 * lock if a last reference to a vnode is dropped.
 */
static int
zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
{
	zfsvfs_t *zfsvfs;
	znode_t *zp, *zp1;
	uint64_t parent;
	int error;

	zfsvfs = tdzp->z_zfsvfs;
	if (tdzp == szp)
		return (SET_ERROR(EINVAL));
	if (tdzp == sdzp)
		return (0);
	if (tdzp->z_id == zfsvfs->z_root)
		return (0);
	zp = tdzp;
	for (;;) {
		ASSERT(!zp->z_unlinked);
		if ((error = sa_lookup(zp->z_sa_hdl,
		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
			break;

		if (parent == szp->z_id) {
			/* tdzp lives below szp: the rename would loop. */
			error = SET_ERROR(EINVAL);
			break;
		}
		if (parent == zfsvfs->z_root)
			break;
		if (parent == sdzp->z_id)
			break;

		error = zfs_zget(zfsvfs, parent, &zp1);
		if (error != 0)
			break;

		if (zp != tdzp)
			VN_RELE_ASYNC(ZTOV(zp),
			    dsl_pool_zrele_taskq(
			    dmu_objset_pool(zfsvfs->z_os)));
		zp = zp1;
	}

	if (error == ENOTDIR)
		panic("checkpath: .. not a directory\n");
	if (zp != tdzp)
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
	return (error);
}

#if __FreeBSD_version < 1300124
/*
 * Compatibility shim for older FreeBSD without the namecache's
 * cache_vop_rename() helper: purge the affected entries by hand.
 */
static void
cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
    struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp)
{

	cache_purge(fvp);
	if (tvp != NULL)
		cache_purge(tvp);
	cache_purge_negative(tdvp);
}
#endif

static int
zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
    cred_t *cr);

/*
 * Move an entry from the provided source directory to the target
 * directory.  Change the entry name as indicated.
 *
 * IN:	sdvp	- Source directory containing the "old entry".
 *	scnp	- Old entry name.
 *	tdvp	- Target directory to contain the "new entry".
 *	tcnp	- New entry name.
 *	cr	- credentials of caller.
 * INOUT:	svpp	- Source file
 *		tvpp	- Target file, may point to NULL initially
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	sdvp,tdvp - ctime|mtime updated
 */
static int
zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
    cred_t *cr)
{
	int error;

	ASSERT_VOP_ELOCKED(tdvp, __func__);
	if (*tvpp != NULL)
		ASSERT_VOP_ELOCKED(*tvpp, __func__);

	/* Reject renames across filesystems. */
	if ((*svpp)->v_mount != tdvp->v_mount ||
	    ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	/* Renaming into the .zfs control tree is likewise cross-device. */
	if (zfsctl_is_node(tdvp)) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	/*
	 * Lock all four vnodes to ensure safety and semantics of renaming.
	 */
	error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
	if (error != 0) {
		/* no vnodes are locked in the case of error here */
		return (error);
	}

	error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
	VOP_UNLOCK1(sdvp);
	VOP_UNLOCK1(*svpp);
out:
	/* Drop the remaining locks (caller's on the early-exit paths). */
	if (*tvpp != NULL)
		VOP_UNLOCK1(*tvpp);
	if (tdvp != *tvpp)
		VOP_UNLOCK1(tdvp);

	return (error);
}

/*
 * Second half of zfs_do_rename(): entered with all four vnodes
 * exclusively locked (see zfs_rename_relock()).  Performs permission
 * and sanity checks, then carries out the link create/destroy steps
 * inside a single DMU transaction.  Locks are released by the caller.
 */
static int
zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
    cred_t *cr)
{
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs;
	zilog_t *zilog;
	znode_t *tdzp, *sdzp, *tzp, *szp;
	const char *snm = scnp->cn_nameptr;
	const char *tnm = tcnp->cn_nameptr;
	int error;

	tdzp = VTOZ(tdvp);
	sdzp = VTOZ(sdvp);
	zfsvfs = tdzp->z_zfsvfs;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(tdzp);
	ZFS_VERIFY_ZP(sdzp);
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		error = SET_ERROR(EILSEQ);
		goto out;
	}

	/* If source and target are the same file, there is nothing to do. */
	if ((*svpp) == (*tvpp)) {
		error = 0;
		goto out;
	}

	/* Refuse to move a mount point or to clobber one. */
	if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
	    ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
	    (*tvpp)->v_mountedhere != NULL)) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	szp = VTOZ(*svpp);
	ZFS_VERIFY_ZP(szp);
	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
	if (tzp != NULL)
		ZFS_VERIFY_ZP(tzp);

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/outof an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
		error = SET_ERROR(EINVAL);
		goto out;
	}

	/*
	 * If we are using project inheritance, means if the directory has
	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
	 * not only the project ID, but also the ZFS_PROJINHERIT flag.  Under
	 * such case, we only allow renames into our tree when the project
	 * IDs are the same.
	 */
	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
	    tdzp->z_projid != szp->z_projid) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */
	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
		goto out;

	if ((*svpp)->v_type == VDIR) {
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 * NOTE(review): plain EINVAL here, not SET_ERROR(EINVAL) —
		 * inconsistent with the rest of this file; confirm intent.
		 */
		if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
		    sdzp == szp ||
		    (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
			error = EINVAL;
			goto out;
		}

		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if ((error = zfs_rename_check(szp, sdzp, tdzp)))
			goto out;
	}

	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if ((*svpp)->v_type == VDIR) {
			if ((*tvpp)->v_type != VDIR) {
				error = SET_ERROR(ENOTDIR);
				goto out;
			} else {
				cache_purge(tdvp);
				if (sdvp != tdvp)
					cache_purge(sdvp);
			}
		} else {
			if ((*tvpp)->v_type == VDIR) {
				error = SET_ERROR(EISDIR);
				goto out;
			}
		}
	}

	/* Invalidate v_seqc users while directory entries are in flux. */
	vn_seqc_write_begin(*svpp);
	vn_seqc_write_begin(sdvp);
	if (*tvpp != NULL)
		vn_seqc_write_begin(*tvpp);
	if (tdvp != *tvpp)
		vn_seqc_write_begin(tdvp);

	/*
	 * NOTE(review): 'ct' is not declared in this function; presumably
	 * the vnevent_*() calls are no-op macros on FreeBSD so the argument
	 * is never evaluated — confirm against the compat headers.
	 */
	vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
	if (tzp)
		vnevent_rename_dest(*tvpp, tdvp, tnm, ct);

	/*
	 * notify the target directory if it is not the same
	 * as source directory.
	 */
	if (tdvp != sdvp) {
		vnevent_rename_dest_dir(tdvp, ct);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp) {
		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tdzp);
	}
	if (tzp) {
		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tzp);
	}

	zfs_sa_upgrade_txholds(tx, szp);
	/* Unlinking the existing target may touch the unlinked set. */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		goto out_seq;
	}

	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);

	if (error == 0) {
		error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_pflags |= ZFS_AV_MODIFIED;

			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
			ASSERT0(error);

			error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
			    NULL);
			if (error == 0) {
				zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
				    snm, tdzp, tnm, szp);

				/*
				 * Update path information for the target vnode
				 */
				vn_renamepath(tdvp, *svpp, tnm, strlen(tnm));
			} else {
				/*
				 * At this point, we have successfully created
				 * the target name, but have failed to remove
				 * the source name.  Since the create was done
				 * with the ZRENAMING flag, there are
				 * complications; for one, the link count is
				 * wrong.  The easiest way to deal with this
				 * is to remove the newly created target, and
				 * return the original error.  This must
				 * succeed; fortunately, it is very unlikely to
				 * fail, since we just created it.
				 */
				VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
				    ZRENAMING, NULL));
			}
		}
		if (error == 0) {
			cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
		}
	}

	dmu_tx_commit(tx);

out_seq:
	vn_seqc_write_end(*svpp);
	vn_seqc_write_end(sdvp);
	if (*tvpp != NULL)
		vn_seqc_write_end(*tvpp);
	if (tdvp != *tvpp)
		vn_seqc_write_end(tdvp);

out:
	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);
	ZFS_EXIT(zfsvfs);

	return (error);
}

/*
 * Public rename entry point: resolve the source and target names to
 * vnodes/componentnames via zfs_lookup_internal(), then delegate the
 * locking and the actual work to zfs_do_rename().
 *
 * IN:	sdzp	- Source directory znode.
 *	sname	- Old entry name.
 *	tdzp	- Target directory znode.
 *	tname	- New entry name.
 *	cr	- credentials of caller.
 *	flags	- unused here.
 *
 * RETURN:	0 on success, error code on failure.
 */
int
zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
    cred_t *cr, int flags)
{
	struct componentname scn, tcn;
	vnode_t *sdvp, *tdvp;
	vnode_t *svp, *tvp;
	int error;
	svp = tvp = NULL;

	sdvp = ZTOV(sdzp);
	tdvp = ZTOV(tdzp);
	error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
	/*
	 * NOTE(review): during ZIL replay sdvp is apparently not locked
	 * by the lookup, hence the conditional unlock — confirm against
	 * zfs_lookup_internal() and the replay path.
	 */
	if (sdzp->z_zfsvfs->z_replay == B_FALSE)
		VOP_UNLOCK1(sdvp);
	if (error != 0)
		goto fail;
	VOP_UNLOCK1(svp);

	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
	error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
	/* EJUSTRETURN means the target name does not exist yet. */
	if (error == EJUSTRETURN)
		tvp = NULL;
	else if (error != 0) {
		VOP_UNLOCK1(tdvp);
		goto fail;
	}

	error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
fail:
	/* zfs_do_rename() drops all locks; only references remain. */
	if (svp != NULL)
		vrele(svp);
	if (tvp != NULL)
		vrele(tvp);

	return (error);
}

/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 * IN:	dzp	- Directory to contain new symbolic link.
 *	name	- Name for new symlink entry.
 *	vap	- Attributes of new entry.
 *	link	- Target path of the symlink.
 *	cr	- credentials of caller.
 *	flags	- case flags (unused).
 *
 * OUT:	zpp	- znode of the created symlink.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dzp - ctime|mtime updated
 */
int
zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
    const char *link, znode_t **zpp, cred_t *cr, int flags)
{
	(void) flags;
	znode_t *zp;
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	uint64_t len = strlen(link);
	int error;
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;
	uint64_t txtype = TX_SYMLINK;

	ASSERT3S(vap->va_type, ==, VLNK);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (len > MAXPATHLEN) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENAMETOOLONG));
	}

	if ((error = zfs_acl_ids_create(dzp, 0,
	    vap, cr, NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
	    0 /* projid */)) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}

	getnewvnode_reserve_();
	tx = dmu_tx_create(zfsvfs->z_os);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	/* MAX(1, len): even an empty target reserves one byte. */
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE + len);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create a new object for the symlink.
	 * for version 4 ZPL datasets the symlink will be an SA attribute
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	if (zp->z_is_sa)
		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
		    __DECONST(void *, link), len, tx);
	else
		zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);

	zp->z_size = len;
	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
	    &zp->z_size, sizeof (zp->z_size), tx);
	/*
	 * Insert the new object into the directory.
	 */
	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);

	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
	*zpp = zp;

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Return, in the buffer contained in the provided uio structure,
 * the symbolic path referred to by vp.
 *
 * IN:	vp	- vnode of symbolic link.
 *	uio	- structure to contain the link path.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * OUT:	uio	- structure containing the link path.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 */
static int
zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
{
	(void) cr, (void) ct;
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* SA-based symlinks store the target in the znode's SA bundle. */
	if (zp->z_is_sa)
		error = sa_lookup_uio(zp->z_sa_hdl,
		    SA_ZPL_SYMLINK(zfsvfs), uio);
	else
		error = zfs_sa_readlink(zp, uio);

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Insert a new entry into directory tdvp referencing svp.
 *
 * IN:	tdvp	- Directory to contain new entry.
 *	svp	- vnode of new entry.
 *	name	- name of new entry.
 *	cr	- credentials of caller.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	tdvp - ctime|mtime updated
 *	 svp - ctime updated
 */
int
zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
    int flags)
{
	(void) flags;
	znode_t		*tzp;
	zfsvfs_t	*zfsvfs = tdzp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	int		error;
	uint64_t	parent;
	uid_t		owner;

	ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(tdzp);
	zilog = zfsvfs->z_log;

	/*
	 * POSIX dictates that we return EPERM here.
	 * Better choices include ENOTSUP or EISDIR.
	 */
	if (ZTOV(szp)->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	ZFS_VERIFY_ZP(szp);

	/*
	 * If we are using project inheritance, means if the directory has
	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
	 * such case, we only allow hard link creation in our tree when the
	 * project IDs are the same.
	 */
	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
	    tdzp->z_projid != szp->z_projid) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EXDEV));
	}

	/* Immutable/append-only/readonly files may not gain new links. */
	if (szp->z_pflags & (ZFS_APPENDONLY |
	    ZFS_IMMUTABLE | ZFS_READONLY)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* Prevent links to .zfs/shares files */

	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	if (parent == zfsvfs->z_shares_dir) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	if (zfsvfs->z_utf8 && u8_validate(name,
	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	/*
	 * We do not support links between attributes and non-attributes
	 * because of the potential security risk of creating links
	 * into "normal" file space in order to circumvent restrictions
	 * imposed in attribute space.
	 */
	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}


	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
	if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
	zfs_sa_upgrade_txholds(tx, szp);
	zfs_sa_upgrade_txholds(tx, tdzp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_create(tdzp, name, szp, tx, 0);

	if (error == 0) {
		uint64_t txtype = TX_LINK;
		zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
	}

	dmu_tx_commit(tx);

	if (error == 0) {
		/*
		 * NOTE(review): `ct' is not declared in this function's
		 * scope — presumably vnevent_link() is a no-op macro on
		 * FreeBSD that never evaluates its arguments; confirm.
		 */
		vnevent_link(ZTOV(szp), ct);
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Free or allocate space in a file.  Currently, this function only
 * supports the `F_FREESP' command.  However, this command is somewhat
 * misnamed, as its functionality includes the ability to allocate as
 * well as free space.
 *
 *	IN:	ip	- inode of file to free data in.
 *		cmd	- action to take (only F_FREESP supported).
 *		bfp	- section of file to free/alloc.
 *		flag	- current file open mode flags.
 *		offset	- current file offset.
 *		cr	- credentials of caller.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	ip - ctime|mtime updated
 */
int
zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
    offset_t offset, cred_t *cr)
{
	(void) offset;
	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
	uint64_t	off, len;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (cmd != F_FREESP) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Callers might not be able to detect properly that we are read-only,
	 * so check it explicitly here.
	 */
	if (zfs_is_readonly(zfsvfs)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EROFS));
	}

	if (bfp->l_len < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Permissions aren't checked on Solaris because on this OS
	 * zfs_space() can only be called with an opened file handle.
	 * On Linux we can get here through truncate_range() which
	 * operates directly on inodes, so we need to check access rights.
	 */
	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	off = bfp->l_start;
	len = bfp->l_len; /* 0 means from off to end of file */

	error = zfs_freesp(zp, off, len, flag, TRUE);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * VOP_INACTIVE support: flush a dirty atime to the SA if needed, and
 * recycle the vnode when the underlying file is gone (unmounted fs,
 * suspend/resume loss, or an unlinked file).
 */
static void
zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	(void) cr, (void) ct;
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
	if (zp->z_sa_hdl == NULL) {
		/*
		 * The fs has been unmounted, or we did a
		 * suspend/resume and this file no longer exists.
		 */
		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
		vrecycle(vp);
		return;
	}

	if (zp->z_unlinked) {
		/*
		 * Fast path to recycle a vnode of a removed file.
		 */
		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
		vrecycle(vp);
		return;
	}

	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);

		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, zp);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
		} else {
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
			zp->z_atime_dirty = 0;
			dmu_tx_commit(tx);
		}
	}
	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
}


/* ZFS file ids must fit in the generic fid carried around by the VFS. */
_Static_assert(sizeof (struct zfid_short) <= sizeof (struct fid),
	"struct zfid_short bigger than struct fid");
_Static_assert(sizeof (struct zfid_long) <= sizeof (struct fid),
	"struct zfid_long bigger than struct fid");

/*
 * VOP_FID support: build an NFS-style file identifier from the object
 * number and generation (plus the objset id for child datasets).
 */
static int
zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	(void) ct;
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	uint32_t	gen;
	uint64_t	gen64;
	uint64_t	object = zp->z_id;
	zfid_short_t	*zfid;
	int		size, i, error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
	    &gen64, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	gen = (uint32_t)gen64;

	/* Datasets other than the parent need the long (objset-tagged) fid. */
	size = (zfsvfs->z_parent != zfsvfs) ?
	    LONG_FID_LEN : SHORT_FID_LEN;
	fidp->fid_len = size;

	zfid = (zfid_short_t *)fidp;

	zfid->zf_len = size;

	/* Serialize the object number little-endian, byte by byte. */
	for (i = 0; i < sizeof (zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* Must have a non-zero generation number to distinguish from .zfs */
	if (gen == 0)
		gen = 1;
	for (i = 0; i < sizeof (zfid->zf_gen); i++)
		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));

	if (size == LONG_FID_LEN) {
		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
		zfid_long_t	*zlfid;

		zlfid = (zfid_long_t *)fidp;

		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));

		/* XXX - this should be the generation number for the objset */
		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			zlfid->zf_setgen[i] = 0;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}

/* VOP_PATHCONF support: report filesystem limits/capabilities. */
static int
zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp;
	zfsvfs_t *zfsvfs;

	switch (cmd) {
	case _PC_LINK_MAX:
		*valp = MIN(LONG_MAX, ZFS_LINK_MAX);
		return (0);

	case _PC_FILESIZEBITS:
		*valp = 64;
		return (0);
	case _PC_MIN_HOLE_SIZE:
		*valp = (int)SPA_MINBLOCKSIZE;
		return (0);
	case _PC_ACL_EXTENDED:
#if 0	/* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
		zp = VTOZ(vp);
		zfsvfs = zp->z_zfsvfs;
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		*valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0;
		ZFS_EXIT(zfsvfs);
#else
		*valp = 0;
#endif
		return (0);

	case _PC_ACL_NFS4:
		zp = VTOZ(vp);
		zfsvfs = zp->z_zfsvfs;
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		*valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ?
		    1 : 0;
		ZFS_EXIT(zfsvfs);
		return (0);

	case _PC_ACL_PATH_MAX:
		*valp = ACL_MAX_ENTRIES;
		return (0);

	default:
		return (EOPNOTSUPP);
	}
}

/*
 * VOP_GETPAGES backend: fill the busied pages in `ma' with file data,
 * optionally reading extra pages before (*rbehind) and after (*rahead)
 * the requested run when the range lock can be taken without blocking.
 */
static int
zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
    int *rahead)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zfs_locked_range_t *lr;
	vm_object_t object;
	off_t start, end, obj_size;
	uint_t blksz;
	int pgsin_b, pgsin_a;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	start = IDX_TO_OFF(ma[0]->pindex);
	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);

	/*
	 * Lock a range covering all required and optional pages.
	 * Note that we need to handle the case of the block size growing.
	 */
	for (;;) {
		blksz = zp->z_blksz;
		lr = zfs_rangelock_tryenter(&zp->z_rangelock,
		    rounddown(start, blksz),
		    roundup(end, blksz) - rounddown(start, blksz), RL_READER);
		if (lr == NULL) {
			/*
			 * Couldn't get the lock without blocking: give up
			 * the optional pages and proceed lockless.
			 */
			if (rahead != NULL) {
				*rahead = 0;
				rahead = NULL;
			}
			if (rbehind != NULL) {
				*rbehind = 0;
				rbehind = NULL;
			}
			break;
		}
		if (blksz == zp->z_blksz)
			break;
		/* Block size changed while we waited; retry with new size. */
		zfs_rangelock_exit(lr);
	}

	object = ma[0]->object;
	zfs_vmobject_wlock(object);
	obj_size = object->un_pager.vnp.vnp_size;
	zfs_vmobject_wunlock(object);
	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
		/* Last requested page lies entirely beyond EOF. */
		if (lr != NULL)
			zfs_rangelock_exit(lr);
		ZFS_EXIT(zfsvfs);
		return (zfs_vm_pagerret_bad);
	}

	pgsin_b = 0;
	if (rbehind != NULL) {
		pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
		pgsin_b = MIN(*rbehind, pgsin_b);
	}

	pgsin_a = 0;
	if (rahead != NULL) {
		pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
		if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
			pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
		pgsin_a = MIN(*rahead,
		    pgsin_a);
	}

	/*
	 * NB: we need to pass the exact byte size of the data that we expect
	 * to read after accounting for the file size.  This is required because
	 * ZFS will panic if we request DMU to read beyond the end of the last
	 * allocated block.
	 */
	error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
	    &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));

	if (lr != NULL)
		zfs_rangelock_exit(lr);
	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);

	ZFS_EXIT(zfsvfs);

	if (error != 0)
		return (zfs_vm_pagerret_error);

	VM_CNT_INC(v_vnodein);
	VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
	if (rbehind != NULL)
		*rbehind = pgsin_b;
	if (rahead != NULL)
		*rahead = pgsin_a;
	return (zfs_vm_pagerret_ok);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_getpages_args {
	struct vnode *a_vp;
	vm_page_t *a_m;
	int a_count;
	int *a_rbehind;
	int *a_rahead;
};
#endif

/* Thin VOP_GETPAGES adapter around zfs_getpages(). */
static int
zfs_freebsd_getpages(struct vop_getpages_args *ap)
{

	return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
	    ap->a_rahead));
}

/*
 * VOP_PUTPAGES backend: write the dirty pages in `ma' back to the file
 * inside a single DMU transaction, clamping past-EOF pages, and report a
 * pager status per page in rtvals[].
 */
static int
zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
    int *rtvals)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zfs_locked_range_t	*lr;
	dmu_tx_t	*tx;
	struct sf_buf	*sf;
	vm_object_t	object;
	vm_page_t	m;
	caddr_t		va;
	size_t		tocopy;
	size_t		lo_len;
	vm_ooffset_t	lo_off;
	vm_ooffset_t	off;
	uint_t		blksz;
	int		ncount;
	int		pcount;
	int		err;
	int		i;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	object = vp->v_object;
	pcount = btoc(len);
	ncount = pcount;

	KASSERT(ma[0]->object == object, ("mismatching object"));
	KASSERT(len
	    > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));

	/* Assume failure until each page is written successfully. */
	for (i = 0; i < pcount; i++)
		rtvals[i] = zfs_vm_pagerret_error;

	off = IDX_TO_OFF(ma[0]->pindex);
	blksz = zp->z_blksz;
	lo_off = rounddown(off, blksz);
	lo_len = roundup(len + (off - lo_off), blksz);
	lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);

	zfs_vmobject_wlock(object);
	if (len + off > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > off) {
			int pgoff;

			/* Clamp the write to the current file size. */
			len = object->un_pager.vnp.vnp_size - off;
			ncount = btoc(len);
			if ((pgoff = (int)len & PAGE_MASK) != 0) {
				/*
				 * If the object is locked and the following
				 * conditions hold, then the page's dirty
				 * field cannot be concurrently changed by a
				 * pmap operation.
				 */
				m = ma[ncount - 1];
				vm_page_assert_sbusied(m);
				KASSERT(!pmap_page_is_write_mapped(m),
				    ("zfs_putpages: page %p is not read-only",
				    m));
				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
				    pgoff);
			}
		} else {
			len = 0;
			ncount = 0;
		}
		if (ncount < pcount) {
			for (i = ncount; i < pcount; i++) {
				rtvals[i] = zfs_vm_pagerret_bad;
			}
		}
	}
	zfs_vmobject_wunlock(object);

	if (ncount == 0)
		goto out;

	/* Silently skip the write when any relevant quota is exhausted. */
	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
	    zp->z_projid))) {
		goto out;
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_write(tx, zp->z_id, off, len);

	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err != 0) {
		dmu_tx_abort(tx);
		goto out;
	}

	if (zp->z_blksz < PAGE_SIZE) {
		/* Sub-page block size: copy page by page through sf_bufs. */
		for (i = 0; len > 0; off += tocopy, len -= tocopy,
		    i++) {
			tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
			va = zfs_map_page(ma[i], &sf);
			dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
			zfs_unmap_page(sf);
		}
	} else {
		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
	}

	if (err == 0) {
		uint64_t mtime[2], ctime[2];
		sa_bulk_attr_t bulk[3];
		int count = 0;

		/* Update mtime/ctime/flags in one SA bulk operation. */
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
		    &mtime, 16);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    &ctime, 16);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
		    &zp->z_pflags, 8);
		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
		ASSERT0(err);
		/*
		 * XXX we should be passing a callback to undirty
		 * but that would make the locking messier
		 */
		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
		    len, 0, NULL, NULL);

		zfs_vmobject_wlock(object);
		for (i = 0; i < ncount; i++) {
			rtvals[i] = zfs_vm_pagerret_ok;
			vm_page_undirty(ma[i]);
		}
		zfs_vmobject_wunlock(object);
		VM_CNT_INC(v_vnodeout);
		VM_CNT_ADD(v_vnodepgsout, ncount);
	}
	dmu_tx_commit(tx);

out:
	zfs_rangelock_exit(lr);
	if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zfsvfs->z_log, zp->z_id);

	/*
	 * NOTE(review): `len' may have been clamped (or zeroed) above, so
	 * this records the possibly-reduced byte count — confirm intended.
	 */
	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);

	ZFS_EXIT(zfsvfs);
	return (rtvals[0]);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_putpages_args {
	struct vnode *a_vp;
	vm_page_t *a_m;
	int a_count;
	int a_sync;
	int *a_rtvals;
};
#endif

/* Thin VOP_PUTPAGES adapter around zfs_putpages(). */
static int
zfs_freebsd_putpages(struct vop_putpages_args *ap)
{

	return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
	    ap->a_rtvals));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_bmap_args {
	struct vnode *a_vp;
	daddr_t a_bn;
	struct bufobj **a_bop;
	daddr_t *a_bnp;
	int *a_runp;
	int *a_runb;
};
#endif

/*
 * VOP_BMAP: ZFS does its own block mapping, so report a 1:1 logical
 * mapping with no read-ahead/read-behind runs.
 */
static int
zfs_freebsd_bmap(struct vop_bmap_args *ap)
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn;
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_open_args {
	struct vnode *a_vp;
	int a_mode;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/* VOP_OPEN: open through zfs_open(), then back the vnode with a VM object. */
static int
zfs_freebsd_open(struct vop_open_args *ap)
{
	vnode_t	*vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	int error;

	error = zfs_open(&vp, ap->a_mode, ap->a_cred);
	if (error == 0)
		vnode_create_vobject(vp, zp->z_size, ap->a_td);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_close_args {
	struct vnode *a_vp;
	int a_fflag;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/* VOP_CLOSE adapter for zfs_close(). */
static int
zfs_freebsd_close(struct vop_close_args *ap)
{

	return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_ioctl_args {
	struct vnode *a_vp;
	ulong_t a_command;
	caddr_t a_data;
	int a_fflag;
	struct ucred *cred;
	struct thread *td;
};
#endif

/* VOP_IOCTL adapter for zfs_ioctl(). */
static int
zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
{

	return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
	    ap->a_fflag, ap->a_cred, NULL));
}

/*
 * Translate vnode-layer IO_* ioflag bits into the F* file flags used by
 * the common ZFS read/write code.
 */
static int
ioflags(int ioflags)
{
	int flags = 0;

	if (ioflags & IO_APPEND)
		flags |= FAPPEND;
	if (ioflags & IO_NDELAY)
		flags |= FNONBLOCK;
	if (ioflags & IO_SYNC)
		flags |=
		    (FSYNC | FDSYNC | FRSYNC);

	return (flags);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_read_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	int a_ioflag;
	struct ucred *a_cred;
};
#endif

/* VOP_READ adapter: wrap the uio and call the common zfs_read(). */
static int
zfs_freebsd_read(struct vop_read_args *ap)
{
	zfs_uio_t uio;
	zfs_uio_init(&uio, ap->a_uio);
	return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
	    ap->a_cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_write_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	int a_ioflag;
	struct ucred *a_cred;
};
#endif

/* VOP_WRITE adapter: wrap the uio and call the common zfs_write(). */
static int
zfs_freebsd_write(struct vop_write_args *ap)
{
	zfs_uio_t uio;
	zfs_uio_init(&uio, ap->a_uio);
	return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
	    ap->a_cred));
}

#if __FreeBSD_version >= 1300102
/*
 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
 * the comment above cache_fplookup for details.
 */
static int
zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
{
	vnode_t *vp;
	znode_t *zp;
	uint64_t pflags;

	vp = v->a_vp;
	zp = VTOZ_SMR(vp);
	if (__predict_false(zp == NULL))
		return (EAGAIN);
	pflags = atomic_load_64(&zp->z_pflags);
	if (pflags & ZFS_AV_QUARANTINED)
		return (EAGAIN);
	if (pflags & ZFS_XATTR)
		return (EAGAIN);
	/* Only safe to answer lockless when no ACE can deny execute. */
	if ((pflags & ZFS_NO_EXECS_DENIED) == 0)
		return (EAGAIN);
	return (0);
}
#endif

#if __FreeBSD_version >= 1300139
/*
 * Lockless symlink resolution for the fast-path lookup; falls back to
 * the locked path (EAGAIN) when no cached target is available.
 */
static int
zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
{
	vnode_t *vp;
	znode_t *zp;
	char *target;

	vp = v->a_vp;
	zp = VTOZ_SMR(vp);
	if (__predict_false(zp == NULL)) {
		return (EAGAIN);
	}

	target = atomic_load_consume_ptr(&zp->z_cached_symlink);
	if (target == NULL) {
		return (EAGAIN);
	}
	return (cache_symlink_resolve(v->a_fpl, target, strlen(target)));
}
#endif

#ifndef _SYS_SYSPROTO_H_
struct vop_access_args {
	struct vnode *a_vp;
	accmode_t a_accmode;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/*
 * VOP_ACCESS: route VREAD/VWRITE/VEXEC/VAPPEND through the ZFS ACL code
 * and everything else (e.g. VADMIN) through the generic vaccess().
 */
static int
zfs_freebsd_access(struct vop_access_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	accmode_t accmode;
	int error = 0;


	/* Fast path for the common execute-only check. */
	if (ap->a_accmode == VEXEC) {
		if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
			return (0);
	}

	/*
	 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
	 */
	accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
	if (accmode != 0)
		error = zfs_access(zp, accmode, 0, ap->a_cred);

	/*
	 * VADMIN has to be handled by vaccess().
	 */
	if (error == 0) {
		accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
		if (accmode != 0) {
#if __FreeBSD_version >= 1300105
			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
			    zp->z_gid, accmode, ap->a_cred);
#else
			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
			    zp->z_gid, accmode, ap->a_cred, NULL);
#endif
		}
	}

	/*
	 * For VEXEC, ensure that at least one execute bit is set for
	 * non-directories.
	 */
	if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
	    (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
		error = EACCES;
	}

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_lookup_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
};
#endif

/*
 * VOP_LOOKUP adapter: copy the (not NUL-terminated) component name into
 * a bounded local buffer before calling the common zfs_lookup().
 */
static int
zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
{
	struct componentname *cnp = ap->a_cnp;
	char nm[NAME_MAX + 1];

	ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
	strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));

	return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
	    cnp->cn_cred, 0, cached));
}

/* VOP_CACHEDLOOKUP: same as above but marked as name-cache backed. */
static int
zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
{

	return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_lookup_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
};
#endif

/* Dispatch lookups through the VFS name cache only when it is enabled. */
static int
zfs_cache_lookup(struct vop_lookup_args *ap)
{
	zfsvfs_t *zfsvfs;

	zfsvfs = ap->a_dvp->v_mount->mnt_data;
	if (zfsvfs->z_use_namecache)
		return (vfs_cache_lookup(ap));
	else
		return (zfs_freebsd_lookup(ap, B_FALSE));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_create_args {
	struct vnode
	*a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
	struct vattr *a_vap;
};
#endif

/* VOP_CREATE adapter: create a regular file and optionally cache its name. */
static int
zfs_freebsd_create(struct vop_create_args *ap)
{
	zfsvfs_t *zfsvfs;
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;
	znode_t *zp = NULL;
	int rc, mode;

	ASSERT(cnp->cn_flags & SAVENAME);

	vattr_init_mask(vap);
	mode = vap->va_mode & ALLPERMS;
	zfsvfs = ap->a_dvp->v_mount->mnt_data;
	*ap->a_vpp = NULL;

	rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, !EXCL, mode,
	    &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */);
	if (rc == 0)
		*ap->a_vpp = ZTOV(zp);
	if (zfsvfs->z_use_namecache &&
	    rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);

	return (rc);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_remove_args {
	struct vnode *a_dvp;
	struct vnode *a_vp;
	struct componentname *a_cnp;
};
#endif

/* VOP_REMOVE adapter for zfs_remove_(). */
static int
zfs_freebsd_remove(struct vop_remove_args *ap)
{

	ASSERT(ap->a_cnp->cn_flags & SAVENAME);

	return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
	    ap->a_cnp->cn_cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_mkdir_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
	struct vattr *a_vap;
};
#endif

/* VOP_MKDIR adapter for zfs_mkdir(). */
static int
zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
{
	vattr_t *vap = ap->a_vap;
	znode_t *zp = NULL;
	int rc;

	ASSERT(ap->a_cnp->cn_flags & SAVENAME);

	vattr_init_mask(vap);
	*ap->a_vpp = NULL;

	rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
	    ap->a_cnp->cn_cred, 0, NULL);

	if (rc == 0)
		*ap->a_vpp = ZTOV(zp);
	return (rc);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_rmdir_args {
	struct vnode *a_dvp;
	struct vnode *a_vp;
	struct componentname *a_cnp;
};
#endif

/* VOP_RMDIR adapter for zfs_rmdir_(). */
static int
zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
{
	struct componentname *cnp = ap->a_cnp;

	ASSERT(cnp->cn_flags & SAVENAME);

	return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_readdir_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	struct ucred *a_cred;
	int *a_eofflag;
	int *a_ncookies;
	cookie_t **a_cookies;
};
#endif

/* VOP_READDIR adapter: wrap the uio and call the common zfs_readdir(). */
static int
zfs_freebsd_readdir(struct vop_readdir_args *ap)
{
	zfs_uio_t uio;
	zfs_uio_init(&uio, ap->a_uio);
	return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
	    ap->a_ncookies, ap->a_cookies));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_fsync_args {
	struct vnode *a_vp;
	int a_waitfor;
	struct thread *a_td;
};
#endif

/* VOP_FSYNC: flush buffered pages first, then commit via zfs_fsync(). */
static int
zfs_freebsd_fsync(struct vop_fsync_args *ap)
{

	vop_stdfsync(ap);
	return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_getattr_args {
	struct vnode *a_vp;
	struct vattr *a_vap;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_GETATTR: fetch extended (xvattr) attributes from ZFS and fold the
 * ZFS xattr flags back into the BSD chflags word (va_flags).
 */
static int
zfs_freebsd_getattr(struct vop_getattr_args *ap)
{
	vattr_t *vap = ap->a_vap;
	xvattr_t xvap;
	ulong_t fflags = 0;
	int error;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;
	xvap.xva_vattr.va_mask |= AT_XVATTR;

	/* Convert chflags into ZFS-type flags. */
	/* XXX: what about SF_SETTABLE?.
	 */
	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
	XVA_SET_REQ(&xvap, XAT_NODUMP);
	XVA_SET_REQ(&xvap, XAT_READONLY);
	XVA_SET_REQ(&xvap, XAT_ARCHIVE);
	XVA_SET_REQ(&xvap, XAT_SYSTEM);
	XVA_SET_REQ(&xvap, XAT_HIDDEN);
	XVA_SET_REQ(&xvap, XAT_REPARSE);
	XVA_SET_REQ(&xvap, XAT_OFFLINE);
	XVA_SET_REQ(&xvap, XAT_SPARSE);

	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
	if (error != 0)
		return (error);

	/* Convert ZFS xattr into chflags. */
#define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
		fflags |= (fflag);					\
} while (0)
	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
	    xvap.xva_xoptattrs.xoa_immutable);
	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
	    xvap.xva_xoptattrs.xoa_appendonly);
	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
	    xvap.xva_xoptattrs.xoa_nounlink);
	FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
	    xvap.xva_xoptattrs.xoa_archive);
	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
	    xvap.xva_xoptattrs.xoa_nodump);
	FLAG_CHECK(UF_READONLY, XAT_READONLY,
	    xvap.xva_xoptattrs.xoa_readonly);
	FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
	    xvap.xva_xoptattrs.xoa_system);
	FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
	    xvap.xva_xoptattrs.xoa_hidden);
	FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
	    xvap.xva_xoptattrs.xoa_reparse);
	FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
	    xvap.xva_xoptattrs.xoa_offline);
	FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
	    xvap.xva_xoptattrs.xoa_sparse);

#undef	FLAG_CHECK
	*vap = xvap.xva_vattr;
	vap->va_flags = fflags;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_setattr_args {
	struct vnode *a_vp;
	struct vattr *a_vap;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_SETATTR: translate BSD chflags into ZFS xattr flags (with the
 * required privilege/securelevel checks) and hand off to zfs_setattr().
 */
static int
zfs_freebsd_setattr(struct vop_setattr_args *ap)
{
	vnode_t *vp =
	    ap->a_vp;
	vattr_t *vap = ap->a_vap;
	cred_t *cred = ap->a_cred;
	xvattr_t xvap;
	ulong_t fflags;
	uint64_t zflags;

	vattr_init_mask(vap);
	vap->va_mask &= ~AT_NOSET;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;

	zflags = VTOZ(vp)->z_pflags;

	if (vap->va_flags != VNOVAL) {
		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
		int error;

		if (zfsvfs->z_use_fuids == B_FALSE)
			return (EOPNOTSUPP);

		fflags = vap->va_flags;
		/*
		 * XXX KDM
		 * We need to figure out whether it makes sense to allow
		 * UF_REPARSE through, since we don't really have other
		 * facilities to handle reparse points and zfs_setattr()
		 * doesn't currently allow setting that attribute anyway.
		 */
		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
		    UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
		    UF_OFFLINE|UF_SPARSE)) != 0)
			return (EOPNOTSUPP);
		/*
		 * Unprivileged processes are not permitted to unset system
		 * flags, or modify flags if any system flags are set.
		 * Privileged non-jail processes may not modify system flags
		 * if securelevel > 0 and any existing system flags are set.
		 * Privileged jail processes behave like privileged non-jail
		 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
		 * otherwise, they behave like unprivileged processes.
		 */
		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
		    spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
				error = securelevel_gt(cred, 0);
				if (error != 0)
					return (error);
			}
		} else {
			/*
			 * Callers may only modify the file flags on
			 * objects they have VADMIN rights for.
			 */
			if ((error = VOP_ACCESS(vp, VADMIN, cred,
			    curthread)) != 0)
				return (error);
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY |
			    ZFS_NOUNLINK)) {
				return (EPERM);
			}
			if (fflags &
			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
				return (EPERM);
			}
		}

#define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
		XVA_SET_REQ(&xvap, (xflag));				\
		(xfield) = ((fflags & (fflag)) != 0);			\
	}								\
} while (0)
		/* Convert chflags into ZFS-type flags. */
		/* XXX: what about SF_SETTABLE?. */
		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
		    xvap.xva_xoptattrs.xoa_immutable);
		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
		    xvap.xva_xoptattrs.xoa_appendonly);
		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
		    xvap.xva_xoptattrs.xoa_nounlink);
		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
		    xvap.xva_xoptattrs.xoa_archive);
		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
		    xvap.xva_xoptattrs.xoa_nodump);
		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
		    xvap.xva_xoptattrs.xoa_readonly);
		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
		    xvap.xva_xoptattrs.xoa_system);
		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
		    xvap.xva_xoptattrs.xoa_hidden);
		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
		    xvap.xva_xoptattrs.xoa_reparse);
		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
		    xvap.xva_xoptattrs.xoa_offline);
		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
		    xvap.xva_xoptattrs.xoa_sparse);
#undef	FLAG_CHANGE
	}
	if (vap->va_birthtime.tv_sec != VNOVAL) {
		xvap.xva_vattr.va_mask |= AT_XVATTR;
		XVA_SET_REQ(&xvap, XAT_CREATETIME);
	}
	return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred));
}

#ifndef _SYS_SYSPROTO_H_
struct
vop_rename_args {
	struct vnode *a_fdvp;
	struct vnode *a_fvp;
	struct componentname *a_fcnp;
	struct vnode *a_tdvp;
	struct vnode *a_tvp;
	struct componentname *a_tcnp;
};
#endif

/*
 * VOP_RENAME(9) entry point.  Delegates to zfs_do_rename() and then
 * drops the references the VFS handed us on all four vnodes (the
 * target vnode may be absent).
 */
static int
zfs_freebsd_rename(struct vop_rename_args *ap)
{
	vnode_t *fdvp = ap->a_fdvp;
	vnode_t *fvp = ap->a_fvp;
	vnode_t *tdvp = ap->a_tdvp;
	vnode_t *tvp = ap->a_tvp;
	int error;

	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));

	error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
	    ap->a_tcnp, ap->a_fcnp->cn_cred);

	vrele(fdvp);
	vrele(fvp);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_symlink_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
	struct vattr *a_vap;
	char *a_target;
};
#endif

/*
 * VOP_SYMLINK(9) entry point.  Creates the symlink via zfs_symlink()
 * and, on new enough FreeBSD, seeds the namecache's symlink target
 * cache with a copy of the target string.
 */
static int
zfs_freebsd_symlink(struct vop_symlink_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;
	znode_t *zp = NULL;
#if __FreeBSD_version >= 1300139
	char *symlink;
	size_t symlink_len;
#endif
	int rc;

	ASSERT(cnp->cn_flags & SAVENAME);

	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode.
 */
	vattr_init_mask(vap);
	*ap->a_vpp = NULL;

	rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
	    ap->a_target, &zp, cnp->cn_cred, 0 /* flags */);
	if (rc == 0) {
		*ap->a_vpp = ZTOV(zp);
		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
#if __FreeBSD_version >= 1300139
		/* Freshly created znode cannot have a cached target yet. */
		MPASS(zp->z_cached_symlink == NULL);
		symlink_len = strlen(ap->a_target);
		symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
		if (symlink != NULL) {
			memcpy(symlink, ap->a_target, symlink_len);
			symlink[symlink_len] = '\0';
			/* Release fence publishes the filled buffer. */
			atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
			    (uintptr_t)symlink);
		}
#endif
	}
	return (rc);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_readlink_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_READLINK(9) entry point.  Reads the link target; on new enough
 * FreeBSD it opportunistically populates the cached symlink target
 * from the data just read (only for a single system-space iovec).
 */
static int
zfs_freebsd_readlink(struct vop_readlink_args *ap)
{
	zfs_uio_t uio;
	int error;
#if __FreeBSD_version >= 1300139
	znode_t	*zp = VTOZ(ap->a_vp);
	char *symlink, *base;
	size_t symlink_len;
	bool trycache;
#endif

	zfs_uio_init(&uio, ap->a_uio);
#if __FreeBSD_version >= 1300139
	trycache = false;
	if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
	    zfs_uio_iovcnt(&uio) == 1) {
		/* Remember the buffer so we can copy the result below. */
		base = zfs_uio_iovbase(&uio, 0);
		symlink_len = zfs_uio_iovlen(&uio, 0);
		trycache = true;
	}
#endif
	error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
#if __FreeBSD_version >= 1300139
	if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
	    error != 0 || !trycache) {
		return (error);
	}
	/* Residual tells us how many bytes zfs_readlink() produced. */
	symlink_len -= zfs_uio_resid(&uio);
	symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
	if (symlink != NULL) {
		memcpy(symlink, base, symlink_len);
		symlink[symlink_len] = '\0';
		/* Another thread may have raced us; free the loser's copy. */
		if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
		    (uintptr_t)NULL, (uintptr_t)symlink)) {
			cache_symlink_free(symlink, symlink_len + 1);
		}
	}
#endif
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_link_args {
	struct vnode *a_tdvp;
	struct vnode *a_vp;
	struct componentname *a_cnp;
};
#endif

/*
 * VOP_LINK(9) entry point.  Rejects cross-mount links, then delegates
 * to zfs_link().
 */
static int
zfs_freebsd_link(struct vop_link_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	vnode_t *vp = ap->a_vp;
	vnode_t *tdvp = ap->a_tdvp;

	if (tdvp->v_mount != vp->v_mount)
		return (EXDEV);

	ASSERT(cnp->cn_flags & SAVENAME);

	return (zfs_link(VTOZ(tdvp), VTOZ(vp),
	    cnp->cn_nameptr, cnp->cn_cred, 0));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_inactive_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

/*
 * VOP_INACTIVE(9) entry point.  The credential source moved from the
 * arg structure to curthread in FreeBSD 1300123.
 */
static int
zfs_freebsd_inactive(struct vop_inactive_args *ap)
{
	vnode_t *vp = ap->a_vp;

#if __FreeBSD_version >= 1300123
	zfs_inactive(vp, curthread->td_ucred, NULL);
#else
	zfs_inactive(vp, ap->a_td->td_ucred, NULL);
#endif
	return (0);
}

#if __FreeBSD_version >= 1300042
#ifndef _SYS_SYSPROTO_H_
struct vop_need_inactive_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

/*
 * VOP_NEED_INACTIVE(9) entry point: cheap check for whether
 * VOP_INACTIVE must run.  Answers "yes" conservatively when dirty
 * pages need flushing or the teardown lock cannot be acquired
 * without blocking.
 */
static int
zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int need;

	if (vn_need_pageq_flush(vp))
		return (1);

	if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
		return (1);
	need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);

	return (need);
}
#endif

#ifndef _SYS_SYSPROTO_H_
struct vop_reclaim_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

/*
 * VOP_RECLAIM(9) entry point: detach the znode from the vnode and
 * free or inactivate it (body continues below).
 */
static int
zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
{
	vnode_t	*vp = ap->a_vp;
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ASSERT3P(zp, !=, NULL);

#if __FreeBSD_version < 1300042
	/* Destroy the vm object and flush associated pages. */
	vnode_destroy_vobject(vp);
#endif
	/*
	 * z_teardown_inactive_lock protects from a race with
	 * zfs_znode_dmu_fini in zfsvfs_teardown during
	 * force unmount.
	 */
	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
	if (zp->z_sa_hdl == NULL)
		zfs_znode_free(zp);
	else
		zfs_zinactive(zp);
	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);

	vp->v_data = NULL;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_fid_args {
	struct vnode *a_vp;
	struct fid *a_fid;
};
#endif

/* VOP_FID(9) entry point: thin wrapper around zfs_fid(). */
static int
zfs_freebsd_fid(struct vop_fid_args *ap)
{

	return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
}


#ifndef _SYS_SYSPROTO_H_
struct vop_pathconf_args {
	struct vnode *a_vp;
	int a_name;
	register_t *a_retval;
} *ap;
#endif

/*
 * VOP_PATHCONF(9) entry point.  Asks ZFS first; for variables ZFS does
 * not know (EOPNOTSUPP) supplies FreeBSD-specific answers or falls back
 * to vop_stdpathconf().
 */
static int
zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
{
	ulong_t val;
	int error;

	error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
	    curthread->td_ucred, NULL);
	if (error == 0) {
		*ap->a_retval = val;
		return (error);
	}
	if (error != EOPNOTSUPP)
		return (error);

	switch (ap->a_name) {
	case _PC_NAME_MAX:
		*ap->a_retval = NAME_MAX;
		return (0);
#if __FreeBSD_version >= 1400032
	case _PC_DEALLOC_PRESENT:
		*ap->a_retval = 1;
		return (0);
#endif
	case _PC_PIPE_BUF:
		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
			*ap->a_retval = PIPE_BUF;
			return (0);
		}
		return (EINVAL);
	default:
		return (vop_stdpathconf(ap));
	}
}

/* Tunable: 1 = store user xattrs Solaris-style (no "user." prefix). */
static int zfs_xattr_compat = 1;

/*
 * Validate an extended attribute name: no '/' and no reserved
 * namespace prefix.
 */
static int
zfs_check_attrname(const char *name)
{
	/* We don't allow '/' character in attribute name. */
	if (strchr(name, '/') != NULL)
		return (SET_ERROR(EINVAL));
	/* We don't allow attribute names that start with a namespace prefix. */
	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
		return (SET_ERROR(EINVAL));
	return (0);
}

/*
 * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
 * extended attribute name:
 *
 * NAMESPACE	XATTR_COMPAT	PREFIX
 * system	*		freebsd:system:
 * user		1		(none, can be used to access ZFS
 *				fsattr(5) attributes created on Solaris)
 * user		0		user.
 */
static int
zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
    size_t size, boolean_t compat)
{
	const char *namespace, *prefix, *suffix;

	bzero(attrname, size);

	switch (attrnamespace) {
	case EXTATTR_NAMESPACE_USER:
		if (compat) {
			/*
			 * This is the default namespace by which we can access
			 * all attributes created on Solaris.
			 */
			prefix = namespace = suffix = "";
		} else {
			/*
			 * This is compatible with the user namespace encoding
			 * on Linux prior to xattr_compat, but nothing
			 * else.
 */
			prefix = "";
			namespace = "user";
			suffix = ".";
		}
		break;
	case EXTATTR_NAMESPACE_SYSTEM:
		prefix = "freebsd:";
		namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
		suffix = ":";
		break;
	case EXTATTR_NAMESPACE_EMPTY:
	default:
		return (SET_ERROR(EINVAL));
	}
	/* Truncated output means the caller's buffer was too small. */
	if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
	    name) >= size) {
		return (SET_ERROR(ENAMETOOLONG));
	}
	return (0);
}

/*
 * Make sure zp->z_xattr_cached is populated, upgrading the caller's
 * read lock to a write lock if a load from SA is needed, and
 * downgrading it back afterwards.  Caller must hold z_xattr_lock.
 */
static int
zfs_ensure_xattr_cached(znode_t *zp)
{
	int error = 0;

	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));

	if (zp->z_xattr_cached != NULL)
		return (0);

	if (rw_write_held(&zp->z_xattr_lock))
		return (zfs_sa_get_xattr(zp));

	/* Upgrade may drop the lock; re-check the cache after reacquiring. */
	if (!rw_tryupgrade(&zp->z_xattr_lock)) {
		rw_exit(&zp->z_xattr_lock);
		rw_enter(&zp->z_xattr_lock, RW_WRITER);
	}
	if (zp->z_xattr_cached == NULL)
		error = zfs_sa_get_xattr(zp);
	rw_downgrade(&zp->z_xattr_lock);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
#endif

/*
 * Read an extended attribute stored as a file in the hidden xattr
 * directory.  With a_size set, only the size is reported; otherwise
 * the contents are copied into a_uio.
 */
static int
zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
{
	struct thread *td = ap->a_td;
	struct nameidata nd;
	struct vattr va;
	vnode_t *xvp = NULL, *vp;
	int error, flags;

	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
	    LOOKUP_XATTR, B_FALSE);
	if (error != 0)
		return (error);

	flags = FREAD;
#if __FreeBSD_version < 1400043
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
	    xvp, td);
#else
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
#endif
	error = vn_open_cred(&nd, &flags, 0,
	    VN_OPEN_INVFS, ap->a_cred, NULL);
	vp = nd.ni_vp;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (error != 0)
		return (SET_ERROR(error));

	if (ap->a_size != NULL) {
		error = VOP_GETATTR(vp, &va, ap->a_cred);
		if (error == 0)
			*ap->a_size = (size_t)va.va_size;
	} else if (ap->a_uio != NULL)
		error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);

	VOP_UNLOCK1(vp);
	vn_close(vp, flags, ap->a_cred, td);
	return (error);
}

/*
 * Read an extended attribute stored in the znode's SA (system
 * attribute) nvlist.  Caller must hold z_xattr_lock.
 */
static int
zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
{
	znode_t *zp = VTOZ(ap->a_vp);
	uchar_t *nv_value;
	uint_t nv_size;
	int error;

	error = zfs_ensure_xattr_cached(zp);
	if (error != 0)
		return (error);

	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
	ASSERT3P(zp->z_xattr_cached, !=, NULL);

	error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
	    &nv_value, &nv_size);
	if (error != 0)
		return (SET_ERROR(error));

	if (ap->a_size != NULL)
		*ap->a_size = nv_size;
	else if (ap->a_uio != NULL)
		error = uiomove(nv_value, nv_size, ap->a_uio);
	if (error != 0)
		return (SET_ERROR(error));

	return (0);
}

/*
 * Look up one attribute name under a given namespace encoding:
 * SA first (when enabled), falling back to the xattr directory.
 */
static int
zfs_getextattr_impl(struct vop_getextattr_args *ap, boolean_t compat)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	char attrname[EXTATTR_MAXNAMELEN+1];
	int error;

	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
	    sizeof (attrname), compat);
	if (error != 0)
		return (error);

	error = ENOENT;
	if (zfsvfs->z_use_sa && zp->z_is_sa)
		error = zfs_getextattr_sa(ap, attrname);
	if (error == ENOENT)
		error = zfs_getextattr_dir(ap, attrname);
	return (error);
}

/*
 * Vnode operation to retrieve a named extended attribute.
5440 */ 5441 static int 5442 zfs_getextattr(struct vop_getextattr_args *ap) 5443 { 5444 znode_t *zp = VTOZ(ap->a_vp); 5445 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5446 int error; 5447 5448 /* 5449 * If the xattr property is off, refuse the request. 5450 */ 5451 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5452 return (SET_ERROR(EOPNOTSUPP)); 5453 5454 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5455 ap->a_cred, ap->a_td, VREAD); 5456 if (error != 0) 5457 return (SET_ERROR(error)); 5458 5459 error = zfs_check_attrname(ap->a_name); 5460 if (error != 0) 5461 return (error); 5462 5463 error = ENOENT; 5464 ZFS_ENTER(zfsvfs); 5465 ZFS_VERIFY_ZP(zp) 5466 rw_enter(&zp->z_xattr_lock, RW_READER); 5467 5468 error = zfs_getextattr_impl(ap, zfs_xattr_compat); 5469 if ((error == ENOENT || error == ENOATTR) && 5470 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) { 5471 /* 5472 * Fall back to the alternate namespace format if we failed to 5473 * find a user xattr. 5474 */ 5475 error = zfs_getextattr_impl(ap, !zfs_xattr_compat); 5476 } 5477 5478 rw_exit(&zp->z_xattr_lock); 5479 ZFS_EXIT(zfsvfs); 5480 if (error == ENOENT) 5481 error = SET_ERROR(ENOATTR); 5482 return (error); 5483 } 5484 5485 #ifndef _SYS_SYSPROTO_H_ 5486 struct vop_deleteextattr { 5487 IN struct vnode *a_vp; 5488 IN int a_attrnamespace; 5489 IN const char *a_name; 5490 IN struct ucred *a_cred; 5491 IN struct thread *a_td; 5492 }; 5493 #endif 5494 5495 static int 5496 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname) 5497 { 5498 struct nameidata nd; 5499 vnode_t *xvp = NULL, *vp; 5500 int error; 5501 5502 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5503 LOOKUP_XATTR, B_FALSE); 5504 if (error != 0) 5505 return (error); 5506 5507 #if __FreeBSD_version < 1400043 5508 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5509 UIO_SYSSPACE, attrname, xvp, ap->a_td); 5510 #else 5511 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5512 UIO_SYSSPACE, attrname, xvp); 
#endif
	error = namei(&nd);
	vp = nd.ni_vp;
	if (error != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		return (SET_ERROR(error));
	}

	error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
	NDFREE(&nd, NDF_ONLY_PNBUF);

	/* Release both the xattr directory and the attribute vnode. */
	vput(nd.ni_dvp);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);

	return (error);
}

/*
 * Remove an extended attribute stored in the znode's SA nvlist.
 * Caller must hold z_xattr_lock as writer.
 */
static int
zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
{
	znode_t *zp = VTOZ(ap->a_vp);
	nvlist_t *nvl;
	int error;

	error = zfs_ensure_xattr_cached(zp);
	if (error != 0)
		return (error);

	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
	ASSERT3P(zp->z_xattr_cached, !=, NULL);

	nvl = zp->z_xattr_cached;
	error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
	if (error != 0)
		error = SET_ERROR(error);
	else
		error = zfs_sa_set_xattr(zp, attrname, NULL, 0);
	if (error != 0) {
		/* On failure drop the (possibly stale) cached nvlist. */
		zp->z_xattr_cached = NULL;
		nvlist_free(nvl);
	}
	return (error);
}

/*
 * Delete one attribute name under a given namespace encoding:
 * SA first (when enabled), then the xattr directory.
 */
static int
zfs_deleteextattr_impl(struct vop_deleteextattr_args *ap, boolean_t compat)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	char attrname[EXTATTR_MAXNAMELEN+1];
	int error;

	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
	    sizeof (attrname), compat);
	if (error != 0)
		return (error);

	error = ENOENT;
	if (zfsvfs->z_use_sa && zp->z_is_sa)
		error = zfs_deleteextattr_sa(ap, attrname);
	if (error == ENOENT)
		error = zfs_deleteextattr_dir(ap, attrname);
	return (error);
}

/*
 * Vnode operation to remove a named attribute.
 */
static int
zfs_deleteextattr(struct vop_deleteextattr_args *ap)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	int error;

	/*
	 * If the xattr property is off, refuse the request.
5593 */ 5594 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5595 return (SET_ERROR(EOPNOTSUPP)); 5596 5597 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5598 ap->a_cred, ap->a_td, VWRITE); 5599 if (error != 0) 5600 return (SET_ERROR(error)); 5601 5602 error = zfs_check_attrname(ap->a_name); 5603 if (error != 0) 5604 return (error); 5605 5606 ZFS_ENTER(zfsvfs); 5607 ZFS_VERIFY_ZP(zp); 5608 rw_enter(&zp->z_xattr_lock, RW_WRITER); 5609 5610 error = zfs_deleteextattr_impl(ap, zfs_xattr_compat); 5611 if ((error == ENOENT || error == ENOATTR) && 5612 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) { 5613 /* 5614 * Fall back to the alternate namespace format if we failed to 5615 * find a user xattr. 5616 */ 5617 error = zfs_deleteextattr_impl(ap, !zfs_xattr_compat); 5618 } 5619 5620 rw_exit(&zp->z_xattr_lock); 5621 ZFS_EXIT(zfsvfs); 5622 if (error == ENOENT) 5623 error = SET_ERROR(ENOATTR); 5624 return (error); 5625 } 5626 5627 #ifndef _SYS_SYSPROTO_H_ 5628 struct vop_setextattr { 5629 IN struct vnode *a_vp; 5630 IN int a_attrnamespace; 5631 IN const char *a_name; 5632 INOUT struct uio *a_uio; 5633 IN struct ucred *a_cred; 5634 IN struct thread *a_td; 5635 }; 5636 #endif 5637 5638 static int 5639 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname) 5640 { 5641 struct thread *td = ap->a_td; 5642 struct nameidata nd; 5643 struct vattr va; 5644 vnode_t *xvp = NULL, *vp; 5645 int error, flags; 5646 5647 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5648 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE); 5649 if (error != 0) 5650 return (error); 5651 5652 flags = FFLAGS(O_WRONLY | O_CREAT); 5653 #if __FreeBSD_version < 1400043 5654 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td); 5655 #else 5656 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp); 5657 #endif 5658 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred, 5659 NULL); 5660 vp = nd.ni_vp; 5661 NDFREE(&nd, NDF_ONLY_PNBUF); 5662 if (error != 0) 
5663 return (SET_ERROR(error)); 5664 5665 VATTR_NULL(&va); 5666 va.va_size = 0; 5667 error = VOP_SETATTR(vp, &va, ap->a_cred); 5668 if (error == 0) 5669 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5670 5671 VOP_UNLOCK1(vp); 5672 vn_close(vp, flags, ap->a_cred, td); 5673 return (error); 5674 } 5675 5676 static int 5677 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname) 5678 { 5679 znode_t *zp = VTOZ(ap->a_vp); 5680 nvlist_t *nvl; 5681 size_t sa_size; 5682 int error; 5683 5684 error = zfs_ensure_xattr_cached(zp); 5685 if (error != 0) 5686 return (error); 5687 5688 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); 5689 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5690 5691 nvl = zp->z_xattr_cached; 5692 size_t entry_size = ap->a_uio->uio_resid; 5693 if (entry_size > DXATTR_MAX_ENTRY_SIZE) 5694 return (SET_ERROR(EFBIG)); 5695 error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); 5696 if (error != 0) 5697 return (SET_ERROR(error)); 5698 if (sa_size > DXATTR_MAX_SA_SIZE) 5699 return (SET_ERROR(EFBIG)); 5700 uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP); 5701 error = uiomove(buf, entry_size, ap->a_uio); 5702 if (error != 0) { 5703 error = SET_ERROR(error); 5704 } else { 5705 error = nvlist_add_byte_array(nvl, attrname, buf, entry_size); 5706 if (error != 0) 5707 error = SET_ERROR(error); 5708 } 5709 if (error == 0) 5710 error = zfs_sa_set_xattr(zp, attrname, buf, entry_size); 5711 kmem_free(buf, entry_size); 5712 if (error != 0) { 5713 zp->z_xattr_cached = NULL; 5714 nvlist_free(nvl); 5715 } 5716 return (error); 5717 } 5718 5719 static int 5720 zfs_setextattr_impl(struct vop_setextattr_args *ap, boolean_t compat) 5721 { 5722 znode_t *zp = VTOZ(ap->a_vp); 5723 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5724 char attrname[EXTATTR_MAXNAMELEN+1]; 5725 int error; 5726 5727 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5728 sizeof (attrname), compat); 5729 if (error != 0) 5730 return (error); 5731 5732 struct vop_deleteextattr_args vda = { 5733 .a_vp = 
ap->a_vp,
		.a_attrnamespace = ap->a_attrnamespace,
		.a_name = ap->a_name,
		.a_cred = ap->a_cred,
		.a_td = ap->a_td,
	};
	error = ENOENT;
	if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) {
		error = zfs_setextattr_sa(ap, attrname);
		if (error == 0) {
			/*
			 * Successfully put into SA, we need to clear the one
			 * in dir if present.
			 */
			zfs_deleteextattr_dir(&vda, attrname);
		}
	}
	if (error != 0) {
		error = zfs_setextattr_dir(ap, attrname);
		if (error == 0 && zp->z_is_sa) {
			/*
			 * Successfully put into dir, we need to clear the one
			 * in SA if present.
			 */
			zfs_deleteextattr_sa(&vda, attrname);
		}
	}
	if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
		/*
		 * Also clear all versions of the alternate compat name.
		 */
		zfs_deleteextattr_impl(&vda, !compat);
	}
	return (error);
}

/*
 * Vnode operation to set a named attribute.
 */
static int
zfs_setextattr(struct vop_setextattr_args *ap)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	int error;

	/*
	 * If the xattr property is off, refuse the request.
 */
	if (!(zfsvfs->z_flags & ZSB_XATTR))
		return (SET_ERROR(EOPNOTSUPP));

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error != 0)
		return (SET_ERROR(error));

	error = zfs_check_attrname(ap->a_name);
	if (error != 0)
		return (error);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	rw_enter(&zp->z_xattr_lock, RW_WRITER);

	error = zfs_setextattr_impl(ap, zfs_xattr_compat);

	rw_exit(&zp->z_xattr_lock);
	ZFS_EXIT(zfsvfs);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
#endif

/*
 * Enumerate attributes stored as files in the hidden xattr directory,
 * reporting names that match the given namespace prefix (body
 * continues below).
 */
static int
zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
{
	struct thread *td = ap->a_td;
	struct nameidata nd;
	uint8_t dirbuf[sizeof (struct dirent)];
	struct iovec aiov;
	struct uio auio;
	vnode_t *xvp = NULL, *vp;
	int error, eof;

	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
	    LOOKUP_XATTR, B_FALSE);
	if (error != 0) {
		/*
		 * ENOATTR means that the EA directory does not yet exist,
		 * i.e. there are no extended attributes there.
		 */
		if (error == ENOATTR)
			error = 0;
		return (error);
	}

#if __FreeBSD_version < 1400043
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
	    UIO_SYSSPACE, ".", xvp, td);
#else
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
	    UIO_SYSSPACE, ".", xvp);
#endif
	error = namei(&nd);
	vp = nd.ni_vp;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (error != 0)
		return (SET_ERROR(error));

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_td = td;
	auio.uio_rw = UIO_READ;
	auio.uio_offset = 0;

	size_t plen = strlen(attrprefix);

	/* Read the xattr directory one dirent-sized chunk at a time. */
	do {
		aiov.iov_base = (void *)dirbuf;
		aiov.iov_len = sizeof (dirbuf);
		auio.uio_resid = sizeof (dirbuf);
		error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
		if (error != 0)
			break;
		int done = sizeof (dirbuf) - auio.uio_resid;
		for (int pos = 0; pos < done; ) {
			struct dirent *dp = (struct dirent *)(dirbuf + pos);
			pos += dp->d_reclen;
			/*
			 * XXX: Temporarily we also accept DT_UNKNOWN, as this
			 * is what we get when attribute was created on Solaris.
			 */
			if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
				continue;
			else if (plen == 0 &&
			    ZFS_XA_NS_PREFIX_FORBIDDEN(dp->d_name))
				continue;
			else if (strncmp(dp->d_name, attrprefix, plen) != 0)
				continue;
			uint8_t nlen = dp->d_namlen - plen;
			if (ap->a_size != NULL) {
				*ap->a_size += 1 + nlen;
			} else if (ap->a_uio != NULL) {
				/*
				 * Format of extattr name entry is one byte for
				 * length and the rest for name.
 */
				error = uiomove(&nlen, 1, ap->a_uio);
				if (error == 0) {
					char *namep = dp->d_name + plen;
					error = uiomove(namep, nlen, ap->a_uio);
				}
				if (error != 0) {
					error = SET_ERROR(error);
					break;
				}
			}
		}
	} while (!eof && error == 0);

	vput(vp);
	return (error);
}

/*
 * Enumerate attributes stored in the znode's SA nvlist, reporting
 * names that match the given namespace prefix.  Caller must hold
 * z_xattr_lock.
 */
static int
zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
{
	znode_t *zp = VTOZ(ap->a_vp);
	int error;

	error = zfs_ensure_xattr_cached(zp);
	if (error != 0)
		return (error);

	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
	ASSERT3P(zp->z_xattr_cached, !=, NULL);

	size_t plen = strlen(attrprefix);
	nvpair_t *nvp = NULL;
	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);

		const char *name = nvpair_name(nvp);
		if (plen == 0 && ZFS_XA_NS_PREFIX_FORBIDDEN(name))
			continue;
		else if (strncmp(name, attrprefix, plen) != 0)
			continue;
		uint8_t nlen = strlen(name) - plen;
		if (ap->a_size != NULL) {
			*ap->a_size += 1 + nlen;
		} else if (ap->a_uio != NULL) {
			/*
			 * Format of extattr name entry is one byte for
			 * length and the rest for name.
 */
			error = uiomove(&nlen, 1, ap->a_uio);
			if (error == 0) {
				char *namep = __DECONST(char *, name) + plen;
				error = uiomove(namep, nlen, ap->a_uio);
			}
			if (error != 0) {
				error = SET_ERROR(error);
				break;
			}
		}
	}

	return (error);
}

/*
 * List attributes under a given namespace encoding: SA entries first
 * (when enabled), then the xattr directory.
 */
static int
zfs_listextattr_impl(struct vop_listextattr_args *ap, boolean_t compat)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	char attrprefix[16];
	int error;

	error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
	    sizeof (attrprefix), compat);
	if (error != 0)
		return (error);

	if (zfsvfs->z_use_sa && zp->z_is_sa)
		error = zfs_listextattr_sa(ap, attrprefix);
	if (error == 0)
		error = zfs_listextattr_dir(ap, attrprefix);
	return (error);
}

/*
 * Vnode operation to retrieve extended attributes on a vnode.
 */
static int
zfs_listextattr(struct vop_listextattr_args *ap)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	int error;

	if (ap->a_size != NULL)
		*ap->a_size = 0;

	/*
	 * If the xattr property is off, refuse the request.
	 */
	if (!(zfsvfs->z_flags & ZSB_XATTR))
		return (SET_ERROR(EOPNOTSUPP));

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error != 0)
		return (SET_ERROR(error));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	rw_enter(&zp->z_xattr_lock, RW_READER);

	error = zfs_listextattr_impl(ap, zfs_xattr_compat);
	if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
		/* Also list user xattrs with the alternate format.
 */
		error = zfs_listextattr_impl(ap, !zfs_xattr_compat);
	}

	rw_exit(&zp->z_xattr_lock);
	ZFS_EXIT(zfsvfs);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_getacl_args {
	struct vnode *vp;
	acl_type_t type;
	struct acl *aclp;
	struct ucred *cred;
	struct thread *td;
};
#endif

/*
 * VOP_GETACL(9) entry point: fetch the NFSv4 ACL from ZFS and convert
 * it to FreeBSD's struct acl.  Only ACL_TYPE_NFS4 is supported.
 */
static int
zfs_freebsd_getacl(struct vop_getacl_args *ap)
{
	int error;
	vsecattr_t vsecattr;

	if (ap->a_type != ACL_TYPE_NFS4)
		return (EINVAL);

	vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
	if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
	    &vsecattr, 0, ap->a_cred)))
		return (error);

	error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
	    vsecattr.vsa_aclcnt);
	if (vsecattr.vsa_aclentp != NULL)
		kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_setacl_args {
	struct vnode *vp;
	acl_type_t type;
	struct acl *aclp;
	struct ucred *cred;
	struct thread *td;
};
#endif

/*
 * VOP_SETACL(9) entry point: validate a FreeBSD NFSv4 ACL, convert it
 * to ZFS ACEs and store it.  Only ACL_TYPE_NFS4 is supported.
 */
static int
zfs_freebsd_setacl(struct vop_setacl_args *ap)
{
	int error;
	vsecattr_t vsecattr;
	int aclbsize;	/* size of acl list in bytes */
	aclent_t *aaclp;

	if (ap->a_type != ACL_TYPE_NFS4)
		return (EINVAL);

	if (ap->a_aclp == NULL)
		return (EINVAL);

	if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
		return (EINVAL);

	/*
	 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
	 * splitting every entry into two and appending "canonical six"
	 * entries at the end. Don't allow for setting an ACL that would
	 * cause chmod(2) to run out of ACL entries.
 */
	if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
		return (ENOSPC);

	error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
	if (error != 0)
		return (error);

	vsecattr.vsa_mask = VSA_ACE;
	aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t);
	vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
	aaclp = vsecattr.vsa_aclentp;
	vsecattr.vsa_aclentsz = aclbsize;

	aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
	error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred);
	kmem_free(aaclp, aclbsize);

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_aclcheck_args {
	struct vnode *vp;
	acl_type_t type;
	struct acl *aclp;
	struct ucred *cred;
	struct thread *td;
};
#endif

/* VOP_ACLCHECK(9) entry point: not supported by ZFS. */
static int
zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
{

	return (EOPNOTSUPP);
}

/*
 * VOP_VPTOCNP(9) entry point: translate a vnode to its name in the
 * parent directory for the name cache / realpath(3).
 */
static int
zfs_vptocnp(struct vop_vptocnp_args *ap)
{
	vnode_t *covered_vp;
	vnode_t *vp = ap->a_vp;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	znode_t *zp = VTOZ(vp);
	int ltype;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * If we are a snapshot mounted under .zfs, run the operation
	 * on the covered vnode.
 */
	if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
		char name[MAXNAMLEN + 1];
		znode_t *dzp;
		size_t len;

		error = zfs_znode_parent_and_name(zp, &dzp, name);
		if (error == 0) {
			len = strlen(name);
			if (*ap->a_buflen < len)
				error = SET_ERROR(ENOMEM);
		}
		if (error == 0) {
			/* Names are written right-to-left into the buffer. */
			*ap->a_buflen -= len;
			bcopy(name, ap->a_buf + *ap->a_buflen, len);
			*ap->a_vpp = ZTOV(dzp);
		}
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ZFS_EXIT(zfsvfs);

	covered_vp = vp->v_mount->mnt_vnodecovered;
#if __FreeBSD_version >= 1300045
	enum vgetstate vs = vget_prep(covered_vp);
#else
	vhold(covered_vp);
#endif
	/* Drop our lock while operating on the covered vnode. */
	ltype = VOP_ISLOCKED(vp);
	VOP_UNLOCK1(vp);
#if __FreeBSD_version >= 1300045
	error = vget_finish(covered_vp, LK_SHARED, vs);
#else
	error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread);
#endif
	if (error == 0) {
#if __FreeBSD_version >= 1300123
		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
		    ap->a_buflen);
#else
		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
		    ap->a_buf, ap->a_buflen);
#endif
		vput(covered_vp);
	}
	vn_lock(vp, ltype | LK_RETRY);
	/* The vnode may have been reclaimed while unlocked. */
	if (VN_IS_DOOMED(vp))
		error = SET_ERROR(ENOENT);
	return (error);
}

#if __FreeBSD_version >= 1400032
/*
 * VOP_DEALLOCATE(9) entry point: punch a hole in the file via
 * zfs_freesp(), honoring sync semantics when required.
 */
static int
zfs_deallocate(struct vop_deallocate_args *ap)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zilog_t *zilog;
	off_t off, len, file_sz;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * Callers might not be able to detect properly that we are read-only,
	 * so check it explicitly here.
 */
	if (zfs_is_readonly(zfsvfs)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EROFS));
	}

	zilog = zfsvfs->z_log;
	off = *ap->a_offset;
	len = *ap->a_len;
	file_sz = zp->z_size;
	/* Clamp the request to the end of file. */
	if (off + len > file_sz)
		len = file_sz - off;
	/* Fast path for out-of-range request. */
	if (len <= 0) {
		*ap->a_len = 0;
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	error = zfs_freesp(zp, off, len, O_RDWR, TRUE);
	if (error == 0) {
		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
		    (ap->a_ioflag & IO_SYNC) != 0)
			zil_commit(zilog, zp->z_id);
		*ap->a_offset = off + len;
		*ap->a_len = 0;
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}
#endif

struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;
struct vop_vector zfs_shareops;

struct vop_vector zfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_inactive =		zfs_freebsd_inactive,
#if __FreeBSD_version >= 1300042
	.vop_need_inactive =	zfs_freebsd_need_inactive,
#endif
	.vop_reclaim =		zfs_freebsd_reclaim,
#if __FreeBSD_version >= 1300102
	.vop_fplookup_vexec =	zfs_freebsd_fplookup_vexec,
#endif
#if __FreeBSD_version >= 1300139
	.vop_fplookup_symlink =	zfs_freebsd_fplookup_symlink,
#endif
	.vop_access =		zfs_freebsd_access,
	.vop_allocate =		VOP_EINVAL,
#if __FreeBSD_version >= 1400032
	.vop_deallocate =	zfs_deallocate,
#endif
	.vop_lookup =		zfs_cache_lookup,
	.vop_cachedlookup =	zfs_freebsd_cachedlookup,
	.vop_getattr =		zfs_freebsd_getattr,
	.vop_setattr =		zfs_freebsd_setattr,
	.vop_create =		zfs_freebsd_create,
	.vop_mknod =		(vop_mknod_t *)zfs_freebsd_create,
	.vop_mkdir =		zfs_freebsd_mkdir,
	.vop_readdir =		zfs_freebsd_readdir,
	.vop_fsync =		zfs_freebsd_fsync,
	.vop_open =		zfs_freebsd_open,
	.vop_close =		zfs_freebsd_close,
	.vop_rmdir =		zfs_freebsd_rmdir,
.vop_ioctl = zfs_freebsd_ioctl, 6266 .vop_link = zfs_freebsd_link, 6267 .vop_symlink = zfs_freebsd_symlink, 6268 .vop_readlink = zfs_freebsd_readlink, 6269 .vop_read = zfs_freebsd_read, 6270 .vop_write = zfs_freebsd_write, 6271 .vop_remove = zfs_freebsd_remove, 6272 .vop_rename = zfs_freebsd_rename, 6273 .vop_pathconf = zfs_freebsd_pathconf, 6274 .vop_bmap = zfs_freebsd_bmap, 6275 .vop_fid = zfs_freebsd_fid, 6276 .vop_getextattr = zfs_getextattr, 6277 .vop_deleteextattr = zfs_deleteextattr, 6278 .vop_setextattr = zfs_setextattr, 6279 .vop_listextattr = zfs_listextattr, 6280 .vop_getacl = zfs_freebsd_getacl, 6281 .vop_setacl = zfs_freebsd_setacl, 6282 .vop_aclcheck = zfs_freebsd_aclcheck, 6283 .vop_getpages = zfs_freebsd_getpages, 6284 .vop_putpages = zfs_freebsd_putpages, 6285 .vop_vptocnp = zfs_vptocnp, 6286 #if __FreeBSD_version >= 1300064 6287 .vop_lock1 = vop_lock, 6288 .vop_unlock = vop_unlock, 6289 .vop_islocked = vop_islocked, 6290 #endif 6291 #if __FreeBSD_version >= 1400043 6292 .vop_add_writecount = vop_stdadd_writecount_nomsync, 6293 #endif 6294 }; 6295 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops); 6296 6297 struct vop_vector zfs_fifoops = { 6298 .vop_default = &fifo_specops, 6299 .vop_fsync = zfs_freebsd_fsync, 6300 #if __FreeBSD_version >= 1300102 6301 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, 6302 #endif 6303 #if __FreeBSD_version >= 1300139 6304 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, 6305 #endif 6306 .vop_access = zfs_freebsd_access, 6307 .vop_getattr = zfs_freebsd_getattr, 6308 .vop_inactive = zfs_freebsd_inactive, 6309 .vop_read = VOP_PANIC, 6310 .vop_reclaim = zfs_freebsd_reclaim, 6311 .vop_setattr = zfs_freebsd_setattr, 6312 .vop_write = VOP_PANIC, 6313 .vop_pathconf = zfs_freebsd_pathconf, 6314 .vop_fid = zfs_freebsd_fid, 6315 .vop_getacl = zfs_freebsd_getacl, 6316 .vop_setacl = zfs_freebsd_setacl, 6317 .vop_aclcheck = zfs_freebsd_aclcheck, 6318 #if __FreeBSD_version >= 1400043 6319 .vop_add_writecount = 
vop_stdadd_writecount_nomsync, 6320 #endif 6321 }; 6322 VFS_VOP_VECTOR_REGISTER(zfs_fifoops); 6323 6324 /* 6325 * special share hidden files vnode operations template 6326 */ 6327 struct vop_vector zfs_shareops = { 6328 .vop_default = &default_vnodeops, 6329 #if __FreeBSD_version >= 1300121 6330 .vop_fplookup_vexec = VOP_EAGAIN, 6331 #endif 6332 #if __FreeBSD_version >= 1300139 6333 .vop_fplookup_symlink = VOP_EAGAIN, 6334 #endif 6335 .vop_access = zfs_freebsd_access, 6336 .vop_inactive = zfs_freebsd_inactive, 6337 .vop_reclaim = zfs_freebsd_reclaim, 6338 .vop_fid = zfs_freebsd_fid, 6339 .vop_pathconf = zfs_freebsd_pathconf, 6340 #if __FreeBSD_version >= 1400043 6341 .vop_add_writecount = vop_stdadd_writecount_nomsync, 6342 #endif 6343 }; 6344 VFS_VOP_VECTOR_REGISTER(zfs_shareops); 6345 6346 ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW, 6347 "Use legacy ZFS xattr naming for writing new user namespace xattrs"); 6348