1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 25 * Copyright (c) 2014 Integros [integros.com] 26 * Copyright 2017 Nexenta Systems, Inc. 
27 */ 28 29 /* Portions Copyright 2007 Jeremy Teo */ 30 /* Portions Copyright 2010 Robert Milkowski */ 31 32 33 #include <sys/types.h> 34 #include <sys/param.h> 35 #include <sys/time.h> 36 #include <sys/systm.h> 37 #include <sys/sysmacros.h> 38 #include <sys/resource.h> 39 #include <sys/vfs.h> 40 #include <sys/endian.h> 41 #include <sys/vm.h> 42 #include <sys/vnode.h> 43 #if __FreeBSD_version >= 1300102 44 #include <sys/smr.h> 45 #endif 46 #include <sys/dirent.h> 47 #include <sys/file.h> 48 #include <sys/stat.h> 49 #include <sys/kmem.h> 50 #include <sys/taskq.h> 51 #include <sys/uio.h> 52 #include <sys/atomic.h> 53 #include <sys/namei.h> 54 #include <sys/mman.h> 55 #include <sys/cmn_err.h> 56 #include <sys/kdb.h> 57 #include <sys/sysproto.h> 58 #include <sys/errno.h> 59 #include <sys/unistd.h> 60 #include <sys/zfs_dir.h> 61 #include <sys/zfs_ioctl.h> 62 #include <sys/fs/zfs.h> 63 #include <sys/dmu.h> 64 #include <sys/dmu_objset.h> 65 #include <sys/spa.h> 66 #include <sys/txg.h> 67 #include <sys/dbuf.h> 68 #include <sys/zap.h> 69 #include <sys/sa.h> 70 #include <sys/policy.h> 71 #include <sys/sunddi.h> 72 #include <sys/filio.h> 73 #include <sys/sid.h> 74 #include <sys/zfs_ctldir.h> 75 #include <sys/zfs_fuid.h> 76 #include <sys/zfs_quota.h> 77 #include <sys/zfs_sa.h> 78 #include <sys/zfs_rlock.h> 79 #include <sys/extdirent.h> 80 #include <sys/bio.h> 81 #include <sys/buf.h> 82 #include <sys/sched.h> 83 #include <sys/acl.h> 84 #include <sys/vmmeter.h> 85 #include <vm/vm_param.h> 86 #include <sys/zil.h> 87 #include <sys/zfs_vnops.h> 88 89 #include <vm/vm_object.h> 90 91 #include <sys/extattr.h> 92 #include <sys/priv.h> 93 94 #ifndef VN_OPEN_INVFS 95 #define VN_OPEN_INVFS 0x0 96 #endif 97 98 VFS_SMR_DECLARE; 99 100 #if __FreeBSD_version >= 1300047 101 #define vm_page_wire_lock(pp) 102 #define vm_page_wire_unlock(pp) 103 #else 104 #define vm_page_wire_lock(pp) vm_page_lock(pp) 105 #define vm_page_wire_unlock(pp) vm_page_unlock(pp) 106 #endif 107 108 #ifdef DEBUG_VFS_LOCKS 
109 #define VNCHECKREF(vp) \ 110 VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp, \ 111 ("%s: wrong ref counts", __func__)); 112 #else 113 #define VNCHECKREF(vp) 114 #endif 115 116 /* 117 * Programming rules. 118 * 119 * Each vnode op performs some logical unit of work. To do this, the ZPL must 120 * properly lock its in-core state, create a DMU transaction, do the work, 121 * record this work in the intent log (ZIL), commit the DMU transaction, 122 * and wait for the intent log to commit if it is a synchronous operation. 123 * Moreover, the vnode ops must work in both normal and log replay context. 124 * The ordering of events is important to avoid deadlocks and references 125 * to freed memory. The example below illustrates the following Big Rules: 126 * 127 * (1) A check must be made in each zfs thread for a mounted file system. 128 * This is done avoiding races using ZFS_ENTER(zfsvfs). 129 * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 130 * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 131 * can return EIO from the calling function. 132 * 133 * (2) VN_RELE() should always be the last thing except for zil_commit() 134 * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 135 * First, if it's the last reference, the vnode/znode 136 * can be freed, so the zp may point to freed memory. Second, the last 137 * reference will call zfs_zinactive(), which may induce a lot of work -- 138 * pushing cached pages (which acquires range locks) and syncing out 139 * cached atime changes. Third, zfs_zinactive() may require a new tx, 140 * which could deadlock the system if you were already holding one. 141 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 142 * 143 * (3) All range locks must be grabbed before calling dmu_tx_assign(), 144 * as they can span dmu_tx_assign() calls. 145 * 146 * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to 147 * dmu_tx_assign(). 
This is critical because we don't want to block 148 * while holding locks. 149 * 150 * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This 151 * reduces lock contention and CPU usage when we must wait (note that if 152 * throughput is constrained by the storage, nearly every transaction 153 * must wait). 154 * 155 * Note, in particular, that if a lock is sometimes acquired before 156 * the tx assigns, and sometimes after (e.g. z_lock), then failing 157 * to use a non-blocking assign can deadlock the system. The scenario: 158 * 159 * Thread A has grabbed a lock before calling dmu_tx_assign(). 160 * Thread B is in an already-assigned tx, and blocks for this lock. 161 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 162 * forever, because the previous txg can't quiesce until B's tx commits. 163 * 164 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 165 * then drop all locks, call dmu_tx_wait(), and try again. On subsequent 166 * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT, 167 * to indicate that this operation has already called dmu_tx_wait(). 168 * This will ensure that we don't retry forever, waiting a short bit 169 * each time. 170 * 171 * (5) If the operation succeeded, generate the intent log entry for it 172 * before dropping locks. This ensures that the ordering of events 173 * in the intent log matches the order in which they actually occurred. 174 * During ZIL replay the zfs_log_* functions will update the sequence 175 * number to indicate the zil transaction has replayed. 176 * 177 * (6) At the end of each vnode op, the DMU tx must always commit, 178 * regardless of whether there were any errors. 179 * 180 * (7) After dropping all locks, invoke zil_commit(zilog, foid) 181 * to ensure that synchronous semantics are provided when necessary. 
182 * 183 * In general, this is how things should be ordered in each vnode op: 184 * 185 * ZFS_ENTER(zfsvfs); // exit if unmounted 186 * top: 187 * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD()) 188 * rw_enter(...); // grab any other locks you need 189 * tx = dmu_tx_create(...); // get DMU tx 190 * dmu_tx_hold_*(); // hold each object you might modify 191 * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); 192 * if (error) { 193 * rw_exit(...); // drop locks 194 * zfs_dirent_unlock(dl); // unlock directory entry 195 * VN_RELE(...); // release held vnodes 196 * if (error == ERESTART) { 197 * waited = B_TRUE; 198 * dmu_tx_wait(tx); 199 * dmu_tx_abort(tx); 200 * goto top; 201 * } 202 * dmu_tx_abort(tx); // abort DMU tx 203 * ZFS_EXIT(zfsvfs); // finished in zfs 204 * return (error); // really out of space 205 * } 206 * error = do_real_work(); // do whatever this VOP does 207 * if (error == 0) 208 * zfs_log_*(...); // on success, make ZIL entry 209 * dmu_tx_commit(tx); // commit DMU tx -- error or not 210 * rw_exit(...); // drop locks 211 * zfs_dirent_unlock(dl); // unlock directory entry 212 * VN_RELE(...); // release held vnodes 213 * zil_commit(zilog, foid); // synchronous when necessary 214 * ZFS_EXIT(zfsvfs); // finished in zfs 215 * return (error); // done, report error 216 */ 217 218 /* ARGSUSED */ 219 static int 220 zfs_open(vnode_t **vpp, int flag, cred_t *cr) 221 { 222 znode_t *zp = VTOZ(*vpp); 223 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 224 225 ZFS_ENTER(zfsvfs); 226 ZFS_VERIFY_ZP(zp); 227 228 if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 229 ((flag & FAPPEND) == 0)) { 230 ZFS_EXIT(zfsvfs); 231 return (SET_ERROR(EPERM)); 232 } 233 234 /* Keep a count of the synchronous opens in the znode */ 235 if (flag & (FSYNC | FDSYNC)) 236 atomic_inc_32(&zp->z_sync_cnt); 237 238 ZFS_EXIT(zfsvfs); 239 return (0); 240 } 241 242 /* ARGSUSED */ 243 static int 244 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, 
cred_t *cr) 245 { 246 znode_t *zp = VTOZ(vp); 247 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 248 249 ZFS_ENTER(zfsvfs); 250 ZFS_VERIFY_ZP(zp); 251 252 /* Decrement the synchronous opens in the znode */ 253 if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 254 atomic_dec_32(&zp->z_sync_cnt); 255 256 ZFS_EXIT(zfsvfs); 257 return (0); 258 } 259 260 /* ARGSUSED */ 261 static int 262 zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred, 263 int *rvalp) 264 { 265 loff_t off; 266 int error; 267 268 switch (com) { 269 case _FIOFFS: 270 { 271 return (0); 272 273 /* 274 * The following two ioctls are used by bfu. Faking out, 275 * necessary to avoid bfu errors. 276 */ 277 } 278 case _FIOGDIO: 279 case _FIOSDIO: 280 { 281 return (0); 282 } 283 284 case F_SEEK_DATA: 285 case F_SEEK_HOLE: 286 { 287 off = *(offset_t *)data; 288 /* offset parameter is in/out */ 289 error = zfs_holey(VTOZ(vp), com, &off); 290 if (error) 291 return (error); 292 *(offset_t *)data = off; 293 return (0); 294 } 295 } 296 return (SET_ERROR(ENOTTY)); 297 } 298 299 static vm_page_t 300 page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) 301 { 302 vm_object_t obj; 303 vm_page_t pp; 304 int64_t end; 305 306 /* 307 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE 308 * aligned boundaries, if the range is not aligned. As a result a 309 * DEV_BSIZE subrange with partially dirty data may get marked as clean. 310 * It may happen that all DEV_BSIZE subranges are marked clean and thus 311 * the whole page would be considered clean despite have some 312 * dirty data. 313 * For this reason we should shrink the range to DEV_BSIZE aligned 314 * boundaries before calling vm_page_clear_dirty. 
315 */ 316 end = rounddown2(off + nbytes, DEV_BSIZE); 317 off = roundup2(off, DEV_BSIZE); 318 nbytes = end - off; 319 320 obj = vp->v_object; 321 zfs_vmobject_assert_wlocked_12(obj); 322 #if __FreeBSD_version < 1300050 323 for (;;) { 324 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 325 pp->valid) { 326 if (vm_page_xbusied(pp)) { 327 /* 328 * Reference the page before unlocking and 329 * sleeping so that the page daemon is less 330 * likely to reclaim it. 331 */ 332 vm_page_reference(pp); 333 vm_page_lock(pp); 334 zfs_vmobject_wunlock(obj); 335 vm_page_busy_sleep(pp, "zfsmwb", true); 336 zfs_vmobject_wlock(obj); 337 continue; 338 } 339 vm_page_sbusy(pp); 340 } else if (pp != NULL) { 341 ASSERT(!pp->valid); 342 pp = NULL; 343 } 344 if (pp != NULL) { 345 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 346 vm_object_pip_add(obj, 1); 347 pmap_remove_write(pp); 348 if (nbytes != 0) 349 vm_page_clear_dirty(pp, off, nbytes); 350 } 351 break; 352 } 353 #else 354 vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start), 355 VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | 356 VM_ALLOC_IGN_SBUSY); 357 if (pp != NULL) { 358 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 359 vm_object_pip_add(obj, 1); 360 pmap_remove_write(pp); 361 if (nbytes != 0) 362 vm_page_clear_dirty(pp, off, nbytes); 363 } 364 #endif 365 return (pp); 366 } 367 368 static void 369 page_unbusy(vm_page_t pp) 370 { 371 372 vm_page_sunbusy(pp); 373 #if __FreeBSD_version >= 1300041 374 vm_object_pip_wakeup(pp->object); 375 #else 376 vm_object_pip_subtract(pp->object, 1); 377 #endif 378 } 379 380 #if __FreeBSD_version > 1300051 381 static vm_page_t 382 page_hold(vnode_t *vp, int64_t start) 383 { 384 vm_object_t obj; 385 vm_page_t m; 386 387 obj = vp->v_object; 388 vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start), 389 VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY | 390 VM_ALLOC_NOBUSY); 391 return (m); 392 } 393 #else 394 static vm_page_t 395 page_hold(vnode_t *vp, int64_t start) 396 { 
397 vm_object_t obj; 398 vm_page_t pp; 399 400 obj = vp->v_object; 401 zfs_vmobject_assert_wlocked(obj); 402 403 for (;;) { 404 if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && 405 pp->valid) { 406 if (vm_page_xbusied(pp)) { 407 /* 408 * Reference the page before unlocking and 409 * sleeping so that the page daemon is less 410 * likely to reclaim it. 411 */ 412 vm_page_reference(pp); 413 vm_page_lock(pp); 414 zfs_vmobject_wunlock(obj); 415 vm_page_busy_sleep(pp, "zfsmwb", true); 416 zfs_vmobject_wlock(obj); 417 continue; 418 } 419 420 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); 421 vm_page_wire_lock(pp); 422 vm_page_hold(pp); 423 vm_page_wire_unlock(pp); 424 425 } else 426 pp = NULL; 427 break; 428 } 429 return (pp); 430 } 431 #endif 432 433 static void 434 page_unhold(vm_page_t pp) 435 { 436 437 vm_page_wire_lock(pp); 438 #if __FreeBSD_version >= 1300035 439 vm_page_unwire(pp, PQ_ACTIVE); 440 #else 441 vm_page_unhold(pp); 442 #endif 443 vm_page_wire_unlock(pp); 444 } 445 446 /* 447 * When a file is memory mapped, we must keep the IO data synchronized 448 * between the DMU cache and the memory mapped pages. What this means: 449 * 450 * On Write: If we find a memory mapped page, we write to *both* 451 * the page and the dmu buffer. 
452 */ 453 void 454 update_pages(znode_t *zp, int64_t start, int len, objset_t *os) 455 { 456 vm_object_t obj; 457 struct sf_buf *sf; 458 vnode_t *vp = ZTOV(zp); 459 caddr_t va; 460 int off; 461 462 ASSERT3P(vp->v_mount, !=, NULL); 463 obj = vp->v_object; 464 ASSERT3P(obj, !=, NULL); 465 466 off = start & PAGEOFFSET; 467 zfs_vmobject_wlock_12(obj); 468 #if __FreeBSD_version >= 1300041 469 vm_object_pip_add(obj, 1); 470 #endif 471 for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 472 vm_page_t pp; 473 int nbytes = imin(PAGESIZE - off, len); 474 475 if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { 476 zfs_vmobject_wunlock_12(obj); 477 478 va = zfs_map_page(pp, &sf); 479 (void) dmu_read(os, zp->z_id, start + off, nbytes, 480 va + off, DMU_READ_PREFETCH); 481 zfs_unmap_page(sf); 482 483 zfs_vmobject_wlock_12(obj); 484 page_unbusy(pp); 485 } 486 len -= nbytes; 487 off = 0; 488 } 489 #if __FreeBSD_version >= 1300041 490 vm_object_pip_wakeup(obj); 491 #else 492 vm_object_pip_wakeupn(obj, 0); 493 #endif 494 zfs_vmobject_wunlock_12(obj); 495 } 496 497 /* 498 * Read with UIO_NOCOPY flag means that sendfile(2) requests 499 * ZFS to populate a range of page cache pages with data. 500 * 501 * NOTE: this function could be optimized to pre-allocate 502 * all pages in advance, drain exclusive busy on all of them, 503 * map them into contiguous KVA region and populate them 504 * in one single dmu_read() call. 
 */
/*
 *	IN:	zp	- znode whose page cache pages are populated.
 *		nbytes	- number of bytes to populate, starting at the
 *			  uio's current offset (asserted page aligned).
 *		uio	- UIO_NOCOPY uio; advanced page by page on success.
 *
 *	RETURN:	0 on success, or the first error returned by dmu_read().
 */
int
mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = zp->z_zfsvfs->z_os;
	struct sf_buf *sf;
	vm_object_t obj;
	vm_page_t pp;
	int64_t start;
	caddr_t va;
	int len = nbytes;
	int error = 0;

	ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);
	ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);

	zfs_vmobject_wlock_12(obj);
	for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
		/* Only the final page of the request can be partial. */
		int bytes = MIN(PAGESIZE, len);

		pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
		    VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
		if (vm_page_none_valid(pp)) {
			/*
			 * The page holds no valid data yet: fill it from the
			 * DMU with the object lock dropped.
			 */
			zfs_vmobject_wunlock_12(obj);
			va = zfs_map_page(pp, &sf);
			error = dmu_read(os, zp->z_id, start, bytes, va,
			    DMU_READ_PREFETCH);
			/* Zero the tail of a partially filled page. */
			if (bytes != PAGESIZE && error == 0)
				bzero(va + bytes, PAGESIZE - bytes);
			zfs_unmap_page(sf);
			zfs_vmobject_wlock_12(obj);
#if __FreeBSD_version >= 1300081
			if (error == 0) {
				/* Publish the page, then drop our busy. */
				vm_page_valid(pp);
				vm_page_activate(pp);
				vm_page_do_sunbusy(pp);
			} else {
				/*
				 * The read failed: free the page if it is
				 * unwired, still invalid and the shared busy
				 * can be upgraded; otherwise only unbusy it.
				 */
				zfs_vmobject_wlock(obj);
				if (!vm_page_wired(pp) && pp->valid == 0 &&
				    vm_page_busy_tryupgrade(pp))
					vm_page_free(pp);
				else
					vm_page_sunbusy(pp);
				zfs_vmobject_wunlock(obj);
			}
#else
			/* Older kernels: unbusy first, then fix up the page. */
			vm_page_do_sunbusy(pp);
			vm_page_lock(pp);
			if (error) {
				if (pp->wire_count == 0 && pp->valid == 0 &&
				    !vm_page_busied(pp))
					vm_page_free(pp);
			} else {
				pp->valid = VM_PAGE_BITS_ALL;
				vm_page_activate(pp);
			}
			vm_page_unlock(pp);
#endif
		} else {
			/* Already resident and valid: nothing to read. */
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_do_sunbusy(pp);
		}
		/* On error the uio is left pointing at the failed page. */
		if (error)
			break;
		zfs_uio_advance(uio, bytes);
		len -= bytes;
	}
	zfs_vmobject_wunlock_12(obj);
	return (error);
}

/*
 * When a file is memory mapped, we must keep the IO data
synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Read:	We "read" preferentially from memory mapped pages,
 *		else we default from the dmu buffer.
 *
 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
 *	 the file is memory mapped.
 *
 *	IN:	zp	- znode being read.
 *		nbytes	- number of bytes to transfer.
 *		uio	- destination; the transfer starts at its offset.
 *
 *	RETURN:	0 on success, or the first error from the copy/read.
 */
int
mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	vm_object_t obj;
	int64_t start;
	int len = nbytes;
	int off;
	int error = 0;

	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);

	start = zfs_uio_offset(uio);
	/* Only the first page can start at a non-zero in-page offset. */
	off = start & PAGEOFFSET;
	zfs_vmobject_wlock_12(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		uint64_t bytes = MIN(PAGESIZE - off, len);

		if ((pp = page_hold(vp, start))) {
			struct sf_buf *sf;
			caddr_t va;

			/* A valid page is resident: copy straight from it. */
			zfs_vmobject_wunlock_12(obj);
			va = zfs_map_page(pp, &sf);
			error = vn_io_fault_uiomove(va + off, bytes,
			    GET_UIO_STRUCT(uio));
			zfs_unmap_page(sf);
			zfs_vmobject_wlock_12(obj);
			page_unhold(pp);
		} else {
			/* No usable page: read this piece through the DMU. */
			zfs_vmobject_wunlock_12(obj);
			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, bytes);
			zfs_vmobject_wlock_12(obj);
		}
		len -= bytes;
		off = 0;
		if (error)
			break;
	}
	zfs_vmobject_wunlock_12(obj);
	return (error);
}

/*
 * Write `len' bytes from the kernel buffer `data' to zp at offset `pos'
 * using vn_rdwr() with IO_SYNC.
 *
 * If presid is NULL, a short write is reported as EIO; otherwise the
 * residual count is stored in *presid and a short write is not an error.
 *
 *	RETURN:	0 on success, error code on failure.
 */
int
zfs_write_simple(znode_t *zp, const void *data, size_t len,
    loff_t pos, size_t *presid)
{
	int error = 0;
	ssize_t resid;

	error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
	    UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);

	if (error) {
		return (SET_ERROR(error));
	} else if (presid == NULL) {
		if (resid != 0) {
			error = SET_ERROR(EIO);
		}
	} else {
		*presid = resid;
	}
	return (error);
}

/*
 * Release a reference on zp's vnode asynchronously, via VN_RELE_ASYNC() on
 * the zrele taskq of the pool that owns the znode's objset.
 */
void
zfs_zrele_async(znode_t *zp)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = ITOZSB(vp)->z_os;

	VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
}

/*
 * Callback handed to vn_vget_ino_gen() by zfs_lookup_lock(): `arg' is the
 * already-referenced vnode to return.  It is locked with lkflags; if the
 * lock fails the reference is dropped and the error is returned.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	int error;

	*vpp = arg;
	error = vn_lock(*vpp, lkflags);
	if (error != 0)
		vrele(*vpp);
	return (error);
}

/*
 * Lock vp, the result of looking up `name' in dvp, as requested by lkflags.
 * Three cases are distinguished by name: "" or "." (vp is dvp itself),
 * ".." (vp is the parent of dvp) and everything else.
 *
 *	RETURN:	0 with vp locked, or an error with the reference on vp
 *		already released.
 */
static int
zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
{
	znode_t *zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
	int error;
	int ltype;

	if (zfsvfs->z_replay == B_FALSE)
		ASSERT_VOP_LOCKED(dvp, __func__);

	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
		ASSERT3P(dvp, ==, vp);
		/* Take a reference on dvp for the caller. */
		vref(dvp);
		ltype = lkflags & LK_TYPE_MASK;
		/* dvp is already locked; only convert the lock type. */
		if (ltype != VOP_ISLOCKED(dvp)) {
			if (ltype == LK_EXCLUSIVE)
				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
			else /* if (ltype == LK_SHARED) */
				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);

			/*
			 * Relock for the "." case could leave us with
			 * reclaimed vnode.
			 */
			if (VN_IS_DOOMED(dvp)) {
				vrele(dvp);
				return (SET_ERROR(ENOENT));
			}
		}
		return (0);
	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
		/*
		 * Note that in this case, dvp is the child vnode, and we
		 * are looking up the parent vnode - exactly reverse from
		 * normal operation.  Unlocking dvp requires some rather
		 * tricky unlock/relock dance to prevent mp from being freed;
		 * use vn_vget_ino_gen() which takes care of all that.
		 *
		 * XXX Note that there is a time window when both vnodes are
		 * unlocked.  It is possible, although highly unlikely, that
		 * during that window the parent-child relationship between
		 * the vnodes may change, for example, get reversed.
		 * In that case we would have a wrong lock order for the vnodes.
		 * All other filesystems seem to ignore this problem, so we
		 * do the same here.
		 * A potential solution could be implemented as follows:
		 * - using LK_NOWAIT when locking the second vnode and retrying
		 *   if necessary
		 * - checking that the parent-child relationship still holds
		 *   after locking both vnodes and retrying if it doesn't
		 */
		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
		return (error);
	} else {
		/* Ordinary child: dvp stays locked while vp is locked. */
		error = vn_lock(vp, lkflags);
		if (error != 0)
			vrele(vp);
		return (error);
	}
}

/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 *	IN:	dvp	- vnode of directory to search.
 *		nm	- name of entry to lookup.
 *		cnp	- component name; cn_flags and cn_lkflags are
 *			  consulted, SAVENAME may be set and MAKEENTRY
 *			  may be cleared in cn_flags.
 *		nameiop	- operation being performed (compared against
 *			  LOOKUP, CREATE, RENAME and DELETE).
 *		cr	- credentials of caller.
 *		flags	- LOOKUP_XATTR set if looking for an attribute.
 *		cached	- if set, the execute-permission check on dvp
 *			  is skipped.
 *
 *	OUT:	vpp	- vnode of located entry, NULL if not found.
 *
 *	RETURN:	0 on success, error code on failure.  For the last
 *		component of a CREATE or RENAME, ENOENT is translated
 *		to EJUSTRETURN.
 *
 * Timestamps:
 *	NA
 */
/* ARGSUSED */
static int
zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
    struct componentname *cnp, int nameiop, cred_t *cr, int flags,
    boolean_t cached)
{
	znode_t *zdp = VTOZ(dvp);
	znode_t *zp;
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
#if __FreeBSD_version > 1300124
	seqc_t dvp_seqc;
#endif
	int error = 0;

	/*
	 * Fast path lookup, however we must skip DNLC lookup
	 * for case folding or normalizing lookups because the
	 * DNLC code only stores the passed in name.  This means
	 * creating 'a' and removing 'A' on a case insensitive
	 * file system would work, but DNLC still thinks 'a'
	 * exists and won't let you create it again on the next
	 * pass through fast path.
	 *
	 * NOTE(review): no DNLC fast path is visible below; the only
	 * thing left here is the sanity check of dvp for non-xattr
	 * lookups.  The paragraph above looks inherited -- confirm.
	 */
	if (!(flags & LOOKUP_XATTR)) {
		if (dvp->v_type != VDIR) {
			return (SET_ERROR(ENOTDIR));
		} else if (zdp->z_sa_hdl == NULL) {
			return (SET_ERROR(EIO));
		}
	}

	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
	    const char *, nm);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zdp);

#if __FreeBSD_version > 1300124
	/* Sampled now, re-checked before the name cache is populated. */
	dvp_seqc = vn_seqc_read_notmodify(dvp);
#endif

	*vpp = NULL;

	if (flags & LOOKUP_XATTR) {
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EOPNOTSUPP));
		}

		/*
		 * We don't allow recursive attributes..
		 * Maybe someday we will.
		 */
		if (zdp->z_pflags & ZFS_XATTR) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EINVAL));
		}

		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		*vpp = ZTOV(zp);

		/*
		 * Do we have permission to get into attribute directory?
		 */
		error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr);
		if (error) {
			vrele(ZTOV(zp));
		}

		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Check accessibility of directory if we're not coming in via
	 * VOP_CACHEDLOOKUP.
	 */
	if (!cached) {
#ifdef NOEXECCHECK
		/* NOEXECCHECK is consumed here: cleared, and no check made. */
		if ((cnp->cn_flags & NOEXECCHECK) != 0) {
			cnp->cn_flags &= ~NOEXECCHECK;
		} else
#endif
		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/* On a UTF-8-only file system, reject names that fail validation. */
	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}


	/*
	 * First handle the special cases.
	 */
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * If we are a snapshot mounted under .zfs, return
		 * the vp for the snapshot directory.
		 */
		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
			struct componentname cn;
			vnode_t *zfsctl_vp;
			int ltype;

			ZFS_EXIT(zfsvfs);
			/* Remember dvp's lock type so it can be restored. */
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK1(dvp);
			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
			    &zfsctl_vp);
			if (error == 0) {
				/* Look up "snapshot" in the control dir. */
				cn.cn_nameptr = "snapshot";
				cn.cn_namelen = strlen(cn.cn_nameptr);
				cn.cn_nameiop = cnp->cn_nameiop;
				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
				cn.cn_lkflags = cnp->cn_lkflags;
				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
				vput(zfsctl_vp);
			}
			vn_lock(dvp, ltype | LK_RETRY);
			return (error);
		}
	}
	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
		ZFS_EXIT(zfsvfs);
		/* Only a plain LOOKUP may end on the control directory. */
		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
			return (SET_ERROR(ENOTSUP));
		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
		return (error);
	}

	/*
	 * The loop retries the lookup if the parent-child relationship
	 * changes during the dot-dot locking complexities.
	 */
	for (;;) {
		uint64_t parent;

		error = zfs_dirlook(zdp, nm, &zp);
		if (error == 0)
			*vpp = ZTOV(zp);

		ZFS_EXIT(zfsvfs);
		if (error != 0)
			break;

		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
		if (error != 0) {
			/*
			 * If we've got a locking error, then the vnode
			 * got reclaimed because of a force unmount.
			 * We never enter doomed vnodes into the name cache.
			 */
			*vpp = NULL;
			return (error);
		}

		/* Only a ".." lookup needs to be re-validated. */
		if ((cnp->cn_flags & ISDOTDOT) == 0)
			break;

		ZFS_ENTER(zfsvfs);
		if (zdp->z_sa_hdl == NULL) {
			error = SET_ERROR(EIO);
		} else {
			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
			    &parent, sizeof (parent));
		}
		if (error != 0) {
			ZFS_EXIT(zfsvfs);
			vput(ZTOV(zp));
			break;
		}
		/* The vnode we locked is still the parent: done. */
		if (zp->z_id == parent) {
			ZFS_EXIT(zfsvfs);
			break;
		}
		/* The parent changed under us; drop the result and retry. */
		vput(ZTOV(zp));
	}

	if (error != 0)
		*vpp = NULL;

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			if (error == ENOENT) {
				error = EJUSTRETURN;
				cnp->cn_flags |= SAVENAME;
				break;
			}
			fallthrough;
		case DELETE:
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
			break;
		}
	}

#if __FreeBSD_version > 1300124
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
		 * handle races. In particular different callers may end up
		 * with different vnodes and will try to add conflicting
		 * entries to the namecache.
		 *
		 * While finding different result may be acceptable in face
		 * of concurrent modification, adding conflicting entries
		 * trips over an assert in the namecache.
		 *
		 * Ultimately let an entry through once everything settles.
		 */
		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
			cnp->cn_flags &= ~MAKEENTRY;
		}
	}
#endif

	/* Insert name into cache (as non-existent) if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(dvp, NULL, cnp);

	/* Insert name into cache if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
		/* The last component of a DELETE or RENAME is not cached. */
		if (!(cnp->cn_flags & ISLASTCN) ||
		    (nameiop != DELETE && nameiop != RENAME)) {
			cache_enter(dvp, *vpp, cnp);
		}
	}

	return (error);
}

/*
 * Attempt to create a new entry in a directory.  If the entry
 * already exists, truncate the file if permissible, else return
 * an error.  Return the znode of the created or trunc'd file.
 *
 * NOTE(review): the body below only creates new entries (the directory
 * lookup is done with ZNEW) and contains no truncation path; the
 * "truncate" wording above looks inherited -- confirm.
 *
 *	IN:	dzp	- znode of directory to put new file entry in.
 *		name	- name of new file entry.
 *		vap	- attributes of new file.
 *		excl	- flag indicating exclusive or non-exclusive mode
 *			  [UNUSED].
 *		mode	- mode to open file with [UNUSED].
 *		cr	- credentials of caller.
 *		flag	- large file flag [UNUSED].
 *		vsecp	- ACL to be set
 *
 *	OUT:	zpp	- znode of created entry; set to NULL first and
 *			  only filled in on success.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated if new entry created
 *	 vp - ctime|mtime always, atime if new
 */

/* ARGSUSED */
int
zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
    znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
{
	znode_t *zp;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	objset_t *os;
	dmu_tx_t *tx;
	int error;
	ksid_t *ksid;
	uid_t uid;
	gid_t gid = crgetgid(cr);
	uint64_t projid = ZFS_DEFAULT_PROJID;
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;
	uint64_t txtype;
#ifdef DEBUG_VFS_LOCKS
	/* Only referenced by VNCHECKREF(), which is empty otherwise. */
	vnode_t *dvp = ZTOV(dzp);
#endif

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	/* On a UTF-8-only file system, reject names that fail validation. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	*zpp = NULL;

	/* Drop the sticky bit when the policy check returns non-zero. */
	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~S_ISVTX;

	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ASSERT3P(zp, ==, NULL);

	/*
	 * Create a new file object and update the directory
	 * to reference it.
	 */
	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		goto out;
	}

	/*
	 * We only support the creation of regular files in
	 * extended attribute directories.
	 */

	if ((dzp->z_pflags & ZFS_XATTR) &&
	    (vap->va_type != VREG)) {
		error = SET_ERROR(EINVAL);
		goto out;
	}

	if ((error = zfs_acl_ids_create(dzp, 0, vap,
	    cr, vsecp, &acl_ids)) != 0)
		goto out;

	/* Regular files and directories inherit the parent's project ID. */
	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
		projid = zfs_inherit_projid(dzp);
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
		zfs_acl_ids_free(&acl_ids);
		error = SET_ERROR(EDQUOT);
		goto out;
	}

	/* Paired with getnewvnode_drop_reserve() on every path below. */
	getnewvnode_reserve_();

	tx = dmu_tx_create(os);

	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);

	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa &&
	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
		    0, acl_ids.z_aclp->z_acl_bytes);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/* The result of linking the new znode is deliberately ignored. */
	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
	    vsecp, acl_ids.z_fuidp, vap);
	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

out:
	VNCHECKREF(dvp);
	if (error == 0) {
		*zpp = zp;
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Remove an entry from a directory.
 *
 *	IN:	dvp	- vnode of directory to remove entry from.
1187 * name - name of entry to remove. 1188 * cr - credentials of caller. 1189 * ct - caller context 1190 * flags - case flags 1191 * 1192 * RETURN: 0 on success, error code on failure. 1193 * 1194 * Timestamps: 1195 * dvp - ctime|mtime 1196 * vp - ctime (if nlink > 0) 1197 */ 1198 1199 /*ARGSUSED*/ 1200 static int 1201 zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr) 1202 { 1203 znode_t *dzp = VTOZ(dvp); 1204 znode_t *zp; 1205 znode_t *xzp; 1206 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1207 zilog_t *zilog; 1208 uint64_t xattr_obj; 1209 uint64_t obj = 0; 1210 dmu_tx_t *tx; 1211 boolean_t unlinked; 1212 uint64_t txtype; 1213 int error; 1214 1215 1216 ZFS_ENTER(zfsvfs); 1217 ZFS_VERIFY_ZP(dzp); 1218 zp = VTOZ(vp); 1219 ZFS_VERIFY_ZP(zp); 1220 zilog = zfsvfs->z_log; 1221 1222 xattr_obj = 0; 1223 xzp = NULL; 1224 1225 if ((error = zfs_zaccess_delete(dzp, zp, cr))) { 1226 goto out; 1227 } 1228 1229 /* 1230 * Need to use rmdir for removing directories. 1231 */ 1232 if (vp->v_type == VDIR) { 1233 error = SET_ERROR(EPERM); 1234 goto out; 1235 } 1236 1237 vnevent_remove(vp, dvp, name, ct); 1238 1239 obj = zp->z_id; 1240 1241 /* are there any extended attributes? */ 1242 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 1243 &xattr_obj, sizeof (xattr_obj)); 1244 if (error == 0 && xattr_obj) { 1245 error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1246 ASSERT0(error); 1247 } 1248 1249 /* 1250 * We may delete the znode now, or we may put it in the unlinked set; 1251 * it depends on whether we're the last link, and on whether there are 1252 * other holds on the vnode. So we dmu_tx_hold() the right things to 1253 * allow for either case. 
1254 */ 1255 tx = dmu_tx_create(zfsvfs->z_os); 1256 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1257 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1258 zfs_sa_upgrade_txholds(tx, zp); 1259 zfs_sa_upgrade_txholds(tx, dzp); 1260 1261 if (xzp) { 1262 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 1263 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1264 } 1265 1266 /* charge as an update -- would be nice not to charge at all */ 1267 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1268 1269 /* 1270 * Mark this transaction as typically resulting in a net free of space 1271 */ 1272 dmu_tx_mark_netfree(tx); 1273 1274 error = dmu_tx_assign(tx, TXG_WAIT); 1275 if (error) { 1276 dmu_tx_abort(tx); 1277 ZFS_EXIT(zfsvfs); 1278 return (error); 1279 } 1280 1281 /* 1282 * Remove the directory entry. 1283 */ 1284 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked); 1285 1286 if (error) { 1287 dmu_tx_commit(tx); 1288 goto out; 1289 } 1290 1291 if (unlinked) { 1292 zfs_unlinked_add(zp, tx); 1293 vp->v_vflag |= VV_NOSYNC; 1294 } 1295 /* XXX check changes to linux vnops */ 1296 txtype = TX_REMOVE; 1297 zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked); 1298 1299 dmu_tx_commit(tx); 1300 out: 1301 1302 if (xzp) 1303 vrele(ZTOV(xzp)); 1304 1305 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1306 zil_commit(zilog, 0); 1307 1308 1309 ZFS_EXIT(zfsvfs); 1310 return (error); 1311 } 1312 1313 1314 static int 1315 zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp, 1316 struct componentname *cnp, int nameiop) 1317 { 1318 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1319 int error; 1320 1321 cnp->cn_nameptr = __DECONST(char *, name); 1322 cnp->cn_namelen = strlen(name); 1323 cnp->cn_nameiop = nameiop; 1324 cnp->cn_flags = ISLASTCN | SAVENAME; 1325 cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 1326 cnp->cn_cred = kcred; 1327 #if __FreeBSD_version < 1400037 1328 cnp->cn_thread = curthread; 1329 #endif 1330 1331 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) { 1332 struct 
vop_lookup_args a; 1333 1334 a.a_gen.a_desc = &vop_lookup_desc; 1335 a.a_dvp = ZTOV(dzp); 1336 a.a_vpp = vpp; 1337 a.a_cnp = cnp; 1338 error = vfs_cache_lookup(&a); 1339 } else { 1340 error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0, 1341 B_FALSE); 1342 } 1343 #ifdef ZFS_DEBUG 1344 if (error) { 1345 printf("got error %d on name %s on op %d\n", error, name, 1346 nameiop); 1347 kdb_backtrace(); 1348 } 1349 #endif 1350 return (error); 1351 } 1352 1353 int 1354 zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags) 1355 { 1356 vnode_t *vp; 1357 int error; 1358 struct componentname cn; 1359 1360 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE))) 1361 return (error); 1362 1363 error = zfs_remove_(ZTOV(dzp), vp, name, cr); 1364 vput(vp); 1365 return (error); 1366 } 1367 /* 1368 * Create a new directory and insert it into dvp using the name 1369 * provided. Return a pointer to the inserted directory. 1370 * 1371 * IN: dvp - vnode of directory to add subdir to. 1372 * dirname - name of new directory. 1373 * vap - attributes of new directory. 1374 * cr - credentials of caller. 1375 * ct - caller context 1376 * flags - case flags 1377 * vsecp - ACL to be set 1378 * 1379 * OUT: vpp - vnode of created directory. 1380 * 1381 * RETURN: 0 on success, error code on failure. 
1382 * 1383 * Timestamps: 1384 * dvp - ctime|mtime updated 1385 * vp - ctime|mtime|atime updated 1386 */ 1387 /*ARGSUSED*/ 1388 int 1389 zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp, 1390 cred_t *cr, int flags, vsecattr_t *vsecp) 1391 { 1392 znode_t *zp; 1393 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1394 zilog_t *zilog; 1395 uint64_t txtype; 1396 dmu_tx_t *tx; 1397 int error; 1398 ksid_t *ksid; 1399 uid_t uid; 1400 gid_t gid = crgetgid(cr); 1401 zfs_acl_ids_t acl_ids; 1402 boolean_t fuid_dirtied; 1403 1404 ASSERT3U(vap->va_type, ==, VDIR); 1405 1406 /* 1407 * If we have an ephemeral id, ACL, or XVATTR then 1408 * make sure file system is at proper version 1409 */ 1410 1411 ksid = crgetsid(cr, KSID_OWNER); 1412 if (ksid) 1413 uid = ksid_getid(ksid); 1414 else 1415 uid = crgetuid(cr); 1416 if (zfsvfs->z_use_fuids == B_FALSE && 1417 ((vap->va_mask & AT_XVATTR) || 1418 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1419 return (SET_ERROR(EINVAL)); 1420 1421 ZFS_ENTER(zfsvfs); 1422 ZFS_VERIFY_ZP(dzp); 1423 zilog = zfsvfs->z_log; 1424 1425 if (dzp->z_pflags & ZFS_XATTR) { 1426 ZFS_EXIT(zfsvfs); 1427 return (SET_ERROR(EINVAL)); 1428 } 1429 1430 if (zfsvfs->z_utf8 && u8_validate(dirname, 1431 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1432 ZFS_EXIT(zfsvfs); 1433 return (SET_ERROR(EILSEQ)); 1434 } 1435 1436 if (vap->va_mask & AT_XVATTR) { 1437 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap, 1438 crgetuid(cr), cr, vap->va_type)) != 0) { 1439 ZFS_EXIT(zfsvfs); 1440 return (error); 1441 } 1442 } 1443 1444 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 1445 NULL, &acl_ids)) != 0) { 1446 ZFS_EXIT(zfsvfs); 1447 return (error); 1448 } 1449 1450 /* 1451 * First make sure the new directory doesn't exist. 1452 * 1453 * Existence is checked first to make sure we don't return 1454 * EACCES instead of EEXIST which can cause some applications 1455 * to fail. 
1456 */ 1457 *zpp = NULL; 1458 1459 if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) { 1460 zfs_acl_ids_free(&acl_ids); 1461 ZFS_EXIT(zfsvfs); 1462 return (error); 1463 } 1464 ASSERT3P(zp, ==, NULL); 1465 1466 if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) { 1467 zfs_acl_ids_free(&acl_ids); 1468 ZFS_EXIT(zfsvfs); 1469 return (error); 1470 } 1471 1472 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) { 1473 zfs_acl_ids_free(&acl_ids); 1474 ZFS_EXIT(zfsvfs); 1475 return (SET_ERROR(EDQUOT)); 1476 } 1477 1478 /* 1479 * Add a new entry to the directory. 1480 */ 1481 getnewvnode_reserve_(); 1482 tx = dmu_tx_create(zfsvfs->z_os); 1483 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 1484 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 1485 fuid_dirtied = zfsvfs->z_fuid_dirty; 1486 if (fuid_dirtied) 1487 zfs_fuid_txhold(zfsvfs, tx); 1488 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1489 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1490 acl_ids.z_aclp->z_acl_bytes); 1491 } 1492 1493 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1494 ZFS_SA_BASE_ATTR_SIZE); 1495 1496 error = dmu_tx_assign(tx, TXG_WAIT); 1497 if (error) { 1498 zfs_acl_ids_free(&acl_ids); 1499 dmu_tx_abort(tx); 1500 getnewvnode_drop_reserve(); 1501 ZFS_EXIT(zfsvfs); 1502 return (error); 1503 } 1504 1505 /* 1506 * Create new node. 1507 */ 1508 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1509 1510 if (fuid_dirtied) 1511 zfs_fuid_sync(zfsvfs, tx); 1512 1513 /* 1514 * Now put new name in parent dir. 
1515 */ 1516 (void) zfs_link_create(dzp, dirname, zp, tx, ZNEW); 1517 1518 *zpp = zp; 1519 1520 txtype = zfs_log_create_txtype(Z_DIR, NULL, vap); 1521 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL, 1522 acl_ids.z_fuidp, vap); 1523 1524 zfs_acl_ids_free(&acl_ids); 1525 1526 dmu_tx_commit(tx); 1527 1528 getnewvnode_drop_reserve(); 1529 1530 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1531 zil_commit(zilog, 0); 1532 1533 ZFS_EXIT(zfsvfs); 1534 return (0); 1535 } 1536 1537 #if __FreeBSD_version < 1300124 1538 static void 1539 cache_vop_rmdir(struct vnode *dvp, struct vnode *vp) 1540 { 1541 1542 cache_purge(dvp); 1543 cache_purge(vp); 1544 } 1545 #endif 1546 1547 /* 1548 * Remove a directory subdir entry. If the current working 1549 * directory is the same as the subdir to be removed, the 1550 * remove will fail. 1551 * 1552 * IN: dvp - vnode of directory to remove from. 1553 * name - name of directory to be removed. 1554 * cwd - vnode of current working directory. 1555 * cr - credentials of caller. 1556 * ct - caller context 1557 * flags - case flags 1558 * 1559 * RETURN: 0 on success, error code on failure. 
1560 * 1561 * Timestamps: 1562 * dvp - ctime|mtime updated 1563 */ 1564 /*ARGSUSED*/ 1565 static int 1566 zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr) 1567 { 1568 znode_t *dzp = VTOZ(dvp); 1569 znode_t *zp = VTOZ(vp); 1570 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1571 zilog_t *zilog; 1572 dmu_tx_t *tx; 1573 int error; 1574 1575 ZFS_ENTER(zfsvfs); 1576 ZFS_VERIFY_ZP(dzp); 1577 ZFS_VERIFY_ZP(zp); 1578 zilog = zfsvfs->z_log; 1579 1580 1581 if ((error = zfs_zaccess_delete(dzp, zp, cr))) { 1582 goto out; 1583 } 1584 1585 if (vp->v_type != VDIR) { 1586 error = SET_ERROR(ENOTDIR); 1587 goto out; 1588 } 1589 1590 vnevent_rmdir(vp, dvp, name, ct); 1591 1592 tx = dmu_tx_create(zfsvfs->z_os); 1593 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1594 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1595 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1596 zfs_sa_upgrade_txholds(tx, zp); 1597 zfs_sa_upgrade_txholds(tx, dzp); 1598 dmu_tx_mark_netfree(tx); 1599 error = dmu_tx_assign(tx, TXG_WAIT); 1600 if (error) { 1601 dmu_tx_abort(tx); 1602 ZFS_EXIT(zfsvfs); 1603 return (error); 1604 } 1605 1606 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL); 1607 1608 if (error == 0) { 1609 uint64_t txtype = TX_RMDIR; 1610 zfs_log_remove(zilog, tx, txtype, dzp, name, 1611 ZFS_NO_OBJECT, B_FALSE); 1612 } 1613 1614 dmu_tx_commit(tx); 1615 1616 cache_vop_rmdir(dvp, vp); 1617 out: 1618 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1619 zil_commit(zilog, 0); 1620 1621 ZFS_EXIT(zfsvfs); 1622 return (error); 1623 } 1624 1625 int 1626 zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags) 1627 { 1628 struct componentname cn; 1629 vnode_t *vp; 1630 int error; 1631 1632 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE))) 1633 return (error); 1634 1635 error = zfs_rmdir_(ZTOV(dzp), vp, name, cr); 1636 vput(vp); 1637 return (error); 1638 } 1639 1640 /* 1641 * Read as many directory entries as will fit into the provided 1642 * buffer from the 
given directory cursor position (specified in 1643 * the uio structure). 1644 * 1645 * IN: vp - vnode of directory to read. 1646 * uio - structure supplying read location, range info, 1647 * and return buffer. 1648 * cr - credentials of caller. 1649 * ct - caller context 1650 * flags - case flags 1651 * 1652 * OUT: uio - updated offset and range, buffer filled. 1653 * eofp - set to true if end-of-file detected. 1654 * 1655 * RETURN: 0 on success, error code on failure. 1656 * 1657 * Timestamps: 1658 * vp - atime updated 1659 * 1660 * Note that the low 4 bits of the cookie returned by zap is always zero. 1661 * This allows us to use the low range for "special" directory entries: 1662 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 1663 * we use the offset 2 for the '.zfs' directory. 1664 */ 1665 /* ARGSUSED */ 1666 static int 1667 zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp, 1668 int *ncookies, ulong_t **cookies) 1669 { 1670 znode_t *zp = VTOZ(vp); 1671 iovec_t *iovp; 1672 edirent_t *eodp; 1673 dirent64_t *odp; 1674 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1675 objset_t *os; 1676 caddr_t outbuf; 1677 size_t bufsize; 1678 zap_cursor_t zc; 1679 zap_attribute_t zap; 1680 uint_t bytes_wanted; 1681 uint64_t offset; /* must be unsigned; checks for < 1 */ 1682 uint64_t parent; 1683 int local_eof; 1684 int outcount; 1685 int error; 1686 uint8_t prefetch; 1687 boolean_t check_sysattrs; 1688 uint8_t type; 1689 int ncooks; 1690 ulong_t *cooks = NULL; 1691 int flags = 0; 1692 1693 ZFS_ENTER(zfsvfs); 1694 ZFS_VERIFY_ZP(zp); 1695 1696 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 1697 &parent, sizeof (parent))) != 0) { 1698 ZFS_EXIT(zfsvfs); 1699 return (error); 1700 } 1701 1702 /* 1703 * If we are not given an eof variable, 1704 * use a local one. 1705 */ 1706 if (eofp == NULL) 1707 eofp = &local_eof; 1708 1709 /* 1710 * Check for valid iov_len. 
1711 */ 1712 if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) { 1713 ZFS_EXIT(zfsvfs); 1714 return (SET_ERROR(EINVAL)); 1715 } 1716 1717 /* 1718 * Quit if directory has been removed (posix) 1719 */ 1720 if ((*eofp = zp->z_unlinked) != 0) { 1721 ZFS_EXIT(zfsvfs); 1722 return (0); 1723 } 1724 1725 error = 0; 1726 os = zfsvfs->z_os; 1727 offset = zfs_uio_offset(uio); 1728 prefetch = zp->z_zn_prefetch; 1729 1730 /* 1731 * Initialize the iterator cursor. 1732 */ 1733 if (offset <= 3) { 1734 /* 1735 * Start iteration from the beginning of the directory. 1736 */ 1737 zap_cursor_init(&zc, os, zp->z_id); 1738 } else { 1739 /* 1740 * The offset is a serialized cursor. 1741 */ 1742 zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 1743 } 1744 1745 /* 1746 * Get space to change directory entries into fs independent format. 1747 */ 1748 iovp = GET_UIO_STRUCT(uio)->uio_iov; 1749 bytes_wanted = iovp->iov_len; 1750 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) { 1751 bufsize = bytes_wanted; 1752 outbuf = kmem_alloc(bufsize, KM_SLEEP); 1753 odp = (struct dirent64 *)outbuf; 1754 } else { 1755 bufsize = bytes_wanted; 1756 outbuf = NULL; 1757 odp = (struct dirent64 *)iovp->iov_base; 1758 } 1759 eodp = (struct edirent *)odp; 1760 1761 if (ncookies != NULL) { 1762 /* 1763 * Minimum entry size is dirent size and 1 byte for a file name. 1764 */ 1765 ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) - 1766 sizeof (((struct dirent *)NULL)->d_name) + 1); 1767 cooks = malloc(ncooks * sizeof (ulong_t), M_TEMP, M_WAITOK); 1768 *cookies = cooks; 1769 *ncookies = ncooks; 1770 } 1771 /* 1772 * If this VFS supports the system attribute view interface; and 1773 * we're looking at an extended attribute directory; and we care 1774 * about normalization conflicts on this vfs; then we must check 1775 * for normalization conflicts with the sysattr name space. 
1776 */ 1777 #ifdef TODO 1778 check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 1779 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 1780 (flags & V_RDDIR_ENTFLAGS); 1781 #else 1782 check_sysattrs = 0; 1783 #endif 1784 1785 /* 1786 * Transform to file-system independent format 1787 */ 1788 outcount = 0; 1789 while (outcount < bytes_wanted) { 1790 ino64_t objnum; 1791 ushort_t reclen; 1792 off64_t *next = NULL; 1793 1794 /* 1795 * Special case `.', `..', and `.zfs'. 1796 */ 1797 if (offset == 0) { 1798 (void) strcpy(zap.za_name, "."); 1799 zap.za_normalization_conflict = 0; 1800 objnum = zp->z_id; 1801 type = DT_DIR; 1802 } else if (offset == 1) { 1803 (void) strcpy(zap.za_name, ".."); 1804 zap.za_normalization_conflict = 0; 1805 objnum = parent; 1806 type = DT_DIR; 1807 } else if (offset == 2 && zfs_show_ctldir(zp)) { 1808 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 1809 zap.za_normalization_conflict = 0; 1810 objnum = ZFSCTL_INO_ROOT; 1811 type = DT_DIR; 1812 } else { 1813 /* 1814 * Grab next entry. 
1815 */ 1816 if ((error = zap_cursor_retrieve(&zc, &zap))) { 1817 if ((*eofp = (error == ENOENT)) != 0) 1818 break; 1819 else 1820 goto update; 1821 } 1822 1823 if (zap.za_integer_length != 8 || 1824 zap.za_num_integers != 1) { 1825 cmn_err(CE_WARN, "zap_readdir: bad directory " 1826 "entry, obj = %lld, offset = %lld\n", 1827 (u_longlong_t)zp->z_id, 1828 (u_longlong_t)offset); 1829 error = SET_ERROR(ENXIO); 1830 goto update; 1831 } 1832 1833 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 1834 /* 1835 * MacOS X can extract the object type here such as: 1836 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 1837 */ 1838 type = ZFS_DIRENT_TYPE(zap.za_first_integer); 1839 1840 if (check_sysattrs && !zap.za_normalization_conflict) { 1841 #ifdef TODO 1842 zap.za_normalization_conflict = 1843 xattr_sysattr_casechk(zap.za_name); 1844 #else 1845 panic("%s:%u: TODO", __func__, __LINE__); 1846 #endif 1847 } 1848 } 1849 1850 if (flags & V_RDDIR_ACCFILTER) { 1851 /* 1852 * If we have no access at all, don't include 1853 * this entry in the returned information 1854 */ 1855 znode_t *ezp; 1856 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 1857 goto skip_entry; 1858 if (!zfs_has_access(ezp, cr)) { 1859 vrele(ZTOV(ezp)); 1860 goto skip_entry; 1861 } 1862 vrele(ZTOV(ezp)); 1863 } 1864 1865 if (flags & V_RDDIR_ENTFLAGS) 1866 reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 1867 else 1868 reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 1869 1870 /* 1871 * Will this entry fit in the buffer? 1872 */ 1873 if (outcount + reclen > bufsize) { 1874 /* 1875 * Did we manage to fit anything in the buffer? 1876 */ 1877 if (!outcount) { 1878 error = SET_ERROR(EINVAL); 1879 goto update; 1880 } 1881 break; 1882 } 1883 if (flags & V_RDDIR_ENTFLAGS) { 1884 /* 1885 * Add extended flag entry: 1886 */ 1887 eodp->ed_ino = objnum; 1888 eodp->ed_reclen = reclen; 1889 /* NOTE: ed_off is the offset for the *next* entry */ 1890 next = &(eodp->ed_off); 1891 eodp->ed_eflags = zap.za_normalization_conflict ? 
1892 ED_CASE_CONFLICT : 0; 1893 (void) strncpy(eodp->ed_name, zap.za_name, 1894 EDIRENT_NAMELEN(reclen)); 1895 eodp = (edirent_t *)((intptr_t)eodp + reclen); 1896 } else { 1897 /* 1898 * Add normal entry: 1899 */ 1900 odp->d_ino = objnum; 1901 odp->d_reclen = reclen; 1902 odp->d_namlen = strlen(zap.za_name); 1903 /* NOTE: d_off is the offset for the *next* entry. */ 1904 next = &odp->d_off; 1905 strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1); 1906 odp->d_type = type; 1907 dirent_terminate(odp); 1908 odp = (dirent64_t *)((intptr_t)odp + reclen); 1909 } 1910 outcount += reclen; 1911 1912 ASSERT3S(outcount, <=, bufsize); 1913 1914 /* Prefetch znode */ 1915 if (prefetch) 1916 dmu_prefetch(os, objnum, 0, 0, 0, 1917 ZIO_PRIORITY_SYNC_READ); 1918 1919 skip_entry: 1920 /* 1921 * Move to the next entry, fill in the previous offset. 1922 */ 1923 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 1924 zap_cursor_advance(&zc); 1925 offset = zap_cursor_serialize(&zc); 1926 } else { 1927 offset += 1; 1928 } 1929 1930 /* Fill the offset right after advancing the cursor. */ 1931 if (next != NULL) 1932 *next = offset; 1933 if (cooks != NULL) { 1934 *cooks++ = offset; 1935 ncooks--; 1936 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks)); 1937 } 1938 } 1939 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 1940 1941 /* Subtract unused cookies */ 1942 if (ncookies != NULL) 1943 *ncookies -= ncooks; 1944 1945 if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) { 1946 iovp->iov_base += outcount; 1947 iovp->iov_len -= outcount; 1948 zfs_uio_resid(uio) -= outcount; 1949 } else if ((error = 1950 zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) { 1951 /* 1952 * Reset the pointer. 
1953 */ 1954 offset = zfs_uio_offset(uio); 1955 } 1956 1957 update: 1958 zap_cursor_fini(&zc); 1959 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) 1960 kmem_free(outbuf, bufsize); 1961 1962 if (error == ENOENT) 1963 error = 0; 1964 1965 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 1966 1967 zfs_uio_setoffset(uio, offset); 1968 ZFS_EXIT(zfsvfs); 1969 if (error != 0 && cookies != NULL) { 1970 free(*cookies, M_TEMP); 1971 *cookies = NULL; 1972 *ncookies = 0; 1973 } 1974 return (error); 1975 } 1976 1977 /* 1978 * Get the requested file attributes and place them in the provided 1979 * vattr structure. 1980 * 1981 * IN: vp - vnode of file. 1982 * vap - va_mask identifies requested attributes. 1983 * If AT_XVATTR set, then optional attrs are requested 1984 * flags - ATTR_NOACLCHECK (CIFS server context) 1985 * cr - credentials of caller. 1986 * 1987 * OUT: vap - attribute values. 1988 * 1989 * RETURN: 0 (always succeeds). 1990 */ 1991 /* ARGSUSED */ 1992 static int 1993 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) 1994 { 1995 znode_t *zp = VTOZ(vp); 1996 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1997 int error = 0; 1998 uint32_t blksize; 1999 u_longlong_t nblocks; 2000 uint64_t mtime[2], ctime[2], crtime[2], rdev; 2001 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2002 xoptattr_t *xoap = NULL; 2003 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2004 sa_bulk_attr_t bulk[4]; 2005 int count = 0; 2006 2007 ZFS_ENTER(zfsvfs); 2008 ZFS_VERIFY_ZP(zp); 2009 2010 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2011 2012 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 2013 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 2014 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); 2015 if (vp->v_type == VBLK || vp->v_type == VCHR) 2016 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, 2017 &rdev, 8); 2018 2019 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 2020 ZFS_EXIT(zfsvfs); 2021 return (error); 2022 } 2023 2024 /* 2025 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2026 * Also, if we are the owner don't bother, since owner should 2027 * always be allowed to read basic attributes of file. 2028 */ 2029 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2030 (vap->va_uid != crgetuid(cr))) { 2031 if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2032 skipaclchk, cr))) { 2033 ZFS_EXIT(zfsvfs); 2034 return (error); 2035 } 2036 } 2037 2038 /* 2039 * Return all attributes. It's cheaper to provide the answer 2040 * than to determine whether we were asked the question. 2041 */ 2042 2043 vap->va_type = IFTOVT(zp->z_mode); 2044 vap->va_mode = zp->z_mode & ~S_IFMT; 2045 vn_fsid(vp, vap); 2046 vap->va_nodeid = zp->z_id; 2047 vap->va_nlink = zp->z_links; 2048 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) && 2049 zp->z_links < ZFS_LINK_MAX) 2050 vap->va_nlink++; 2051 vap->va_size = zp->z_size; 2052 if (vp->v_type == VBLK || vp->v_type == VCHR) 2053 vap->va_rdev = zfs_cmpldev(rdev); 2054 vap->va_seq = zp->z_seq; 2055 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */ 2056 vap->va_filerev = zp->z_seq; 2057 2058 /* 2059 * Add in any requested optional attributes and the create time. 2060 * Also set the corresponding bits in the returned attribute bitmap. 
2061 */ 2062 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2063 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2064 xoap->xoa_archive = 2065 ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2066 XVA_SET_RTN(xvap, XAT_ARCHIVE); 2067 } 2068 2069 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2070 xoap->xoa_readonly = 2071 ((zp->z_pflags & ZFS_READONLY) != 0); 2072 XVA_SET_RTN(xvap, XAT_READONLY); 2073 } 2074 2075 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2076 xoap->xoa_system = 2077 ((zp->z_pflags & ZFS_SYSTEM) != 0); 2078 XVA_SET_RTN(xvap, XAT_SYSTEM); 2079 } 2080 2081 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2082 xoap->xoa_hidden = 2083 ((zp->z_pflags & ZFS_HIDDEN) != 0); 2084 XVA_SET_RTN(xvap, XAT_HIDDEN); 2085 } 2086 2087 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2088 xoap->xoa_nounlink = 2089 ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2090 XVA_SET_RTN(xvap, XAT_NOUNLINK); 2091 } 2092 2093 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2094 xoap->xoa_immutable = 2095 ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2096 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2097 } 2098 2099 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2100 xoap->xoa_appendonly = 2101 ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2102 XVA_SET_RTN(xvap, XAT_APPENDONLY); 2103 } 2104 2105 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2106 xoap->xoa_nodump = 2107 ((zp->z_pflags & ZFS_NODUMP) != 0); 2108 XVA_SET_RTN(xvap, XAT_NODUMP); 2109 } 2110 2111 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2112 xoap->xoa_opaque = 2113 ((zp->z_pflags & ZFS_OPAQUE) != 0); 2114 XVA_SET_RTN(xvap, XAT_OPAQUE); 2115 } 2116 2117 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2118 xoap->xoa_av_quarantined = 2119 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2120 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2121 } 2122 2123 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2124 xoap->xoa_av_modified = 2125 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2126 XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2127 } 2128 2129 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2130 vp->v_type == VREG) { 2131 
zfs_sa_get_scanstamp(zp, xvap); 2132 } 2133 2134 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2135 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 2136 XVA_SET_RTN(xvap, XAT_REPARSE); 2137 } 2138 if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 2139 xoap->xoa_generation = zp->z_gen; 2140 XVA_SET_RTN(xvap, XAT_GEN); 2141 } 2142 2143 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2144 xoap->xoa_offline = 2145 ((zp->z_pflags & ZFS_OFFLINE) != 0); 2146 XVA_SET_RTN(xvap, XAT_OFFLINE); 2147 } 2148 2149 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2150 xoap->xoa_sparse = 2151 ((zp->z_pflags & ZFS_SPARSE) != 0); 2152 XVA_SET_RTN(xvap, XAT_SPARSE); 2153 } 2154 2155 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { 2156 xoap->xoa_projinherit = 2157 ((zp->z_pflags & ZFS_PROJINHERIT) != 0); 2158 XVA_SET_RTN(xvap, XAT_PROJINHERIT); 2159 } 2160 2161 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) { 2162 xoap->xoa_projid = zp->z_projid; 2163 XVA_SET_RTN(xvap, XAT_PROJID); 2164 } 2165 } 2166 2167 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 2168 ZFS_TIME_DECODE(&vap->va_mtime, mtime); 2169 ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2170 ZFS_TIME_DECODE(&vap->va_birthtime, crtime); 2171 2172 2173 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); 2174 vap->va_blksize = blksize; 2175 vap->va_bytes = nblocks << 9; /* nblocks * 512 */ 2176 2177 if (zp->z_blksz == 0) { 2178 /* 2179 * Block size hasn't been set; suggest maximal I/O transfers. 2180 */ 2181 vap->va_blksize = zfsvfs->z_max_blksz; 2182 } 2183 2184 ZFS_EXIT(zfsvfs); 2185 return (0); 2186 } 2187 2188 /* 2189 * Set the file attributes to the values contained in the 2190 * vattr structure. 2191 * 2192 * IN: zp - znode of file to be modified. 2193 * vap - new attribute values. 2194 * If AT_XVATTR set, then optional attrs are being set 2195 * flags - ATTR_UTIME set if non-default time values provided. 2196 * - ATTR_NOACLCHECK (CIFS context only). 2197 * cr - credentials of caller. 2198 * ct - caller context 2199 * 2200 * RETURN: 0 on success, error code on failure. 
2201 * 2202 * Timestamps: 2203 * vp - ctime updated, mtime updated if size changed. 2204 */ 2205 /* ARGSUSED */ 2206 int 2207 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr) 2208 { 2209 vnode_t *vp = ZTOV(zp); 2210 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2211 objset_t *os; 2212 zilog_t *zilog; 2213 dmu_tx_t *tx; 2214 vattr_t oldva; 2215 xvattr_t tmpxvattr; 2216 uint_t mask = vap->va_mask; 2217 uint_t saved_mask = 0; 2218 uint64_t saved_mode; 2219 int trim_mask = 0; 2220 uint64_t new_mode; 2221 uint64_t new_uid, new_gid; 2222 uint64_t xattr_obj; 2223 uint64_t mtime[2], ctime[2]; 2224 uint64_t projid = ZFS_INVALID_PROJID; 2225 znode_t *attrzp; 2226 int need_policy = FALSE; 2227 int err, err2; 2228 zfs_fuid_info_t *fuidp = NULL; 2229 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2230 xoptattr_t *xoap; 2231 zfs_acl_t *aclp; 2232 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2233 boolean_t fuid_dirtied = B_FALSE; 2234 sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2235 int count = 0, xattr_count = 0; 2236 2237 if (mask == 0) 2238 return (0); 2239 2240 if (mask & AT_NOSET) 2241 return (SET_ERROR(EINVAL)); 2242 2243 ZFS_ENTER(zfsvfs); 2244 ZFS_VERIFY_ZP(zp); 2245 2246 os = zfsvfs->z_os; 2247 zilog = zfsvfs->z_log; 2248 2249 /* 2250 * Make sure that if we have ephemeral uid/gid or xvattr specified 2251 * that file system is at proper version level 2252 */ 2253 2254 if (zfsvfs->z_use_fuids == B_FALSE && 2255 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2256 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2257 (mask & AT_XVATTR))) { 2258 ZFS_EXIT(zfsvfs); 2259 return (SET_ERROR(EINVAL)); 2260 } 2261 2262 if (mask & AT_SIZE && vp->v_type == VDIR) { 2263 ZFS_EXIT(zfsvfs); 2264 return (SET_ERROR(EISDIR)); 2265 } 2266 2267 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2268 ZFS_EXIT(zfsvfs); 2269 return (SET_ERROR(EINVAL)); 2270 } 2271 2272 /* 2273 * If this is an xvattr_t, then get a pointer to the structure 
of 2274 * optional attributes. If this is NULL, then we have a vattr_t. 2275 */ 2276 xoap = xva_getxoptattr(xvap); 2277 2278 xva_init(&tmpxvattr); 2279 2280 /* 2281 * Immutable files can only alter immutable bit and atime 2282 */ 2283 if ((zp->z_pflags & ZFS_IMMUTABLE) && 2284 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2285 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2286 ZFS_EXIT(zfsvfs); 2287 return (SET_ERROR(EPERM)); 2288 } 2289 2290 /* 2291 * Note: ZFS_READONLY is handled in zfs_zaccess_common. 2292 */ 2293 2294 /* 2295 * Verify timestamps doesn't overflow 32 bits. 2296 * ZFS can handle large timestamps, but 32bit syscalls can't 2297 * handle times greater than 2039. This check should be removed 2298 * once large timestamps are fully supported. 2299 */ 2300 if (mask & (AT_ATIME | AT_MTIME)) { 2301 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2302 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2303 ZFS_EXIT(zfsvfs); 2304 return (SET_ERROR(EOVERFLOW)); 2305 } 2306 } 2307 if (xoap != NULL && (mask & AT_XVATTR)) { 2308 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 2309 TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 2310 ZFS_EXIT(zfsvfs); 2311 return (SET_ERROR(EOVERFLOW)); 2312 } 2313 2314 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) { 2315 if (!dmu_objset_projectquota_enabled(os) || 2316 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) { 2317 ZFS_EXIT(zfsvfs); 2318 return (SET_ERROR(EOPNOTSUPP)); 2319 } 2320 2321 projid = xoap->xoa_projid; 2322 if (unlikely(projid == ZFS_INVALID_PROJID)) { 2323 ZFS_EXIT(zfsvfs); 2324 return (SET_ERROR(EINVAL)); 2325 } 2326 2327 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID) 2328 projid = ZFS_INVALID_PROJID; 2329 else 2330 need_policy = TRUE; 2331 } 2332 2333 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) && 2334 (xoap->xoa_projinherit != 2335 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) && 2336 (!dmu_objset_projectquota_enabled(os) || 2337 (!S_ISREG(zp->z_mode) && 
!S_ISDIR(zp->z_mode)))) { 2338 ZFS_EXIT(zfsvfs); 2339 return (SET_ERROR(EOPNOTSUPP)); 2340 } 2341 } 2342 2343 attrzp = NULL; 2344 aclp = NULL; 2345 2346 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2347 ZFS_EXIT(zfsvfs); 2348 return (SET_ERROR(EROFS)); 2349 } 2350 2351 /* 2352 * First validate permissions 2353 */ 2354 2355 if (mask & AT_SIZE) { 2356 /* 2357 * XXX - Note, we are not providing any open 2358 * mode flags here (like FNDELAY), so we may 2359 * block if there are locks present... this 2360 * should be addressed in openat(). 2361 */ 2362 /* XXX - would it be OK to generate a log record here? */ 2363 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2364 if (err) { 2365 ZFS_EXIT(zfsvfs); 2366 return (err); 2367 } 2368 } 2369 2370 if (mask & (AT_ATIME|AT_MTIME) || 2371 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2372 XVA_ISSET_REQ(xvap, XAT_READONLY) || 2373 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2374 XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2375 XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2376 XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 2377 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2378 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2379 skipaclchk, cr); 2380 } 2381 2382 if (mask & (AT_UID|AT_GID)) { 2383 int idmask = (mask & (AT_UID|AT_GID)); 2384 int take_owner; 2385 int take_group; 2386 2387 /* 2388 * NOTE: even if a new mode is being set, 2389 * we may clear S_ISUID/S_ISGID bits. 2390 */ 2391 2392 if (!(mask & AT_MODE)) 2393 vap->va_mode = zp->z_mode; 2394 2395 /* 2396 * Take ownership or chgrp to group we are a member of 2397 */ 2398 2399 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2400 take_group = (mask & AT_GID) && 2401 zfs_groupmember(zfsvfs, vap->va_gid, cr); 2402 2403 /* 2404 * If both AT_UID and AT_GID are set then take_owner and 2405 * take_group must both be set in order to allow taking 2406 * ownership. 
2407 * 2408 * Otherwise, send the check through secpolicy_vnode_setattr() 2409 * 2410 */ 2411 2412 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2413 ((idmask == AT_UID) && take_owner) || 2414 ((idmask == AT_GID) && take_group)) { 2415 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2416 skipaclchk, cr) == 0) { 2417 /* 2418 * Remove setuid/setgid for non-privileged users 2419 */ 2420 secpolicy_setid_clear(vap, vp, cr); 2421 trim_mask = (mask & (AT_UID|AT_GID)); 2422 } else { 2423 need_policy = TRUE; 2424 } 2425 } else { 2426 need_policy = TRUE; 2427 } 2428 } 2429 2430 oldva.va_mode = zp->z_mode; 2431 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2432 if (mask & AT_XVATTR) { 2433 /* 2434 * Update xvattr mask to include only those attributes 2435 * that are actually changing. 2436 * 2437 * the bits will be restored prior to actually setting 2438 * the attributes so the caller thinks they were set. 2439 */ 2440 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2441 if (xoap->xoa_appendonly != 2442 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 2443 need_policy = TRUE; 2444 } else { 2445 XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2446 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2447 } 2448 } 2449 2450 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { 2451 if (xoap->xoa_projinherit != 2452 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) { 2453 need_policy = TRUE; 2454 } else { 2455 XVA_CLR_REQ(xvap, XAT_PROJINHERIT); 2456 XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT); 2457 } 2458 } 2459 2460 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2461 if (xoap->xoa_nounlink != 2462 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 2463 need_policy = TRUE; 2464 } else { 2465 XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2466 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2467 } 2468 } 2469 2470 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2471 if (xoap->xoa_immutable != 2472 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 2473 need_policy = TRUE; 2474 } else { 2475 XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2476 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 
2477 } 2478 } 2479 2480 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2481 if (xoap->xoa_nodump != 2482 ((zp->z_pflags & ZFS_NODUMP) != 0)) { 2483 need_policy = TRUE; 2484 } else { 2485 XVA_CLR_REQ(xvap, XAT_NODUMP); 2486 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 2487 } 2488 } 2489 2490 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2491 if (xoap->xoa_av_modified != 2492 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 2493 need_policy = TRUE; 2494 } else { 2495 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2496 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2497 } 2498 } 2499 2500 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2501 if ((vp->v_type != VREG && 2502 xoap->xoa_av_quarantined) || 2503 xoap->xoa_av_quarantined != 2504 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 2505 need_policy = TRUE; 2506 } else { 2507 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2508 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2509 } 2510 } 2511 2512 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2513 ZFS_EXIT(zfsvfs); 2514 return (SET_ERROR(EPERM)); 2515 } 2516 2517 if (need_policy == FALSE && 2518 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 2519 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2520 need_policy = TRUE; 2521 } 2522 } 2523 2524 if (mask & AT_MODE) { 2525 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 2526 err = secpolicy_setid_setsticky_clear(vp, vap, 2527 &oldva, cr); 2528 if (err) { 2529 ZFS_EXIT(zfsvfs); 2530 return (err); 2531 } 2532 trim_mask |= AT_MODE; 2533 } else { 2534 need_policy = TRUE; 2535 } 2536 } 2537 2538 if (need_policy) { 2539 /* 2540 * If trim_mask is set then take ownership 2541 * has been granted or write_acl is present and user 2542 * has the ability to modify mode. In that case remove 2543 * UID|GID and or MODE from mask so that 2544 * secpolicy_vnode_setattr() doesn't revoke it. 
2545 */ 2546 2547 if (trim_mask) { 2548 saved_mask = vap->va_mask; 2549 vap->va_mask &= ~trim_mask; 2550 if (trim_mask & AT_MODE) { 2551 /* 2552 * Save the mode, as secpolicy_vnode_setattr() 2553 * will overwrite it with ova.va_mode. 2554 */ 2555 saved_mode = vap->va_mode; 2556 } 2557 } 2558 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2559 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2560 if (err) { 2561 ZFS_EXIT(zfsvfs); 2562 return (err); 2563 } 2564 2565 if (trim_mask) { 2566 vap->va_mask |= saved_mask; 2567 if (trim_mask & AT_MODE) { 2568 /* 2569 * Recover the mode after 2570 * secpolicy_vnode_setattr(). 2571 */ 2572 vap->va_mode = saved_mode; 2573 } 2574 } 2575 } 2576 2577 /* 2578 * secpolicy_vnode_setattr, or take ownership may have 2579 * changed va_mask 2580 */ 2581 mask = vap->va_mask; 2582 2583 if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) { 2584 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 2585 &xattr_obj, sizeof (xattr_obj)); 2586 2587 if (err == 0 && xattr_obj) { 2588 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 2589 if (err == 0) { 2590 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 2591 if (err != 0) 2592 vrele(ZTOV(attrzp)); 2593 } 2594 if (err) 2595 goto out2; 2596 } 2597 if (mask & AT_UID) { 2598 new_uid = zfs_fuid_create(zfsvfs, 2599 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 2600 if (new_uid != zp->z_uid && 2601 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT, 2602 new_uid)) { 2603 if (attrzp) 2604 vput(ZTOV(attrzp)); 2605 err = SET_ERROR(EDQUOT); 2606 goto out2; 2607 } 2608 } 2609 2610 if (mask & AT_GID) { 2611 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 2612 cr, ZFS_GROUP, &fuidp); 2613 if (new_gid != zp->z_gid && 2614 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT, 2615 new_gid)) { 2616 if (attrzp) 2617 vput(ZTOV(attrzp)); 2618 err = SET_ERROR(EDQUOT); 2619 goto out2; 2620 } 2621 } 2622 2623 if (projid != ZFS_INVALID_PROJID && 2624 zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, 
projid)) { 2625 if (attrzp) 2626 vput(ZTOV(attrzp)); 2627 err = SET_ERROR(EDQUOT); 2628 goto out2; 2629 } 2630 } 2631 tx = dmu_tx_create(os); 2632 2633 if (mask & AT_MODE) { 2634 uint64_t pmode = zp->z_mode; 2635 uint64_t acl_obj; 2636 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2637 2638 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 2639 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 2640 err = SET_ERROR(EPERM); 2641 goto out; 2642 } 2643 2644 if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))) 2645 goto out; 2646 2647 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 2648 /* 2649 * Are we upgrading ACL from old V0 format 2650 * to V1 format? 2651 */ 2652 if (zfsvfs->z_version >= ZPL_VERSION_FUID && 2653 zfs_znode_acl_version(zp) == 2654 ZFS_ACL_VERSION_INITIAL) { 2655 dmu_tx_hold_free(tx, acl_obj, 0, 2656 DMU_OBJECT_END); 2657 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2658 0, aclp->z_acl_bytes); 2659 } else { 2660 dmu_tx_hold_write(tx, acl_obj, 0, 2661 aclp->z_acl_bytes); 2662 } 2663 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2664 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2665 0, aclp->z_acl_bytes); 2666 } 2667 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2668 } else { 2669 if (((mask & AT_XVATTR) && 2670 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) || 2671 (projid != ZFS_INVALID_PROJID && 2672 !(zp->z_pflags & ZFS_PROJID))) 2673 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2674 else 2675 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2676 } 2677 2678 if (attrzp) { 2679 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 2680 } 2681 2682 fuid_dirtied = zfsvfs->z_fuid_dirty; 2683 if (fuid_dirtied) 2684 zfs_fuid_txhold(zfsvfs, tx); 2685 2686 zfs_sa_upgrade_txholds(tx, zp); 2687 2688 err = dmu_tx_assign(tx, TXG_WAIT); 2689 if (err) 2690 goto out; 2691 2692 count = 0; 2693 /* 2694 * Set each attribute requested. 2695 * We group settings according to the locks they need to acquire. 
2696 * 2697 * Note: you cannot set ctime directly, although it will be 2698 * updated as a side-effect of calling this function. 2699 */ 2700 2701 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) { 2702 /* 2703 * For the existed object that is upgraded from old system, 2704 * its on-disk layout has no slot for the project ID attribute. 2705 * But quota accounting logic needs to access related slots by 2706 * offset directly. So we need to adjust old objects' layout 2707 * to make the project ID to some unified and fixed offset. 2708 */ 2709 if (attrzp) 2710 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid); 2711 if (err == 0) 2712 err = sa_add_projid(zp->z_sa_hdl, tx, projid); 2713 2714 if (unlikely(err == EEXIST)) 2715 err = 0; 2716 else if (err != 0) 2717 goto out; 2718 else 2719 projid = ZFS_INVALID_PROJID; 2720 } 2721 2722 if (mask & (AT_UID|AT_GID|AT_MODE)) 2723 mutex_enter(&zp->z_acl_lock); 2724 2725 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 2726 &zp->z_pflags, sizeof (zp->z_pflags)); 2727 2728 if (attrzp) { 2729 if (mask & (AT_UID|AT_GID|AT_MODE)) 2730 mutex_enter(&attrzp->z_acl_lock); 2731 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2732 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 2733 sizeof (attrzp->z_pflags)); 2734 if (projid != ZFS_INVALID_PROJID) { 2735 attrzp->z_projid = projid; 2736 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2737 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid, 2738 sizeof (attrzp->z_projid)); 2739 } 2740 } 2741 2742 if (mask & (AT_UID|AT_GID)) { 2743 2744 if (mask & AT_UID) { 2745 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 2746 &new_uid, sizeof (new_uid)); 2747 zp->z_uid = new_uid; 2748 if (attrzp) { 2749 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2750 SA_ZPL_UID(zfsvfs), NULL, &new_uid, 2751 sizeof (new_uid)); 2752 attrzp->z_uid = new_uid; 2753 } 2754 } 2755 2756 if (mask & AT_GID) { 2757 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 2758 NULL, &new_gid, sizeof (new_gid)); 2759 
zp->z_gid = new_gid; 2760 if (attrzp) { 2761 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2762 SA_ZPL_GID(zfsvfs), NULL, &new_gid, 2763 sizeof (new_gid)); 2764 attrzp->z_gid = new_gid; 2765 } 2766 } 2767 if (!(mask & AT_MODE)) { 2768 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 2769 NULL, &new_mode, sizeof (new_mode)); 2770 new_mode = zp->z_mode; 2771 } 2772 err = zfs_acl_chown_setattr(zp); 2773 ASSERT0(err); 2774 if (attrzp) { 2775 vn_seqc_write_begin(ZTOV(attrzp)); 2776 err = zfs_acl_chown_setattr(attrzp); 2777 vn_seqc_write_end(ZTOV(attrzp)); 2778 ASSERT0(err); 2779 } 2780 } 2781 2782 if (mask & AT_MODE) { 2783 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 2784 &new_mode, sizeof (new_mode)); 2785 zp->z_mode = new_mode; 2786 ASSERT3P(aclp, !=, NULL); 2787 err = zfs_aclset_common(zp, aclp, cr, tx); 2788 ASSERT0(err); 2789 if (zp->z_acl_cached) 2790 zfs_acl_free(zp->z_acl_cached); 2791 zp->z_acl_cached = aclp; 2792 aclp = NULL; 2793 } 2794 2795 2796 if (mask & AT_ATIME) { 2797 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 2798 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 2799 &zp->z_atime, sizeof (zp->z_atime)); 2800 } 2801 2802 if (mask & AT_MTIME) { 2803 ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 2804 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 2805 mtime, sizeof (mtime)); 2806 } 2807 2808 if (projid != ZFS_INVALID_PROJID) { 2809 zp->z_projid = projid; 2810 SA_ADD_BULK_ATTR(bulk, count, 2811 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid, 2812 sizeof (zp->z_projid)); 2813 } 2814 2815 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 2816 if (mask & AT_SIZE && !(mask & AT_MTIME)) { 2817 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 2818 NULL, mtime, sizeof (mtime)); 2819 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 2820 &ctime, sizeof (ctime)); 2821 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); 2822 } else if (mask != 0) { 2823 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 2824 &ctime, sizeof (ctime)); 2825 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime); 2826 if (attrzp) { 2827 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2828 SA_ZPL_CTIME(zfsvfs), NULL, 2829 &ctime, sizeof (ctime)); 2830 zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 2831 mtime, ctime); 2832 } 2833 } 2834 2835 /* 2836 * Do this after setting timestamps to prevent timestamp 2837 * update from toggling bit 2838 */ 2839 2840 if (xoap && (mask & AT_XVATTR)) { 2841 2842 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 2843 xoap->xoa_createtime = vap->va_birthtime; 2844 /* 2845 * restore trimmed off masks 2846 * so that return masks can be set for caller. 
2847 */ 2848 2849 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 2850 XVA_SET_REQ(xvap, XAT_APPENDONLY); 2851 } 2852 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 2853 XVA_SET_REQ(xvap, XAT_NOUNLINK); 2854 } 2855 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 2856 XVA_SET_REQ(xvap, XAT_IMMUTABLE); 2857 } 2858 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 2859 XVA_SET_REQ(xvap, XAT_NODUMP); 2860 } 2861 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 2862 XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 2863 } 2864 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 2865 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 2866 } 2867 if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) { 2868 XVA_SET_REQ(xvap, XAT_PROJINHERIT); 2869 } 2870 2871 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 2872 ASSERT3S(vp->v_type, ==, VREG); 2873 2874 zfs_xvattr_set(zp, xvap, tx); 2875 } 2876 2877 if (fuid_dirtied) 2878 zfs_fuid_sync(zfsvfs, tx); 2879 2880 if (mask != 0) 2881 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 2882 2883 if (mask & (AT_UID|AT_GID|AT_MODE)) 2884 mutex_exit(&zp->z_acl_lock); 2885 2886 if (attrzp) { 2887 if (mask & (AT_UID|AT_GID|AT_MODE)) 2888 mutex_exit(&attrzp->z_acl_lock); 2889 } 2890 out: 2891 if (err == 0 && attrzp) { 2892 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 2893 xattr_count, tx); 2894 ASSERT0(err2); 2895 } 2896 2897 if (attrzp) 2898 vput(ZTOV(attrzp)); 2899 2900 if (aclp) 2901 zfs_acl_free(aclp); 2902 2903 if (fuidp) { 2904 zfs_fuid_info_free(fuidp); 2905 fuidp = NULL; 2906 } 2907 2908 if (err) { 2909 dmu_tx_abort(tx); 2910 } else { 2911 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 2912 dmu_tx_commit(tx); 2913 } 2914 2915 out2: 2916 if (os->os_sync == ZFS_SYNC_ALWAYS) 2917 zil_commit(zilog, 0); 2918 2919 ZFS_EXIT(zfsvfs); 2920 return (err); 2921 } 2922 2923 /* 2924 * We acquire all but fdvp locks using non-blocking acquisitions. 
If we 2925 * fail to acquire any lock in the path we will drop all held locks, 2926 * acquire the new lock in a blocking fashion, and then release it and 2927 * restart the rename. This acquire/release step ensures that we do not 2928 * spin on a lock waiting for release. On error release all vnode locks 2929 * and decrement references the way tmpfs_rename() would do. 2930 */ 2931 static int 2932 zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp, 2933 struct vnode *tdvp, struct vnode **tvpp, 2934 const struct componentname *scnp, const struct componentname *tcnp) 2935 { 2936 zfsvfs_t *zfsvfs; 2937 struct vnode *nvp, *svp, *tvp; 2938 znode_t *sdzp, *tdzp, *szp, *tzp; 2939 const char *snm = scnp->cn_nameptr; 2940 const char *tnm = tcnp->cn_nameptr; 2941 int error; 2942 2943 VOP_UNLOCK1(tdvp); 2944 if (*tvpp != NULL && *tvpp != tdvp) 2945 VOP_UNLOCK1(*tvpp); 2946 2947 relock: 2948 error = vn_lock(sdvp, LK_EXCLUSIVE); 2949 if (error) 2950 goto out; 2951 sdzp = VTOZ(sdvp); 2952 2953 error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT); 2954 if (error != 0) { 2955 VOP_UNLOCK1(sdvp); 2956 if (error != EBUSY) 2957 goto out; 2958 error = vn_lock(tdvp, LK_EXCLUSIVE); 2959 if (error) 2960 goto out; 2961 VOP_UNLOCK1(tdvp); 2962 goto relock; 2963 } 2964 tdzp = VTOZ(tdvp); 2965 2966 /* 2967 * Before using sdzp and tdzp we must ensure that they are live. 2968 * As a porting legacy from illumos we have two things to worry 2969 * about. One is typical for FreeBSD and it is that the vnode is 2970 * not reclaimed (doomed). The other is that the znode is live. 2971 * The current code can invalidate the znode without acquiring the 2972 * corresponding vnode lock if the object represented by the znode 2973 * and vnode is no longer valid after a rollback or receive operation. 2974 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock 2975 * that protects the znodes from the invalidation. 
2976 */ 2977 zfsvfs = sdzp->z_zfsvfs; 2978 ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs); 2979 ZFS_ENTER(zfsvfs); 2980 2981 /* 2982 * We can not use ZFS_VERIFY_ZP() here because it could directly return 2983 * bypassing the cleanup code in the case of an error. 2984 */ 2985 if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 2986 ZFS_EXIT(zfsvfs); 2987 VOP_UNLOCK1(sdvp); 2988 VOP_UNLOCK1(tdvp); 2989 error = SET_ERROR(EIO); 2990 goto out; 2991 } 2992 2993 /* 2994 * Re-resolve svp to be certain it still exists and fetch the 2995 * correct vnode. 2996 */ 2997 error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS); 2998 if (error != 0) { 2999 /* Source entry invalid or not there. */ 3000 ZFS_EXIT(zfsvfs); 3001 VOP_UNLOCK1(sdvp); 3002 VOP_UNLOCK1(tdvp); 3003 if ((scnp->cn_flags & ISDOTDOT) != 0 || 3004 (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.')) 3005 error = SET_ERROR(EINVAL); 3006 goto out; 3007 } 3008 svp = ZTOV(szp); 3009 3010 /* 3011 * Re-resolve tvp, if it disappeared we just carry on. 3012 */ 3013 error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0); 3014 if (error != 0) { 3015 ZFS_EXIT(zfsvfs); 3016 VOP_UNLOCK1(sdvp); 3017 VOP_UNLOCK1(tdvp); 3018 vrele(svp); 3019 if ((tcnp->cn_flags & ISDOTDOT) != 0) 3020 error = SET_ERROR(EINVAL); 3021 goto out; 3022 } 3023 if (tzp != NULL) 3024 tvp = ZTOV(tzp); 3025 else 3026 tvp = NULL; 3027 3028 /* 3029 * At present the vnode locks must be acquired before z_teardown_lock, 3030 * although it would be more logical to use the opposite order. 3031 */ 3032 ZFS_EXIT(zfsvfs); 3033 3034 /* 3035 * Now try acquire locks on svp and tvp. 
3036 */ 3037 nvp = svp; 3038 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 3039 if (error != 0) { 3040 VOP_UNLOCK1(sdvp); 3041 VOP_UNLOCK1(tdvp); 3042 if (tvp != NULL) 3043 vrele(tvp); 3044 if (error != EBUSY) { 3045 vrele(nvp); 3046 goto out; 3047 } 3048 error = vn_lock(nvp, LK_EXCLUSIVE); 3049 if (error != 0) { 3050 vrele(nvp); 3051 goto out; 3052 } 3053 VOP_UNLOCK1(nvp); 3054 /* 3055 * Concurrent rename race. 3056 * XXX ? 3057 */ 3058 if (nvp == tdvp) { 3059 vrele(nvp); 3060 error = SET_ERROR(EINVAL); 3061 goto out; 3062 } 3063 vrele(*svpp); 3064 *svpp = nvp; 3065 goto relock; 3066 } 3067 vrele(*svpp); 3068 *svpp = nvp; 3069 3070 if (*tvpp != NULL) 3071 vrele(*tvpp); 3072 *tvpp = NULL; 3073 if (tvp != NULL) { 3074 nvp = tvp; 3075 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT); 3076 if (error != 0) { 3077 VOP_UNLOCK1(sdvp); 3078 VOP_UNLOCK1(tdvp); 3079 VOP_UNLOCK1(*svpp); 3080 if (error != EBUSY) { 3081 vrele(nvp); 3082 goto out; 3083 } 3084 error = vn_lock(nvp, LK_EXCLUSIVE); 3085 if (error != 0) { 3086 vrele(nvp); 3087 goto out; 3088 } 3089 vput(nvp); 3090 goto relock; 3091 } 3092 *tvpp = nvp; 3093 } 3094 3095 return (0); 3096 3097 out: 3098 return (error); 3099 } 3100 3101 /* 3102 * Note that we must use VRELE_ASYNC in this function as it walks 3103 * up the directory tree and vrele may need to acquire an exclusive 3104 * lock if a last reference to a vnode is dropped. 
3105 */ 3106 static int 3107 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp) 3108 { 3109 zfsvfs_t *zfsvfs; 3110 znode_t *zp, *zp1; 3111 uint64_t parent; 3112 int error; 3113 3114 zfsvfs = tdzp->z_zfsvfs; 3115 if (tdzp == szp) 3116 return (SET_ERROR(EINVAL)); 3117 if (tdzp == sdzp) 3118 return (0); 3119 if (tdzp->z_id == zfsvfs->z_root) 3120 return (0); 3121 zp = tdzp; 3122 for (;;) { 3123 ASSERT(!zp->z_unlinked); 3124 if ((error = sa_lookup(zp->z_sa_hdl, 3125 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 3126 break; 3127 3128 if (parent == szp->z_id) { 3129 error = SET_ERROR(EINVAL); 3130 break; 3131 } 3132 if (parent == zfsvfs->z_root) 3133 break; 3134 if (parent == sdzp->z_id) 3135 break; 3136 3137 error = zfs_zget(zfsvfs, parent, &zp1); 3138 if (error != 0) 3139 break; 3140 3141 if (zp != tdzp) 3142 VN_RELE_ASYNC(ZTOV(zp), 3143 dsl_pool_zrele_taskq( 3144 dmu_objset_pool(zfsvfs->z_os))); 3145 zp = zp1; 3146 } 3147 3148 if (error == ENOTDIR) 3149 panic("checkpath: .. not a directory\n"); 3150 if (zp != tdzp) 3151 VN_RELE_ASYNC(ZTOV(zp), 3152 dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os))); 3153 return (error); 3154 } 3155 3156 #if __FreeBSD_version < 1300124 3157 static void 3158 cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp, 3159 struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp) 3160 { 3161 3162 cache_purge(fvp); 3163 if (tvp != NULL) 3164 cache_purge(tvp); 3165 cache_purge_negative(tdvp); 3166 } 3167 #endif 3168 3169 /* 3170 * Move an entry from the provided source directory to the target 3171 * directory. Change the entry name as indicated. 3172 * 3173 * IN: sdvp - Source directory containing the "old entry". 3174 * snm - Old entry name. 3175 * tdvp - Target directory to contain the "new entry". 3176 * tnm - New entry name. 3177 * cr - credentials of caller. 3178 * ct - caller context 3179 * flags - case flags 3180 * 3181 * RETURN: 0 on success, error code on failure. 
3182 * 3183 * Timestamps: 3184 * sdvp,tdvp - ctime|mtime updated 3185 */ 3186 /*ARGSUSED*/ 3187 static int 3188 zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, 3189 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp, 3190 cred_t *cr, int log) 3191 { 3192 zfsvfs_t *zfsvfs; 3193 znode_t *sdzp, *tdzp, *szp, *tzp; 3194 zilog_t *zilog = NULL; 3195 dmu_tx_t *tx; 3196 const char *snm = scnp->cn_nameptr; 3197 const char *tnm = tcnp->cn_nameptr; 3198 int error = 0; 3199 bool want_seqc_end __maybe_unused = false; 3200 3201 /* Reject renames across filesystems. */ 3202 if ((*svpp)->v_mount != tdvp->v_mount || 3203 ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) { 3204 error = SET_ERROR(EXDEV); 3205 goto out; 3206 } 3207 3208 if (zfsctl_is_node(tdvp)) { 3209 error = SET_ERROR(EXDEV); 3210 goto out; 3211 } 3212 3213 /* 3214 * Lock all four vnodes to ensure safety and semantics of renaming. 3215 */ 3216 error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp); 3217 if (error != 0) { 3218 /* no vnodes are locked in the case of error here */ 3219 return (error); 3220 } 3221 3222 tdzp = VTOZ(tdvp); 3223 sdzp = VTOZ(sdvp); 3224 zfsvfs = tdzp->z_zfsvfs; 3225 zilog = zfsvfs->z_log; 3226 3227 /* 3228 * After we re-enter ZFS_ENTER() we will have to revalidate all 3229 * znodes involved. 3230 */ 3231 ZFS_ENTER(zfsvfs); 3232 3233 if (zfsvfs->z_utf8 && u8_validate(tnm, 3234 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3235 error = SET_ERROR(EILSEQ); 3236 goto unlockout; 3237 } 3238 3239 /* If source and target are the same file, there is nothing to do. 
*/ 3240 if ((*svpp) == (*tvpp)) { 3241 error = 0; 3242 goto unlockout; 3243 } 3244 3245 if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) || 3246 ((*tvpp) != NULL && (*tvpp)->v_type == VDIR && 3247 (*tvpp)->v_mountedhere != NULL)) { 3248 error = SET_ERROR(EXDEV); 3249 goto unlockout; 3250 } 3251 3252 /* 3253 * We can not use ZFS_VERIFY_ZP() here because it could directly return 3254 * bypassing the cleanup code in the case of an error. 3255 */ 3256 if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) { 3257 error = SET_ERROR(EIO); 3258 goto unlockout; 3259 } 3260 3261 szp = VTOZ(*svpp); 3262 tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp); 3263 if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) { 3264 error = SET_ERROR(EIO); 3265 goto unlockout; 3266 } 3267 3268 /* 3269 * This is to prevent the creation of links into attribute space 3270 * by renaming a linked file into/outof an attribute directory. 3271 * See the comment in zfs_link() for why this is considered bad. 3272 */ 3273 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3274 error = SET_ERROR(EINVAL); 3275 goto unlockout; 3276 } 3277 3278 /* 3279 * If we are using project inheritance, means if the directory has 3280 * ZFS_PROJINHERIT set, then its descendant directories will inherit 3281 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under 3282 * such case, we only allow renames into our tree when the project 3283 * IDs are the same. 3284 */ 3285 if (tdzp->z_pflags & ZFS_PROJINHERIT && 3286 tdzp->z_projid != szp->z_projid) { 3287 error = SET_ERROR(EXDEV); 3288 goto unlockout; 3289 } 3290 3291 /* 3292 * Must have write access at the source to remove the old entry 3293 * and write access at the target to create the new entry. 3294 * Note that if target and source are the same, this can be 3295 * done in a single check. 
3296 */ 3297 if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))) 3298 goto unlockout; 3299 3300 if ((*svpp)->v_type == VDIR) { 3301 /* 3302 * Avoid ".", "..", and aliases of "." for obvious reasons. 3303 */ 3304 if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') || 3305 sdzp == szp || 3306 (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 3307 error = EINVAL; 3308 goto unlockout; 3309 } 3310 3311 /* 3312 * Check to make sure rename is valid. 3313 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3314 */ 3315 if ((error = zfs_rename_check(szp, sdzp, tdzp))) 3316 goto unlockout; 3317 } 3318 3319 /* 3320 * Does target exist? 3321 */ 3322 if (tzp) { 3323 /* 3324 * Source and target must be the same type. 3325 */ 3326 if ((*svpp)->v_type == VDIR) { 3327 if ((*tvpp)->v_type != VDIR) { 3328 error = SET_ERROR(ENOTDIR); 3329 goto unlockout; 3330 } else { 3331 cache_purge(tdvp); 3332 if (sdvp != tdvp) 3333 cache_purge(sdvp); 3334 } 3335 } else { 3336 if ((*tvpp)->v_type == VDIR) { 3337 error = SET_ERROR(EISDIR); 3338 goto unlockout; 3339 } 3340 } 3341 } 3342 3343 vn_seqc_write_begin(*svpp); 3344 vn_seqc_write_begin(sdvp); 3345 if (*tvpp != NULL) 3346 vn_seqc_write_begin(*tvpp); 3347 if (tdvp != *tvpp) 3348 vn_seqc_write_begin(tdvp); 3349 #if __FreeBSD_version >= 1300102 3350 want_seqc_end = true; 3351 #endif 3352 vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct); 3353 if (tzp) 3354 vnevent_rename_dest(*tvpp, tdvp, tnm, ct); 3355 3356 /* 3357 * notify the target directory if it is not the same 3358 * as source directory. 
3359 */ 3360 if (tdvp != sdvp) { 3361 vnevent_rename_dest_dir(tdvp, ct); 3362 } 3363 3364 tx = dmu_tx_create(zfsvfs->z_os); 3365 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3366 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3367 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3368 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 3369 if (sdzp != tdzp) { 3370 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 3371 zfs_sa_upgrade_txholds(tx, tdzp); 3372 } 3373 if (tzp) { 3374 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 3375 zfs_sa_upgrade_txholds(tx, tzp); 3376 } 3377 3378 zfs_sa_upgrade_txholds(tx, szp); 3379 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3380 error = dmu_tx_assign(tx, TXG_WAIT); 3381 if (error) { 3382 dmu_tx_abort(tx); 3383 goto unlockout; 3384 } 3385 3386 3387 if (tzp) /* Attempt to remove the existing target */ 3388 error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL); 3389 3390 if (error == 0) { 3391 error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING); 3392 if (error == 0) { 3393 szp->z_pflags |= ZFS_AV_MODIFIED; 3394 3395 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 3396 (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3397 ASSERT0(error); 3398 3399 error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING, 3400 NULL); 3401 if (error == 0) { 3402 zfs_log_rename(zilog, tx, TX_RENAME, sdzp, 3403 snm, tdzp, tnm, szp); 3404 3405 /* 3406 * Update path information for the target vnode 3407 */ 3408 vn_renamepath(tdvp, *svpp, tnm, strlen(tnm)); 3409 } else { 3410 /* 3411 * At this point, we have successfully created 3412 * the target name, but have failed to remove 3413 * the source name. Since the create was done 3414 * with the ZRENAMING flag, there are 3415 * complications; for one, the link count is 3416 * wrong. The easiest way to deal with this 3417 * is to remove the newly created target, and 3418 * return the original error. This must 3419 * succeed; fortunately, it is very unlikely to 3420 * fail, since we just created it. 
3421 */ 3422 VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx, 3423 ZRENAMING, NULL)); 3424 } 3425 } 3426 if (error == 0) { 3427 cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp); 3428 } 3429 } 3430 3431 dmu_tx_commit(tx); 3432 3433 unlockout: /* all 4 vnodes are locked, ZFS_ENTER called */ 3434 if (want_seqc_end) { 3435 vn_seqc_write_end(*svpp); 3436 vn_seqc_write_end(sdvp); 3437 if (*tvpp != NULL) 3438 vn_seqc_write_end(*tvpp); 3439 if (tdvp != *tvpp) 3440 vn_seqc_write_end(tdvp); 3441 want_seqc_end = false; 3442 } 3443 VOP_UNLOCK1(*svpp); 3444 VOP_UNLOCK1(sdvp); 3445 3446 if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3447 zil_commit(zilog, 0); 3448 ZFS_EXIT(zfsvfs); 3449 3450 out: /* original two vnodes are locked */ 3451 MPASS(!want_seqc_end); 3452 3453 if (*tvpp != NULL) 3454 VOP_UNLOCK1(*tvpp); 3455 if (tdvp != *tvpp) 3456 VOP_UNLOCK1(tdvp); 3457 return (error); 3458 } 3459 3460 int 3461 zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname, 3462 cred_t *cr, int flags) 3463 { 3464 struct componentname scn, tcn; 3465 vnode_t *sdvp, *tdvp; 3466 vnode_t *svp, *tvp; 3467 int error; 3468 svp = tvp = NULL; 3469 3470 sdvp = ZTOV(sdzp); 3471 tdvp = ZTOV(tdzp); 3472 error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE); 3473 if (sdzp->z_zfsvfs->z_replay == B_FALSE) 3474 VOP_UNLOCK1(sdvp); 3475 if (error != 0) 3476 goto fail; 3477 VOP_UNLOCK1(svp); 3478 3479 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 3480 error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME); 3481 if (error == EJUSTRETURN) 3482 tvp = NULL; 3483 else if (error != 0) { 3484 VOP_UNLOCK1(tdvp); 3485 goto fail; 3486 } 3487 3488 error = zfs_rename_(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr, 0); 3489 fail: 3490 if (svp != NULL) 3491 vrele(svp); 3492 if (tvp != NULL) 3493 vrele(tvp); 3494 3495 return (error); 3496 } 3497 3498 /* 3499 * Insert the indicated symbolic reference entry into the directory. 3500 * 3501 * IN: dvp - Directory to contain new symbolic link. 
 *		link	- Name for new symlink entry.
 *		vap	- Attributes of new entry.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
int
zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
    const char *link, znode_t **zpp, cred_t *cr, int flags)
{
	znode_t		*zp;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	len = strlen(link);
	int		error;
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	uint64_t	txtype = TX_SYMLINK;

	ASSERT3S(vap->va_type, ==, VLNK);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* On utf8-only filesystems the entry name must be valid UTF-8. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	/* The link target itself is limited to MAXPATHLEN bytes. */
	if (len > MAXPATHLEN) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENAMETOOLONG));
	}

	/*
	 * From here on acl_ids is populated, so every exit path below
	 * has to call zfs_acl_ids_free().
	 */
	if ((error = zfs_acl_ids_create(dzp, 0,
	    vap, cr, NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* The caller needs permission to add a file to the directory. */
	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
	    0 /* projid */)) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * The vnode reservation is taken before the transaction is created
	 * and is dropped again on both the error and the success path.
	 */
	getnewvnode_reserve_();
	tx = dmu_tx_create(zfsvfs->z_os);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	/* Holds: link data, directory ZAP entry, new SA, parent SA. */
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE + len);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create a new object for the symlink.
	 * for version 4 ZPL datasets the symlink will be an SA attribute
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/*
	 * Store the link target, either directly as the SA_ZPL_SYMLINK
	 * attribute or through zfs_sa_symlink() for non-SA znodes.
	 *
	 * NOTE(review): a failure of sa_update() here is only remembered in
	 * 'error'; the directory entry is still created, *zpp is still set
	 * and the transaction is committed before the error is returned.
	 */
	if (zp->z_is_sa)
		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
		    __DECONST(void *, link), len, tx);
	else
		zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);

	/* The size of a symlink is the length of its target. */
	zp->z_size = len;
	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
	    &zp->z_size, sizeof (zp->z_size), tx);
	/*
	 * Insert the new object into the directory.
	 */
	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);

	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
	*zpp = zp;

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Return, in the buffer contained in the provided uio structure,
 * the symbolic path referred to by vp.
 *
 *	IN:	vp	- vnode of symbolic link.
 *		uio	- structure to contain the link path.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *
 *	OUT:	uio	- structure containing the link path.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 */
/* ARGSUSED */
static int
zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Mirror of the store in zfs_symlink(): SA attribute or legacy. */
	if (zp->z_is_sa)
		error = sa_lookup_uio(zp->z_sa_hdl,
		    SA_ZPL_SYMLINK(zfsvfs), uio);
	else
		error = zfs_sa_readlink(zp, uio);

	/* The access time is stamped whether or not the read succeeded. */
	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Insert a new entry into directory tdvp referencing svp.
 *
 *	IN:	tdvp	- Directory to contain new entry.
 *		svp	- vnode of new entry.
 *		name	- name of new entry.
 *		cr	- credentials of caller.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	tdvp - ctime|mtime updated
 *	 svp - ctime updated
 */
/* ARGSUSED */
int
zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
    int flags)
{
	znode_t		*tzp;
	zfsvfs_t	*zfsvfs = tdzp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	int		error;
	uint64_t	parent;
	uid_t		owner;

	ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(tdzp);
	zilog = zfsvfs->z_log;

	/*
	 * POSIX dictates that we return EPERM here.
	 * Better choices include ENOTSUP or EISDIR.
	 */
	if (ZTOV(szp)->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	ZFS_VERIFY_ZP(szp);

	/*
	 * If we are using project inheritance, means if the directory has
	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
	 * such case, we only allow hard link creation in our tree when the
	 * project IDs are the same.
	 */
	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
	    tdzp->z_projid != szp->z_projid) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EXDEV));
	}

	/* Append-only, immutable and read-only files cannot be linked. */
	if (szp->z_pflags & (ZFS_APPENDONLY |
	    ZFS_IMMUTABLE | ZFS_READONLY)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* Prevent links to .zfs/shares files */

	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	if (parent == zfsvfs->z_shares_dir) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* On utf8-only filesystems the new name must be valid UTF-8. */
	if (zfsvfs->z_utf8 && u8_validate(name,
	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	/*
	 * We do not support links between attributes and non-attributes
	 * because of the potential security risk of creating links
	 * into "normal" file space in order to circumvent restrictions
	 * imposed in attribute space.
	 */
	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}


	/*
	 * A caller who does not own the source file needs the
	 * secpolicy_basic_link() check to pass.
	 */
	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
	if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* Holds: source znode SA and the target directory's ZAP entry. */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
	zfs_sa_upgrade_txholds(tx, szp);
	zfs_sa_upgrade_txholds(tx, tdzp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_create(tdzp, name, szp, tx, 0);

	/* Only a successful link is recorded in the intent log. */
	if (error == 0) {
		uint64_t txtype = TX_LINK;
		zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
	}

	/* The transaction is committed even when zfs_link_create() failed. */
	dmu_tx_commit(tx);

	/*
	 * NOTE(review): 'ct' is neither a parameter nor a local of this
	 * function, so this line can only build if vnevent_link() expands
	 * to something that discards its arguments - confirm.
	 */
	if (error == 0) {
		vnevent_link(ZTOV(szp), ct);
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Free or allocate space in a file.  Currently, this function only
 * supports the `F_FREESP' command.  However, this command is somewhat
 * misnamed, as its functionality includes the ability to allocate as
 * well as free space.
 *
 *	IN:	zp	- znode of file to free data in.
 *		cmd	- action to take (only F_FREESP supported).
 *		bfp	- section of file to free/alloc.
 *		flag	- current file open mode flags.
 *		offset	- current file offset.
 *		cr	- credentials of caller.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	zp - ctime|mtime updated
 */
/* ARGSUSED */
int
zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
    offset_t offset, cred_t *cr)
{
	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
	uint64_t	off, len;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* F_FREESP is the only command implemented. */
	if (cmd != F_FREESP) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Callers might not be able to detect properly that we are read-only,
	 * so check it explicitly here.
	 */
	if (zfs_is_readonly(zfsvfs)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EROFS));
	}

	/* A negative length is rejected; only l_len is range-checked here. */
	if (bfp->l_len < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Permissions aren't checked on Solaris because on this OS
	 * zfs_space() can only be called with an opened file handle.
	 * On Linux we can get here through truncate_range() which
	 * operates directly on inodes, so we need to check access rights.
	 */
	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	off = bfp->l_start;
	len = bfp->l_len; /* 0 means from off to end of file */

	/* The 'offset' argument is not consulted; the range comes from bfp. */
	error = zfs_freesp(zp, off, len, flag, TRUE);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Called when the last use of vp goes away: recycle the vnode right away
 * if its znode is gone or unlinked, otherwise write back a dirty atime.
 */
/*ARGSUSED*/
static void
zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
	if (zp->z_sa_hdl == NULL) {
		/*
		 * The fs has been unmounted, or we did a
		 * suspend/resume and this file no longer exists.
		 */
		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
		vrecycle(vp);
		return;
	}

	if (zp->z_unlinked) {
		/*
		 * Fast path to recycle a vnode of a removed file.
3911 */ 3912 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 3913 vrecycle(vp); 3914 return; 3915 } 3916 3917 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 3918 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 3919 3920 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3921 zfs_sa_upgrade_txholds(tx, zp); 3922 error = dmu_tx_assign(tx, TXG_WAIT); 3923 if (error) { 3924 dmu_tx_abort(tx); 3925 } else { 3926 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 3927 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 3928 zp->z_atime_dirty = 0; 3929 dmu_tx_commit(tx); 3930 } 3931 } 3932 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 3933 } 3934 3935 3936 CTASSERT(sizeof (struct zfid_short) <= sizeof (struct fid)); 3937 CTASSERT(sizeof (struct zfid_long) <= sizeof (struct fid)); 3938 3939 /*ARGSUSED*/ 3940 static int 3941 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 3942 { 3943 znode_t *zp = VTOZ(vp); 3944 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3945 uint32_t gen; 3946 uint64_t gen64; 3947 uint64_t object = zp->z_id; 3948 zfid_short_t *zfid; 3949 int size, i, error; 3950 3951 ZFS_ENTER(zfsvfs); 3952 ZFS_VERIFY_ZP(zp); 3953 3954 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 3955 &gen64, sizeof (uint64_t))) != 0) { 3956 ZFS_EXIT(zfsvfs); 3957 return (error); 3958 } 3959 3960 gen = (uint32_t)gen64; 3961 3962 size = (zfsvfs->z_parent != zfsvfs) ? 
LONG_FID_LEN : SHORT_FID_LEN; 3963 fidp->fid_len = size; 3964 3965 zfid = (zfid_short_t *)fidp; 3966 3967 zfid->zf_len = size; 3968 3969 for (i = 0; i < sizeof (zfid->zf_object); i++) 3970 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 3971 3972 /* Must have a non-zero generation number to distinguish from .zfs */ 3973 if (gen == 0) 3974 gen = 1; 3975 for (i = 0; i < sizeof (zfid->zf_gen); i++) 3976 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 3977 3978 if (size == LONG_FID_LEN) { 3979 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 3980 zfid_long_t *zlfid; 3981 3982 zlfid = (zfid_long_t *)fidp; 3983 3984 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 3985 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 3986 3987 /* XXX - this should be the generation number for the objset */ 3988 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 3989 zlfid->zf_setgen[i] = 0; 3990 } 3991 3992 ZFS_EXIT(zfsvfs); 3993 return (0); 3994 } 3995 3996 static int 3997 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 3998 caller_context_t *ct) 3999 { 4000 znode_t *zp; 4001 zfsvfs_t *zfsvfs; 4002 4003 switch (cmd) { 4004 case _PC_LINK_MAX: 4005 *valp = MIN(LONG_MAX, ZFS_LINK_MAX); 4006 return (0); 4007 4008 case _PC_FILESIZEBITS: 4009 *valp = 64; 4010 return (0); 4011 case _PC_MIN_HOLE_SIZE: 4012 *valp = (int)SPA_MINBLOCKSIZE; 4013 return (0); 4014 case _PC_ACL_EXTENDED: 4015 #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */ 4016 zp = VTOZ(vp); 4017 zfsvfs = zp->z_zfsvfs; 4018 ZFS_ENTER(zfsvfs); 4019 ZFS_VERIFY_ZP(zp); 4020 *valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0; 4021 ZFS_EXIT(zfsvfs); 4022 #else 4023 *valp = 0; 4024 #endif 4025 return (0); 4026 4027 case _PC_ACL_NFS4: 4028 zp = VTOZ(vp); 4029 zfsvfs = zp->z_zfsvfs; 4030 ZFS_ENTER(zfsvfs); 4031 ZFS_VERIFY_ZP(zp); 4032 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 
1 : 0; 4033 ZFS_EXIT(zfsvfs); 4034 return (0); 4035 4036 case _PC_ACL_PATH_MAX: 4037 *valp = ACL_MAX_ENTRIES; 4038 return (0); 4039 4040 default: 4041 return (EOPNOTSUPP); 4042 } 4043 } 4044 4045 static int 4046 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, 4047 int *rahead) 4048 { 4049 znode_t *zp = VTOZ(vp); 4050 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4051 zfs_locked_range_t *lr; 4052 vm_object_t object; 4053 off_t start, end, obj_size; 4054 uint_t blksz; 4055 int pgsin_b, pgsin_a; 4056 int error; 4057 4058 ZFS_ENTER(zfsvfs); 4059 ZFS_VERIFY_ZP(zp); 4060 4061 start = IDX_TO_OFF(ma[0]->pindex); 4062 end = IDX_TO_OFF(ma[count - 1]->pindex + 1); 4063 4064 /* 4065 * Lock a range covering all required and optional pages. 4066 * Note that we need to handle the case of the block size growing. 4067 */ 4068 for (;;) { 4069 blksz = zp->z_blksz; 4070 lr = zfs_rangelock_tryenter(&zp->z_rangelock, 4071 rounddown(start, blksz), 4072 roundup(end, blksz) - rounddown(start, blksz), RL_READER); 4073 if (lr == NULL) { 4074 if (rahead != NULL) { 4075 *rahead = 0; 4076 rahead = NULL; 4077 } 4078 if (rbehind != NULL) { 4079 *rbehind = 0; 4080 rbehind = NULL; 4081 } 4082 break; 4083 } 4084 if (blksz == zp->z_blksz) 4085 break; 4086 zfs_rangelock_exit(lr); 4087 } 4088 4089 object = ma[0]->object; 4090 zfs_vmobject_wlock(object); 4091 obj_size = object->un_pager.vnp.vnp_size; 4092 zfs_vmobject_wunlock(object); 4093 if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) { 4094 if (lr != NULL) 4095 zfs_rangelock_exit(lr); 4096 ZFS_EXIT(zfsvfs); 4097 return (zfs_vm_pagerret_bad); 4098 } 4099 4100 pgsin_b = 0; 4101 if (rbehind != NULL) { 4102 pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz)); 4103 pgsin_b = MIN(*rbehind, pgsin_b); 4104 } 4105 4106 pgsin_a = 0; 4107 if (rahead != NULL) { 4108 pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end); 4109 if (end + IDX_TO_OFF(pgsin_a) >= obj_size) 4110 pgsin_a = OFF_TO_IDX(round_page(obj_size) - end); 4111 pgsin_a = MIN(*rahead, 
pgsin_a); 4112 } 4113 4114 /* 4115 * NB: we need to pass the exact byte size of the data that we expect 4116 * to read after accounting for the file size. This is required because 4117 * ZFS will panic if we request DMU to read beyond the end of the last 4118 * allocated block. 4119 */ 4120 error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b, 4121 &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE)); 4122 4123 if (lr != NULL) 4124 zfs_rangelock_exit(lr); 4125 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4126 ZFS_EXIT(zfsvfs); 4127 4128 if (error != 0) 4129 return (zfs_vm_pagerret_error); 4130 4131 VM_CNT_INC(v_vnodein); 4132 VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a); 4133 if (rbehind != NULL) 4134 *rbehind = pgsin_b; 4135 if (rahead != NULL) 4136 *rahead = pgsin_a; 4137 return (zfs_vm_pagerret_ok); 4138 } 4139 4140 #ifndef _SYS_SYSPROTO_H_ 4141 struct vop_getpages_args { 4142 struct vnode *a_vp; 4143 vm_page_t *a_m; 4144 int a_count; 4145 int *a_rbehind; 4146 int *a_rahead; 4147 }; 4148 #endif 4149 4150 static int 4151 zfs_freebsd_getpages(struct vop_getpages_args *ap) 4152 { 4153 4154 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, 4155 ap->a_rahead)); 4156 } 4157 4158 static int 4159 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, 4160 int *rtvals) 4161 { 4162 znode_t *zp = VTOZ(vp); 4163 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4164 zfs_locked_range_t *lr; 4165 dmu_tx_t *tx; 4166 struct sf_buf *sf; 4167 vm_object_t object; 4168 vm_page_t m; 4169 caddr_t va; 4170 size_t tocopy; 4171 size_t lo_len; 4172 vm_ooffset_t lo_off; 4173 vm_ooffset_t off; 4174 uint_t blksz; 4175 int ncount; 4176 int pcount; 4177 int err; 4178 int i; 4179 4180 ZFS_ENTER(zfsvfs); 4181 ZFS_VERIFY_ZP(zp); 4182 4183 object = vp->v_object; 4184 pcount = btoc(len); 4185 ncount = pcount; 4186 4187 KASSERT(ma[0]->object == object, ("mismatching object")); 4188 KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); 4189 4190 for (i = 0; i < 
pcount; i++) 4191 rtvals[i] = zfs_vm_pagerret_error; 4192 4193 off = IDX_TO_OFF(ma[0]->pindex); 4194 blksz = zp->z_blksz; 4195 lo_off = rounddown(off, blksz); 4196 lo_len = roundup(len + (off - lo_off), blksz); 4197 lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER); 4198 4199 zfs_vmobject_wlock(object); 4200 if (len + off > object->un_pager.vnp.vnp_size) { 4201 if (object->un_pager.vnp.vnp_size > off) { 4202 int pgoff; 4203 4204 len = object->un_pager.vnp.vnp_size - off; 4205 ncount = btoc(len); 4206 if ((pgoff = (int)len & PAGE_MASK) != 0) { 4207 /* 4208 * If the object is locked and the following 4209 * conditions hold, then the page's dirty 4210 * field cannot be concurrently changed by a 4211 * pmap operation. 4212 */ 4213 m = ma[ncount - 1]; 4214 vm_page_assert_sbusied(m); 4215 KASSERT(!pmap_page_is_write_mapped(m), 4216 ("zfs_putpages: page %p is not read-only", 4217 m)); 4218 vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 4219 pgoff); 4220 } 4221 } else { 4222 len = 0; 4223 ncount = 0; 4224 } 4225 if (ncount < pcount) { 4226 for (i = ncount; i < pcount; i++) { 4227 rtvals[i] = zfs_vm_pagerret_bad; 4228 } 4229 } 4230 } 4231 zfs_vmobject_wunlock(object); 4232 4233 if (ncount == 0) 4234 goto out; 4235 4236 if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) || 4237 zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) || 4238 (zp->z_projid != ZFS_DEFAULT_PROJID && 4239 zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT, 4240 zp->z_projid))) { 4241 goto out; 4242 } 4243 4244 tx = dmu_tx_create(zfsvfs->z_os); 4245 dmu_tx_hold_write(tx, zp->z_id, off, len); 4246 4247 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4248 zfs_sa_upgrade_txholds(tx, zp); 4249 err = dmu_tx_assign(tx, TXG_WAIT); 4250 if (err != 0) { 4251 dmu_tx_abort(tx); 4252 goto out; 4253 } 4254 4255 if (zp->z_blksz < PAGE_SIZE) { 4256 for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { 4257 tocopy = len > PAGE_SIZE ? 
PAGE_SIZE : len; 4258 va = zfs_map_page(ma[i], &sf); 4259 dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); 4260 zfs_unmap_page(sf); 4261 } 4262 } else { 4263 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); 4264 } 4265 4266 if (err == 0) { 4267 uint64_t mtime[2], ctime[2]; 4268 sa_bulk_attr_t bulk[3]; 4269 int count = 0; 4270 4271 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4272 &mtime, 16); 4273 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4274 &ctime, 16); 4275 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4276 &zp->z_pflags, 8); 4277 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); 4278 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 4279 ASSERT0(err); 4280 /* 4281 * XXX we should be passing a callback to undirty 4282 * but that would make the locking messier 4283 */ 4284 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, 4285 len, 0, NULL, NULL); 4286 4287 zfs_vmobject_wlock(object); 4288 for (i = 0; i < ncount; i++) { 4289 rtvals[i] = zfs_vm_pagerret_ok; 4290 vm_page_undirty(ma[i]); 4291 } 4292 zfs_vmobject_wunlock(object); 4293 VM_CNT_INC(v_vnodeout); 4294 VM_CNT_ADD(v_vnodepgsout, ncount); 4295 } 4296 dmu_tx_commit(tx); 4297 4298 out: 4299 zfs_rangelock_exit(lr); 4300 if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || 4301 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4302 zil_commit(zfsvfs->z_log, zp->z_id); 4303 ZFS_EXIT(zfsvfs); 4304 return (rtvals[0]); 4305 } 4306 4307 #ifndef _SYS_SYSPROTO_H_ 4308 struct vop_putpages_args { 4309 struct vnode *a_vp; 4310 vm_page_t *a_m; 4311 int a_count; 4312 int a_sync; 4313 int *a_rtvals; 4314 }; 4315 #endif 4316 4317 static int 4318 zfs_freebsd_putpages(struct vop_putpages_args *ap) 4319 { 4320 4321 return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, 4322 ap->a_rtvals)); 4323 } 4324 4325 #ifndef _SYS_SYSPROTO_H_ 4326 struct vop_bmap_args { 4327 struct vnode *a_vp; 4328 daddr_t a_bn; 4329 struct bufobj **a_bop; 
4330 daddr_t *a_bnp; 4331 int *a_runp; 4332 int *a_runb; 4333 }; 4334 #endif 4335 4336 static int 4337 zfs_freebsd_bmap(struct vop_bmap_args *ap) 4338 { 4339 4340 if (ap->a_bop != NULL) 4341 *ap->a_bop = &ap->a_vp->v_bufobj; 4342 if (ap->a_bnp != NULL) 4343 *ap->a_bnp = ap->a_bn; 4344 if (ap->a_runp != NULL) 4345 *ap->a_runp = 0; 4346 if (ap->a_runb != NULL) 4347 *ap->a_runb = 0; 4348 4349 return (0); 4350 } 4351 4352 #ifndef _SYS_SYSPROTO_H_ 4353 struct vop_open_args { 4354 struct vnode *a_vp; 4355 int a_mode; 4356 struct ucred *a_cred; 4357 struct thread *a_td; 4358 }; 4359 #endif 4360 4361 static int 4362 zfs_freebsd_open(struct vop_open_args *ap) 4363 { 4364 vnode_t *vp = ap->a_vp; 4365 znode_t *zp = VTOZ(vp); 4366 int error; 4367 4368 error = zfs_open(&vp, ap->a_mode, ap->a_cred); 4369 if (error == 0) 4370 vnode_create_vobject(vp, zp->z_size, ap->a_td); 4371 return (error); 4372 } 4373 4374 #ifndef _SYS_SYSPROTO_H_ 4375 struct vop_close_args { 4376 struct vnode *a_vp; 4377 int a_fflag; 4378 struct ucred *a_cred; 4379 struct thread *a_td; 4380 }; 4381 #endif 4382 4383 static int 4384 zfs_freebsd_close(struct vop_close_args *ap) 4385 { 4386 4387 return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred)); 4388 } 4389 4390 #ifndef _SYS_SYSPROTO_H_ 4391 struct vop_ioctl_args { 4392 struct vnode *a_vp; 4393 ulong_t a_command; 4394 caddr_t a_data; 4395 int a_fflag; 4396 struct ucred *cred; 4397 struct thread *td; 4398 }; 4399 #endif 4400 4401 static int 4402 zfs_freebsd_ioctl(struct vop_ioctl_args *ap) 4403 { 4404 4405 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 4406 ap->a_fflag, ap->a_cred, NULL)); 4407 } 4408 4409 static int 4410 ioflags(int ioflags) 4411 { 4412 int flags = 0; 4413 4414 if (ioflags & IO_APPEND) 4415 flags |= FAPPEND; 4416 if (ioflags & IO_NDELAY) 4417 flags |= FNONBLOCK; 4418 if (ioflags & IO_SYNC) 4419 flags |= (FSYNC | FDSYNC | FRSYNC); 4420 4421 return (flags); 4422 } 4423 4424 #ifndef _SYS_SYSPROTO_H_ 4425 struct 
vop_read_args { 4426 struct vnode *a_vp; 4427 struct uio *a_uio; 4428 int a_ioflag; 4429 struct ucred *a_cred; 4430 }; 4431 #endif 4432 4433 static int 4434 zfs_freebsd_read(struct vop_read_args *ap) 4435 { 4436 zfs_uio_t uio; 4437 zfs_uio_init(&uio, ap->a_uio); 4438 return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), 4439 ap->a_cred)); 4440 } 4441 4442 #ifndef _SYS_SYSPROTO_H_ 4443 struct vop_write_args { 4444 struct vnode *a_vp; 4445 struct uio *a_uio; 4446 int a_ioflag; 4447 struct ucred *a_cred; 4448 }; 4449 #endif 4450 4451 static int 4452 zfs_freebsd_write(struct vop_write_args *ap) 4453 { 4454 zfs_uio_t uio; 4455 zfs_uio_init(&uio, ap->a_uio); 4456 return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), 4457 ap->a_cred)); 4458 } 4459 4460 #if __FreeBSD_version >= 1300102 4461 /* 4462 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see 4463 * the comment above cache_fplookup for details. 4464 */ 4465 static int 4466 zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v) 4467 { 4468 vnode_t *vp; 4469 znode_t *zp; 4470 uint64_t pflags; 4471 4472 vp = v->a_vp; 4473 zp = VTOZ_SMR(vp); 4474 if (__predict_false(zp == NULL)) 4475 return (EAGAIN); 4476 pflags = atomic_load_64(&zp->z_pflags); 4477 if (pflags & ZFS_AV_QUARANTINED) 4478 return (EAGAIN); 4479 if (pflags & ZFS_XATTR) 4480 return (EAGAIN); 4481 if ((pflags & ZFS_NO_EXECS_DENIED) == 0) 4482 return (EAGAIN); 4483 return (0); 4484 } 4485 #endif 4486 4487 #if __FreeBSD_version >= 1300139 4488 static int 4489 zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v) 4490 { 4491 vnode_t *vp; 4492 znode_t *zp; 4493 char *target; 4494 4495 vp = v->a_vp; 4496 zp = VTOZ_SMR(vp); 4497 if (__predict_false(zp == NULL)) { 4498 return (EAGAIN); 4499 } 4500 4501 target = atomic_load_consume_ptr(&zp->z_cached_symlink); 4502 if (target == NULL) { 4503 return (EAGAIN); 4504 } 4505 return (cache_symlink_resolve(v->a_fpl, target, strlen(target))); 4506 } 4507 #endif 4508 4509 
#ifndef _SYS_SYSPROTO_H_
struct vop_access_args {
	struct vnode *a_vp;
	accmode_t a_accmode;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/*
 * VOP_ACCESS entry point.
 *
 * The requested mode is split in two: VREAD, VWRITE, VEXEC and VAPPEND
 * are checked by zfs_access(), any remaining bits by vaccess().  A pure
 * VEXEC request is first tried against zfs_fastaccesschk_execute().
 */
static int
zfs_freebsd_access(struct vop_access_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	accmode_t accmode;
	int error = 0;


	if (ap->a_accmode == VEXEC) {
		if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
			return (0);
	}

	/*
	 * ZFS itself only knows about VREAD, VWRITE, VEXEC and VAPPEND.
	 */
	accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
	if (accmode != 0)
		error = zfs_access(zp, accmode, 0, ap->a_cred);

	/*
	 * VADMIN has to be handled by vaccess().
	 */
	if (error == 0) {
		accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
		if (accmode != 0) {
#if __FreeBSD_version >= 1300105
			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
			    zp->z_gid, accmode, ap->a_cred);
#else
			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
			    zp->z_gid, accmode, ap->a_cred, NULL);
#endif
		}
	}

	/*
	 * For VEXEC, ensure that at least one execute bit is set for
	 * non-directories.
	 */
	if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
	    (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
		error = EACCES;
	}

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_lookup_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
};
#endif

/*
 * Common lookup path.  The component name is copied into a local,
 * NUL-terminated buffer before being handed to zfs_lookup(); 'cached' is
 * forwarded unchanged as the last argument of zfs_lookup().
 */
static int
zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
{
	struct componentname *cnp = ap->a_cnp;
	char nm[NAME_MAX + 1];

	/* strlcpy() below copies at most cn_namelen bytes plus the NUL. */
	ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
	strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));

	return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
	    cnp->cn_cred, 0, cached));
}

/* VOP_CACHEDLOOKUP entry point: a lookup with cached == B_TRUE. */
static int
zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
{

	return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_lookup_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
};
#endif

/*
 * VOP_LOOKUP entry point: go through vfs_cache_lookup() when the
 * filesystem has z_use_namecache set, otherwise look up directly with
 * cached == B_FALSE.
 */
static int
zfs_cache_lookup(struct vop_lookup_args *ap)
{
	zfsvfs_t *zfsvfs;

	zfsvfs = ap->a_dvp->v_mount->mnt_data;
	if (zfsvfs->z_use_namecache)
		return (vfs_cache_lookup(ap));
	else
		return (zfs_freebsd_lookup(ap, B_FALSE));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_create_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
	struct vattr *a_vap;
};
#endif

/*
 * VOP_CREATE entry point: create the file through zfs_create() and, when
 * requested and enabled, enter the new vnode into the name cache.
 */
static int
zfs_freebsd_create(struct vop_create_args *ap)
{
	zfsvfs_t *zfsvfs;
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;
	znode_t *zp = NULL;
	int rc, mode;

	ASSERT(cnp->cn_flags & SAVENAME);

	vattr_init_mask(vap);
	mode = vap->va_mode & ALLPERMS;
	zfsvfs = ap->a_dvp->v_mount->mnt_data;
	*ap->a_vpp = NULL;

	rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, !EXCL, mode,
	    &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */);
	if (rc == 0)
		*ap->a_vpp = ZTOV(zp);
	/* The name cache is only populated on success and on request. */
	if (zfsvfs->z_use_namecache &&
	    rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);

	return (rc);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_remove_args {
	struct vnode *a_dvp;
	struct vnode *a_vp;
	struct componentname *a_cnp;
};
#endif

/* VOP_REMOVE entry point: forward to zfs_remove_(). */
static int
zfs_freebsd_remove(struct vop_remove_args *ap)
{

	ASSERT(ap->a_cnp->cn_flags & SAVENAME);

	return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
	    ap->a_cnp->cn_cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_mkdir_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
	struct vattr *a_vap;
};
#endif

/*
 * VOP_MKDIR entry point: create the directory through zfs_mkdir();
 * *a_vpp is only set when that succeeds and is NULL otherwise.
 */
static int
zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
{
	vattr_t *vap = ap->a_vap;
	znode_t *zp = NULL;
	int rc;

	ASSERT(ap->a_cnp->cn_flags & SAVENAME);

	vattr_init_mask(vap);
	*ap->a_vpp = NULL;

	rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
	    ap->a_cnp->cn_cred, 0, NULL);

	if (rc == 0)
		*ap->a_vpp = ZTOV(zp);
	return (rc);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_rmdir_args {
	struct vnode *a_dvp;
	struct vnode *a_vp;
	struct componentname *a_cnp;
};
#endif

/* VOP_RMDIR entry point: forward to zfs_rmdir_(). */
static int
zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
{
	struct componentname *cnp = ap->a_cnp;

	ASSERT(cnp->cn_flags & SAVENAME);

	return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_readdir_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	struct ucred *a_cred;
	int *a_eofflag;
	int *a_ncookies;
	ulong_t **a_cookies;
};
#endif

/* VOP_READDIR entry point: wrap the uio and forward to zfs_readdir(). */
static int
zfs_freebsd_readdir(struct vop_readdir_args *ap)
{
	zfs_uio_t uio;
	zfs_uio_init(&uio, ap->a_uio);
	return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
	    ap->a_ncookies, ap->a_cookies));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_fsync_args {
	struct vnode *a_vp;
	int a_waitfor;
	struct thread *a_td;
};
#endif

/*
 * VOP_FSYNC entry point: run the generic vop_stdfsync() first, then
 * zfs_fsync() with the calling thread's credentials.
 *
 * NOTE(review): the return value of vop_stdfsync() is discarded; only the
 * result of zfs_fsync() reaches the caller - confirm this is intended.
 */
static int
zfs_freebsd_fsync(struct vop_fsync_args *ap)
{

	vop_stdfsync(ap);
	return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_getattr_args {
	struct vnode *a_vp;
	struct vattr *a_vap;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_GETATTR entry point.  The attributes are fetched through an
 * xvattr_t so that the optional ZFS attributes can be requested as well;
 * those are then folded into the chflags-style va_flags word.
 */
static int
zfs_freebsd_getattr(struct vop_getattr_args *ap)
{
	vattr_t *vap = ap->a_vap;
	xvattr_t xvap;
	ulong_t fflags = 0;
	int error;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;
	xvap.xva_vattr.va_mask |= AT_XVATTR;

	/* Convert chflags into ZFS-type flags. */
	/* XXX: what about SF_SETTABLE?. */
	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
	XVA_SET_REQ(&xvap, XAT_NODUMP);
	XVA_SET_REQ(&xvap, XAT_READONLY);
	XVA_SET_REQ(&xvap, XAT_ARCHIVE);
	XVA_SET_REQ(&xvap, XAT_SYSTEM);
	XVA_SET_REQ(&xvap, XAT_HIDDEN);
	XVA_SET_REQ(&xvap, XAT_REPARSE);
	XVA_SET_REQ(&xvap, XAT_OFFLINE);
	XVA_SET_REQ(&xvap, XAT_SPARSE);

	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
	if (error != 0)
		return (error);

	/* Convert ZFS xattr into chflags.
	 */
/* Set fflag in fflags when xflag was returned and its field is non-zero. */
#define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
		fflags |= (fflag);					\
} while (0)
	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
	    xvap.xva_xoptattrs.xoa_immutable);
	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
	    xvap.xva_xoptattrs.xoa_appendonly);
	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
	    xvap.xva_xoptattrs.xoa_nounlink);
	FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
	    xvap.xva_xoptattrs.xoa_archive);
	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
	    xvap.xva_xoptattrs.xoa_nodump);
	FLAG_CHECK(UF_READONLY, XAT_READONLY,
	    xvap.xva_xoptattrs.xoa_readonly);
	FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
	    xvap.xva_xoptattrs.xoa_system);
	FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
	    xvap.xva_xoptattrs.xoa_hidden);
	FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
	    xvap.xva_xoptattrs.xoa_reparse);
	FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
	    xvap.xva_xoptattrs.xoa_offline);
	FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
	    xvap.xva_xoptattrs.xoa_sparse);

#undef	FLAG_CHECK
	*vap = xvap.xva_vattr;
	vap->va_flags = fflags;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_setattr_args {
	struct vnode *a_vp;
	struct vattr *a_vap;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_SETATTR entry point.
 *
 * When va_flags is being set, the chflags-style word is validated,
 * subjected to the privilege rules described below and converted into
 * optional-attribute requests for every flag whose state differs from the
 * znode's current z_pflags.  A birth time, if supplied, is requested as
 * XAT_CREATETIME.  The result is passed to zfs_setattr().
 */
static int
zfs_freebsd_setattr(struct vop_setattr_args *ap)
{
	vnode_t *vp = ap->a_vp;
	vattr_t *vap = ap->a_vap;
	cred_t *cred = ap->a_cred;
	xvattr_t xvap;
	ulong_t fflags;
	uint64_t zflags;

	vattr_init_mask(vap);
	vap->va_mask &= ~AT_NOSET;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;

	zflags = VTOZ(vp)->z_pflags;

	if (vap->va_flags != VNOVAL) {
		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
		int error;

		/* Flags cannot be set unless z_use_fuids is enabled. */
		if (zfsvfs->z_use_fuids == B_FALSE)
			return (EOPNOTSUPP);

		fflags = vap->va_flags;
		/*
		 * XXX KDM
		 * We need to figure out whether it makes sense to allow
		 * UF_REPARSE through, since we don't really have other
		 * facilities to handle reparse points and zfs_setattr()
		 * doesn't currently allow setting that attribute anyway.
		 */
		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
		    UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
		    UF_OFFLINE|UF_SPARSE)) != 0)
			return (EOPNOTSUPP);
		/*
		 * Unprivileged processes are not permitted to unset system
		 * flags, or modify flags if any system flags are set.
		 * Privileged non-jail processes may not modify system flags
		 * if securelevel > 0 and any existing system flags are set.
		 * Privileged jail processes behave like privileged non-jail
		 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
		 * otherwise, they behave like unprivileged processes.
		 */
		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
		    spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
				error = securelevel_gt(cred, 0);
				if (error != 0)
					return (error);
			}
		} else {
			/*
			 * Callers may only modify the file flags on
			 * objects they have VADMIN rights for.
			 */
			if ((error = VOP_ACCESS(vp, VADMIN, cred,
			    curthread)) != 0)
				return (error);
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY |
			    ZFS_NOUNLINK)) {
				return (EPERM);
			}
			if (fflags &
			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
				return (EPERM);
			}
		}

/*
 * Request a change of xflag only when the requested state (fflag in
 * fflags) differs from the current state (zflag in zflags).
 */
#define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
		XVA_SET_REQ(&xvap, (xflag));				\
		(xfield) = ((fflags & (fflag)) != 0);			\
	}								\
} while (0)
		/* Convert chflags into ZFS-type flags. */
		/* XXX: what about SF_SETTABLE?. */
		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
		    xvap.xva_xoptattrs.xoa_immutable);
		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
		    xvap.xva_xoptattrs.xoa_appendonly);
		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
		    xvap.xva_xoptattrs.xoa_nounlink);
		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
		    xvap.xva_xoptattrs.xoa_archive);
		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
		    xvap.xva_xoptattrs.xoa_nodump);
		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
		    xvap.xva_xoptattrs.xoa_readonly);
		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
		    xvap.xva_xoptattrs.xoa_system);
		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
		    xvap.xva_xoptattrs.xoa_hidden);
		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
		    xvap.xva_xoptattrs.xoa_reparse);
		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
		    xvap.xva_xoptattrs.xoa_offline);
		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
		    xvap.xva_xoptattrs.xoa_sparse);
#undef	FLAG_CHANGE
	}
	if (vap->va_birthtime.tv_sec != VNOVAL) {
		xvap.xva_vattr.va_mask |= AT_XVATTR;
		XVA_SET_REQ(&xvap, XAT_CREATETIME);
	}
	return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_rename_args {
	struct vnode *a_fdvp;
	struct vnode *a_fvp;
	struct componentname *a_fcnp;
	struct vnode *a_tdvp;
	struct vnode *a_tvp;
	struct componentname *a_tcnp;
};
#endif

/*
 * VOP_RENAME entry point: perform the rename through zfs_rename_() and
 * then release the references on all vnodes that were passed in.
 */
static int
zfs_freebsd_rename(struct vop_rename_args *ap)
{
	vnode_t *fdvp = ap->a_fdvp;
	vnode_t *fvp = ap->a_fvp;
	vnode_t *tdvp = ap->a_tdvp;
	vnode_t *tvp = ap->a_tvp;
	int error;

	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));

	/* fvp and tvp are passed by address and are released afterwards. */
	error = zfs_rename_(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
	    ap->a_tcnp, ap->a_fcnp->cn_cred, 1);

vrele(fdvp); 4972 vrele(fvp); 4973 vrele(tdvp); 4974 if (tvp != NULL) 4975 vrele(tvp); 4976 4977 return (error); 4978 } 4979 4980 #ifndef _SYS_SYSPROTO_H_ 4981 struct vop_symlink_args { 4982 struct vnode *a_dvp; 4983 struct vnode **a_vpp; 4984 struct componentname *a_cnp; 4985 struct vattr *a_vap; 4986 char *a_target; 4987 }; 4988 #endif 4989 4990 static int 4991 zfs_freebsd_symlink(struct vop_symlink_args *ap) 4992 { 4993 struct componentname *cnp = ap->a_cnp; 4994 vattr_t *vap = ap->a_vap; 4995 znode_t *zp = NULL; 4996 #if __FreeBSD_version >= 1300139 4997 char *symlink; 4998 size_t symlink_len; 4999 #endif 5000 int rc; 5001 5002 ASSERT(cnp->cn_flags & SAVENAME); 5003 5004 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */ 5005 vattr_init_mask(vap); 5006 *ap->a_vpp = NULL; 5007 5008 rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 5009 ap->a_target, &zp, cnp->cn_cred, 0 /* flags */); 5010 if (rc == 0) { 5011 *ap->a_vpp = ZTOV(zp); 5012 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); 5013 #if __FreeBSD_version >= 1300139 5014 MPASS(zp->z_cached_symlink == NULL); 5015 symlink_len = strlen(ap->a_target); 5016 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK); 5017 if (symlink != NULL) { 5018 memcpy(symlink, ap->a_target, symlink_len); 5019 symlink[symlink_len] = '\0'; 5020 atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink, 5021 (uintptr_t)symlink); 5022 } 5023 #endif 5024 } 5025 return (rc); 5026 } 5027 5028 #ifndef _SYS_SYSPROTO_H_ 5029 struct vop_readlink_args { 5030 struct vnode *a_vp; 5031 struct uio *a_uio; 5032 struct ucred *a_cred; 5033 }; 5034 #endif 5035 5036 static int 5037 zfs_freebsd_readlink(struct vop_readlink_args *ap) 5038 { 5039 zfs_uio_t uio; 5040 int error; 5041 #if __FreeBSD_version >= 1300139 5042 znode_t *zp = VTOZ(ap->a_vp); 5043 char *symlink, *base; 5044 size_t symlink_len; 5045 bool trycache; 5046 #endif 5047 5048 zfs_uio_init(&uio, ap->a_uio); 5049 #if __FreeBSD_version >= 1300139 5050 trycache = false; 5051 if 
(zfs_uio_segflg(&uio) == UIO_SYSSPACE && 5052 zfs_uio_iovcnt(&uio) == 1) { 5053 base = zfs_uio_iovbase(&uio, 0); 5054 symlink_len = zfs_uio_iovlen(&uio, 0); 5055 trycache = true; 5056 } 5057 #endif 5058 error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL); 5059 #if __FreeBSD_version >= 1300139 5060 if (atomic_load_ptr(&zp->z_cached_symlink) != NULL || 5061 error != 0 || !trycache) { 5062 return (error); 5063 } 5064 symlink_len -= zfs_uio_resid(&uio); 5065 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK); 5066 if (symlink != NULL) { 5067 memcpy(symlink, base, symlink_len); 5068 symlink[symlink_len] = '\0'; 5069 if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink, 5070 (uintptr_t)NULL, (uintptr_t)symlink)) { 5071 cache_symlink_free(symlink, symlink_len + 1); 5072 } 5073 } 5074 #endif 5075 return (error); 5076 } 5077 5078 #ifndef _SYS_SYSPROTO_H_ 5079 struct vop_link_args { 5080 struct vnode *a_tdvp; 5081 struct vnode *a_vp; 5082 struct componentname *a_cnp; 5083 }; 5084 #endif 5085 5086 static int 5087 zfs_freebsd_link(struct vop_link_args *ap) 5088 { 5089 struct componentname *cnp = ap->a_cnp; 5090 vnode_t *vp = ap->a_vp; 5091 vnode_t *tdvp = ap->a_tdvp; 5092 5093 if (tdvp->v_mount != vp->v_mount) 5094 return (EXDEV); 5095 5096 ASSERT(cnp->cn_flags & SAVENAME); 5097 5098 return (zfs_link(VTOZ(tdvp), VTOZ(vp), 5099 cnp->cn_nameptr, cnp->cn_cred, 0)); 5100 } 5101 5102 #ifndef _SYS_SYSPROTO_H_ 5103 struct vop_inactive_args { 5104 struct vnode *a_vp; 5105 struct thread *a_td; 5106 }; 5107 #endif 5108 5109 static int 5110 zfs_freebsd_inactive(struct vop_inactive_args *ap) 5111 { 5112 vnode_t *vp = ap->a_vp; 5113 5114 #if __FreeBSD_version >= 1300123 5115 zfs_inactive(vp, curthread->td_ucred, NULL); 5116 #else 5117 zfs_inactive(vp, ap->a_td->td_ucred, NULL); 5118 #endif 5119 return (0); 5120 } 5121 5122 #if __FreeBSD_version >= 1300042 5123 #ifndef _SYS_SYSPROTO_H_ 5124 struct vop_need_inactive_args { 5125 struct vnode *a_vp; 5126 struct thread *a_td; 
5127 }; 5128 #endif 5129 5130 static int 5131 zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap) 5132 { 5133 vnode_t *vp = ap->a_vp; 5134 znode_t *zp = VTOZ(vp); 5135 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5136 int need; 5137 5138 if (vn_need_pageq_flush(vp)) 5139 return (1); 5140 5141 if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs)) 5142 return (1); 5143 need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty); 5144 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 5145 5146 return (need); 5147 } 5148 #endif 5149 5150 #ifndef _SYS_SYSPROTO_H_ 5151 struct vop_reclaim_args { 5152 struct vnode *a_vp; 5153 struct thread *a_td; 5154 }; 5155 #endif 5156 5157 static int 5158 zfs_freebsd_reclaim(struct vop_reclaim_args *ap) 5159 { 5160 vnode_t *vp = ap->a_vp; 5161 znode_t *zp = VTOZ(vp); 5162 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5163 5164 ASSERT3P(zp, !=, NULL); 5165 5166 #if __FreeBSD_version < 1300042 5167 /* Destroy the vm object and flush associated pages. */ 5168 vnode_destroy_vobject(vp); 5169 #endif 5170 /* 5171 * z_teardown_inactive_lock protects from a race with 5172 * zfs_znode_dmu_fini in zfsvfs_teardown during 5173 * force unmount. 
5174 */ 5175 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs); 5176 if (zp->z_sa_hdl == NULL) 5177 zfs_znode_free(zp); 5178 else 5179 zfs_zinactive(zp); 5180 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 5181 5182 vp->v_data = NULL; 5183 return (0); 5184 } 5185 5186 #ifndef _SYS_SYSPROTO_H_ 5187 struct vop_fid_args { 5188 struct vnode *a_vp; 5189 struct fid *a_fid; 5190 }; 5191 #endif 5192 5193 static int 5194 zfs_freebsd_fid(struct vop_fid_args *ap) 5195 { 5196 5197 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5198 } 5199 5200 5201 #ifndef _SYS_SYSPROTO_H_ 5202 struct vop_pathconf_args { 5203 struct vnode *a_vp; 5204 int a_name; 5205 register_t *a_retval; 5206 } *ap; 5207 #endif 5208 5209 static int 5210 zfs_freebsd_pathconf(struct vop_pathconf_args *ap) 5211 { 5212 ulong_t val; 5213 int error; 5214 5215 error = zfs_pathconf(ap->a_vp, ap->a_name, &val, 5216 curthread->td_ucred, NULL); 5217 if (error == 0) { 5218 *ap->a_retval = val; 5219 return (error); 5220 } 5221 if (error != EOPNOTSUPP) 5222 return (error); 5223 5224 switch (ap->a_name) { 5225 case _PC_NAME_MAX: 5226 *ap->a_retval = NAME_MAX; 5227 return (0); 5228 #if __FreeBSD_version >= 1400032 5229 case _PC_DEALLOC_PRESENT: 5230 *ap->a_retval = 1; 5231 return (0); 5232 #endif 5233 case _PC_PIPE_BUF: 5234 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) { 5235 *ap->a_retval = PIPE_BUF; 5236 return (0); 5237 } 5238 return (EINVAL); 5239 default: 5240 return (vop_stdpathconf(ap)); 5241 } 5242 } 5243 5244 /* 5245 * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 5246 * extended attribute name: 5247 * 5248 * NAMESPACE PREFIX 5249 * system freebsd:system: 5250 * user (none, can be used to access ZFS fsattr(5) attributes 5251 * created on Solaris) 5252 */ 5253 static int 5254 zfs_create_attrname(int attrnamespace, const char *name, char *attrname, 5255 size_t size) 5256 { 5257 const char *namespace, *prefix, *suffix; 5258 5259 /* We don't allow '/' character in attribute name. 
*/ 5260 if (strchr(name, '/') != NULL) 5261 return (SET_ERROR(EINVAL)); 5262 /* We don't allow attribute names that start with "freebsd:" string. */ 5263 if (strncmp(name, "freebsd:", 8) == 0) 5264 return (SET_ERROR(EINVAL)); 5265 5266 bzero(attrname, size); 5267 5268 switch (attrnamespace) { 5269 case EXTATTR_NAMESPACE_USER: 5270 #if 0 5271 prefix = "freebsd:"; 5272 namespace = EXTATTR_NAMESPACE_USER_STRING; 5273 suffix = ":"; 5274 #else 5275 /* 5276 * This is the default namespace by which we can access all 5277 * attributes created on Solaris. 5278 */ 5279 prefix = namespace = suffix = ""; 5280 #endif 5281 break; 5282 case EXTATTR_NAMESPACE_SYSTEM: 5283 prefix = "freebsd:"; 5284 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING; 5285 suffix = ":"; 5286 break; 5287 case EXTATTR_NAMESPACE_EMPTY: 5288 default: 5289 return (SET_ERROR(EINVAL)); 5290 } 5291 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix, 5292 name) >= size) { 5293 return (SET_ERROR(ENAMETOOLONG)); 5294 } 5295 return (0); 5296 } 5297 5298 static int 5299 zfs_ensure_xattr_cached(znode_t *zp) 5300 { 5301 int error = 0; 5302 5303 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); 5304 5305 if (zp->z_xattr_cached != NULL) 5306 return (0); 5307 5308 if (rw_write_held(&zp->z_xattr_lock)) 5309 return (zfs_sa_get_xattr(zp)); 5310 5311 if (!rw_tryupgrade(&zp->z_xattr_lock)) { 5312 rw_exit(&zp->z_xattr_lock); 5313 rw_enter(&zp->z_xattr_lock, RW_WRITER); 5314 } 5315 if (zp->z_xattr_cached == NULL) 5316 error = zfs_sa_get_xattr(zp); 5317 rw_downgrade(&zp->z_xattr_lock); 5318 return (error); 5319 } 5320 5321 #ifndef _SYS_SYSPROTO_H_ 5322 struct vop_getextattr { 5323 IN struct vnode *a_vp; 5324 IN int a_attrnamespace; 5325 IN const char *a_name; 5326 INOUT struct uio *a_uio; 5327 OUT size_t *a_size; 5328 IN struct ucred *a_cred; 5329 IN struct thread *a_td; 5330 }; 5331 #endif 5332 5333 static int 5334 zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname) 5335 { 5336 struct thread *td = 
ap->a_td; 5337 struct nameidata nd; 5338 struct vattr va; 5339 vnode_t *xvp = NULL, *vp; 5340 int error, flags; 5341 5342 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5343 LOOKUP_XATTR, B_FALSE); 5344 if (error != 0) 5345 return (error); 5346 5347 flags = FREAD; 5348 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp); 5349 error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL); 5350 vp = nd.ni_vp; 5351 NDFREE(&nd, NDF_ONLY_PNBUF); 5352 if (error != 0) 5353 return (SET_ERROR(error)); 5354 5355 if (ap->a_size != NULL) { 5356 error = VOP_GETATTR(vp, &va, ap->a_cred); 5357 if (error == 0) 5358 *ap->a_size = (size_t)va.va_size; 5359 } else if (ap->a_uio != NULL) 5360 error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5361 5362 VOP_UNLOCK1(vp); 5363 vn_close(vp, flags, ap->a_cred, td); 5364 return (error); 5365 } 5366 5367 static int 5368 zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname) 5369 { 5370 znode_t *zp = VTOZ(ap->a_vp); 5371 uchar_t *nv_value; 5372 uint_t nv_size; 5373 int error; 5374 5375 error = zfs_ensure_xattr_cached(zp); 5376 if (error != 0) 5377 return (error); 5378 5379 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); 5380 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5381 5382 error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname, 5383 &nv_value, &nv_size); 5384 if (error != 0) 5385 return (SET_ERROR(error)); 5386 5387 if (ap->a_size != NULL) 5388 *ap->a_size = nv_size; 5389 else if (ap->a_uio != NULL) 5390 error = uiomove(nv_value, nv_size, ap->a_uio); 5391 if (error != 0) 5392 return (SET_ERROR(error)); 5393 5394 return (0); 5395 } 5396 5397 /* 5398 * Vnode operation to retrieve a named extended attribute. 5399 */ 5400 static int 5401 zfs_getextattr(struct vop_getextattr_args *ap) 5402 { 5403 znode_t *zp = VTOZ(ap->a_vp); 5404 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5405 char attrname[EXTATTR_MAXNAMELEN+1]; 5406 int error; 5407 5408 /* 5409 * If the xattr property is off, refuse the request. 
5410 */ 5411 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5412 return (SET_ERROR(EOPNOTSUPP)); 5413 5414 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5415 ap->a_cred, ap->a_td, VREAD); 5416 if (error != 0) 5417 return (SET_ERROR(error)); 5418 5419 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5420 sizeof (attrname)); 5421 if (error != 0) 5422 return (error); 5423 5424 error = ENOENT; 5425 ZFS_ENTER(zfsvfs); 5426 ZFS_VERIFY_ZP(zp) 5427 rw_enter(&zp->z_xattr_lock, RW_READER); 5428 if (zfsvfs->z_use_sa && zp->z_is_sa) 5429 error = zfs_getextattr_sa(ap, attrname); 5430 if (error == ENOENT) 5431 error = zfs_getextattr_dir(ap, attrname); 5432 rw_exit(&zp->z_xattr_lock); 5433 ZFS_EXIT(zfsvfs); 5434 if (error == ENOENT) 5435 error = SET_ERROR(ENOATTR); 5436 return (error); 5437 } 5438 5439 #ifndef _SYS_SYSPROTO_H_ 5440 struct vop_deleteextattr { 5441 IN struct vnode *a_vp; 5442 IN int a_attrnamespace; 5443 IN const char *a_name; 5444 IN struct ucred *a_cred; 5445 IN struct thread *a_td; 5446 }; 5447 #endif 5448 5449 static int 5450 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname) 5451 { 5452 struct nameidata nd; 5453 vnode_t *xvp = NULL, *vp; 5454 int error; 5455 5456 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5457 LOOKUP_XATTR, B_FALSE); 5458 if (error != 0) 5459 return (error); 5460 5461 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5462 UIO_SYSSPACE, attrname, xvp); 5463 error = namei(&nd); 5464 vp = nd.ni_vp; 5465 if (error != 0) { 5466 NDFREE(&nd, NDF_ONLY_PNBUF); 5467 return (SET_ERROR(error)); 5468 } 5469 5470 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 5471 NDFREE(&nd, NDF_ONLY_PNBUF); 5472 5473 vput(nd.ni_dvp); 5474 if (vp == nd.ni_dvp) 5475 vrele(vp); 5476 else 5477 vput(vp); 5478 5479 return (error); 5480 } 5481 5482 static int 5483 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname) 5484 { 5485 znode_t *zp = VTOZ(ap->a_vp); 5486 nvlist_t *nvl; 
5487 int error; 5488 5489 error = zfs_ensure_xattr_cached(zp); 5490 if (error != 0) 5491 return (error); 5492 5493 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); 5494 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5495 5496 nvl = zp->z_xattr_cached; 5497 error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY); 5498 if (error != 0) 5499 error = SET_ERROR(error); 5500 else 5501 error = zfs_sa_set_xattr(zp); 5502 if (error != 0) { 5503 zp->z_xattr_cached = NULL; 5504 nvlist_free(nvl); 5505 } 5506 return (error); 5507 } 5508 5509 /* 5510 * Vnode operation to remove a named attribute. 5511 */ 5512 static int 5513 zfs_deleteextattr(struct vop_deleteextattr_args *ap) 5514 { 5515 znode_t *zp = VTOZ(ap->a_vp); 5516 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5517 char attrname[EXTATTR_MAXNAMELEN+1]; 5518 int error; 5519 5520 /* 5521 * If the xattr property is off, refuse the request. 5522 */ 5523 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5524 return (SET_ERROR(EOPNOTSUPP)); 5525 5526 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5527 ap->a_cred, ap->a_td, VWRITE); 5528 if (error != 0) 5529 return (SET_ERROR(error)); 5530 5531 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5532 sizeof (attrname)); 5533 if (error != 0) 5534 return (error); 5535 5536 size_t size = 0; 5537 struct vop_getextattr_args vga = { 5538 .a_vp = ap->a_vp, 5539 .a_size = &size, 5540 .a_cred = ap->a_cred, 5541 .a_td = ap->a_td, 5542 }; 5543 error = ENOENT; 5544 ZFS_ENTER(zfsvfs); 5545 ZFS_VERIFY_ZP(zp); 5546 rw_enter(&zp->z_xattr_lock, RW_WRITER); 5547 if (zfsvfs->z_use_sa && zp->z_is_sa) { 5548 error = zfs_getextattr_sa(&vga, attrname); 5549 if (error == 0) 5550 error = zfs_deleteextattr_sa(ap, attrname); 5551 } 5552 if (error == ENOENT) { 5553 error = zfs_getextattr_dir(&vga, attrname); 5554 if (error == 0) 5555 error = zfs_deleteextattr_dir(ap, attrname); 5556 } 5557 rw_exit(&zp->z_xattr_lock); 5558 ZFS_EXIT(zfsvfs); 5559 if (error == ENOENT) 5560 error = SET_ERROR(ENOATTR); 5561 return 
(error); 5562 } 5563 5564 #ifndef _SYS_SYSPROTO_H_ 5565 struct vop_setextattr { 5566 IN struct vnode *a_vp; 5567 IN int a_attrnamespace; 5568 IN const char *a_name; 5569 INOUT struct uio *a_uio; 5570 IN struct ucred *a_cred; 5571 IN struct thread *a_td; 5572 }; 5573 #endif 5574 5575 static int 5576 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname) 5577 { 5578 struct thread *td = ap->a_td; 5579 struct nameidata nd; 5580 struct vattr va; 5581 vnode_t *xvp = NULL, *vp; 5582 int error, flags; 5583 5584 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5585 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE); 5586 if (error != 0) 5587 return (error); 5588 5589 flags = FFLAGS(O_WRONLY | O_CREAT); 5590 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp); 5591 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred, 5592 NULL); 5593 vp = nd.ni_vp; 5594 NDFREE(&nd, NDF_ONLY_PNBUF); 5595 if (error != 0) 5596 return (SET_ERROR(error)); 5597 5598 VATTR_NULL(&va); 5599 va.va_size = 0; 5600 error = VOP_SETATTR(vp, &va, ap->a_cred); 5601 if (error == 0) 5602 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5603 5604 VOP_UNLOCK1(vp); 5605 vn_close(vp, flags, ap->a_cred, td); 5606 return (error); 5607 } 5608 5609 static int 5610 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname) 5611 { 5612 znode_t *zp = VTOZ(ap->a_vp); 5613 nvlist_t *nvl; 5614 size_t sa_size; 5615 int error; 5616 5617 error = zfs_ensure_xattr_cached(zp); 5618 if (error != 0) 5619 return (error); 5620 5621 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); 5622 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5623 5624 nvl = zp->z_xattr_cached; 5625 size_t entry_size = ap->a_uio->uio_resid; 5626 if (entry_size > DXATTR_MAX_ENTRY_SIZE) 5627 return (SET_ERROR(EFBIG)); 5628 error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); 5629 if (error != 0) 5630 return (SET_ERROR(error)); 5631 if (sa_size > DXATTR_MAX_SA_SIZE) 5632 return (SET_ERROR(EFBIG)); 5633 uchar_t *buf = 
kmem_alloc(entry_size, KM_SLEEP); 5634 error = uiomove(buf, entry_size, ap->a_uio); 5635 if (error != 0) { 5636 error = SET_ERROR(error); 5637 } else { 5638 error = nvlist_add_byte_array(nvl, attrname, buf, entry_size); 5639 if (error != 0) 5640 error = SET_ERROR(error); 5641 } 5642 kmem_free(buf, entry_size); 5643 if (error == 0) 5644 error = zfs_sa_set_xattr(zp); 5645 if (error != 0) { 5646 zp->z_xattr_cached = NULL; 5647 nvlist_free(nvl); 5648 } 5649 return (error); 5650 } 5651 5652 /* 5653 * Vnode operation to set a named attribute. 5654 */ 5655 static int 5656 zfs_setextattr(struct vop_setextattr_args *ap) 5657 { 5658 znode_t *zp = VTOZ(ap->a_vp); 5659 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5660 char attrname[EXTATTR_MAXNAMELEN+1]; 5661 int error; 5662 5663 /* 5664 * If the xattr property is off, refuse the request. 5665 */ 5666 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5667 return (SET_ERROR(EOPNOTSUPP)); 5668 5669 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5670 ap->a_cred, ap->a_td, VWRITE); 5671 if (error != 0) 5672 return (SET_ERROR(error)); 5673 5674 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5675 sizeof (attrname)); 5676 if (error != 0) 5677 return (error); 5678 5679 struct vop_deleteextattr_args vda = { 5680 .a_vp = ap->a_vp, 5681 .a_cred = ap->a_cred, 5682 .a_td = ap->a_td, 5683 }; 5684 error = ENOENT; 5685 ZFS_ENTER(zfsvfs); 5686 ZFS_VERIFY_ZP(zp); 5687 rw_enter(&zp->z_xattr_lock, RW_WRITER); 5688 if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) { 5689 error = zfs_setextattr_sa(ap, attrname); 5690 if (error == 0) 5691 /* 5692 * Successfully put into SA, we need to clear the one 5693 * in dir if present. 5694 */ 5695 zfs_deleteextattr_dir(&vda, attrname); 5696 } 5697 if (error) { 5698 error = zfs_setextattr_dir(ap, attrname); 5699 if (error == 0 && zp->z_is_sa) 5700 /* 5701 * Successfully put into dir, we need to clear the one 5702 * in SA if present. 
5703 */ 5704 zfs_deleteextattr_sa(&vda, attrname); 5705 } 5706 rw_exit(&zp->z_xattr_lock); 5707 ZFS_EXIT(zfsvfs); 5708 return (error); 5709 } 5710 5711 #ifndef _SYS_SYSPROTO_H_ 5712 struct vop_listextattr { 5713 IN struct vnode *a_vp; 5714 IN int a_attrnamespace; 5715 INOUT struct uio *a_uio; 5716 OUT size_t *a_size; 5717 IN struct ucred *a_cred; 5718 IN struct thread *a_td; 5719 }; 5720 #endif 5721 5722 static int 5723 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix) 5724 { 5725 struct thread *td = ap->a_td; 5726 struct nameidata nd; 5727 uint8_t dirbuf[sizeof (struct dirent)]; 5728 struct iovec aiov; 5729 struct uio auio; 5730 vnode_t *xvp = NULL, *vp; 5731 int error, eof; 5732 5733 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, 5734 LOOKUP_XATTR, B_FALSE); 5735 if (error != 0) { 5736 /* 5737 * ENOATTR means that the EA directory does not yet exist, 5738 * i.e. there are no extended attributes there. 5739 */ 5740 if (error == ENOATTR) 5741 error = 0; 5742 return (error); 5743 } 5744 5745 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 5746 UIO_SYSSPACE, ".", xvp); 5747 error = namei(&nd); 5748 vp = nd.ni_vp; 5749 NDFREE(&nd, NDF_ONLY_PNBUF); 5750 if (error != 0) 5751 return (SET_ERROR(error)); 5752 5753 auio.uio_iov = &aiov; 5754 auio.uio_iovcnt = 1; 5755 auio.uio_segflg = UIO_SYSSPACE; 5756 auio.uio_td = td; 5757 auio.uio_rw = UIO_READ; 5758 auio.uio_offset = 0; 5759 5760 size_t plen = strlen(attrprefix); 5761 5762 do { 5763 aiov.iov_base = (void *)dirbuf; 5764 aiov.iov_len = sizeof (dirbuf); 5765 auio.uio_resid = sizeof (dirbuf); 5766 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 5767 if (error != 0) 5768 break; 5769 int done = sizeof (dirbuf) - auio.uio_resid; 5770 for (int pos = 0; pos < done; ) { 5771 struct dirent *dp = (struct dirent *)(dirbuf + pos); 5772 pos += dp->d_reclen; 5773 /* 5774 * XXX: Temporarily we also accept DT_UNKNOWN, as this 5775 * is what we get when attribute was 
created on Solaris. 5776 */ 5777 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN) 5778 continue; 5779 else if (plen == 0 && 5780 strncmp(dp->d_name, "freebsd:", 8) == 0) 5781 continue; 5782 else if (strncmp(dp->d_name, attrprefix, plen) != 0) 5783 continue; 5784 uint8_t nlen = dp->d_namlen - plen; 5785 if (ap->a_size != NULL) { 5786 *ap->a_size += 1 + nlen; 5787 } else if (ap->a_uio != NULL) { 5788 /* 5789 * Format of extattr name entry is one byte for 5790 * length and the rest for name. 5791 */ 5792 error = uiomove(&nlen, 1, ap->a_uio); 5793 if (error == 0) { 5794 char *namep = dp->d_name + plen; 5795 error = uiomove(namep, nlen, ap->a_uio); 5796 } 5797 if (error != 0) { 5798 error = SET_ERROR(error); 5799 break; 5800 } 5801 } 5802 } 5803 } while (!eof && error == 0); 5804 5805 vput(vp); 5806 return (error); 5807 } 5808 5809 static int 5810 zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix) 5811 { 5812 znode_t *zp = VTOZ(ap->a_vp); 5813 int error; 5814 5815 error = zfs_ensure_xattr_cached(zp); 5816 if (error != 0) 5817 return (error); 5818 5819 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); 5820 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5821 5822 size_t plen = strlen(attrprefix); 5823 nvpair_t *nvp = NULL; 5824 while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) { 5825 ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY); 5826 5827 const char *name = nvpair_name(nvp); 5828 if (plen == 0 && strncmp(name, "freebsd:", 8) == 0) 5829 continue; 5830 else if (strncmp(name, attrprefix, plen) != 0) 5831 continue; 5832 uint8_t nlen = strlen(name) - plen; 5833 if (ap->a_size != NULL) { 5834 *ap->a_size += 1 + nlen; 5835 } else if (ap->a_uio != NULL) { 5836 /* 5837 * Format of extattr name entry is one byte for 5838 * length and the rest for name. 
5839 */ 5840 error = uiomove(&nlen, 1, ap->a_uio); 5841 if (error == 0) { 5842 char *namep = __DECONST(char *, name) + plen; 5843 error = uiomove(namep, nlen, ap->a_uio); 5844 } 5845 if (error != 0) { 5846 error = SET_ERROR(error); 5847 break; 5848 } 5849 } 5850 } 5851 5852 return (error); 5853 } 5854 5855 /* 5856 * Vnode operation to retrieve extended attributes on a vnode. 5857 */ 5858 static int 5859 zfs_listextattr(struct vop_listextattr_args *ap) 5860 { 5861 znode_t *zp = VTOZ(ap->a_vp); 5862 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5863 char attrprefix[16]; 5864 int error; 5865 5866 if (ap->a_size != NULL) 5867 *ap->a_size = 0; 5868 5869 /* 5870 * If the xattr property is off, refuse the request. 5871 */ 5872 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5873 return (SET_ERROR(EOPNOTSUPP)); 5874 5875 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5876 ap->a_cred, ap->a_td, VREAD); 5877 if (error != 0) 5878 return (SET_ERROR(error)); 5879 5880 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix, 5881 sizeof (attrprefix)); 5882 if (error != 0) 5883 return (error); 5884 5885 ZFS_ENTER(zfsvfs); 5886 ZFS_VERIFY_ZP(zp); 5887 rw_enter(&zp->z_xattr_lock, RW_READER); 5888 if (zfsvfs->z_use_sa && zp->z_is_sa) 5889 error = zfs_listextattr_sa(ap, attrprefix); 5890 if (error == 0) 5891 error = zfs_listextattr_dir(ap, attrprefix); 5892 rw_exit(&zp->z_xattr_lock); 5893 ZFS_EXIT(zfsvfs); 5894 return (error); 5895 } 5896 5897 #ifndef _SYS_SYSPROTO_H_ 5898 struct vop_getacl_args { 5899 struct vnode *vp; 5900 acl_type_t type; 5901 struct acl *aclp; 5902 struct ucred *cred; 5903 struct thread *td; 5904 }; 5905 #endif 5906 5907 static int 5908 zfs_freebsd_getacl(struct vop_getacl_args *ap) 5909 { 5910 int error; 5911 vsecattr_t vsecattr; 5912 5913 if (ap->a_type != ACL_TYPE_NFS4) 5914 return (EINVAL); 5915 5916 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; 5917 if ((error = zfs_getsecattr(VTOZ(ap->a_vp), 5918 &vsecattr, 0, ap->a_cred))) 5919 return (error); 5920 5921 error = 
acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, 5922 vsecattr.vsa_aclcnt); 5923 if (vsecattr.vsa_aclentp != NULL) 5924 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz); 5925 5926 return (error); 5927 } 5928 5929 #ifndef _SYS_SYSPROTO_H_ 5930 struct vop_setacl_args { 5931 struct vnode *vp; 5932 acl_type_t type; 5933 struct acl *aclp; 5934 struct ucred *cred; 5935 struct thread *td; 5936 }; 5937 #endif 5938 5939 static int 5940 zfs_freebsd_setacl(struct vop_setacl_args *ap) 5941 { 5942 int error; 5943 vsecattr_t vsecattr; 5944 int aclbsize; /* size of acl list in bytes */ 5945 aclent_t *aaclp; 5946 5947 if (ap->a_type != ACL_TYPE_NFS4) 5948 return (EINVAL); 5949 5950 if (ap->a_aclp == NULL) 5951 return (EINVAL); 5952 5953 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES) 5954 return (EINVAL); 5955 5956 /* 5957 * With NFSv4 ACLs, chmod(2) may need to add additional entries, 5958 * splitting every entry into two and appending "canonical six" 5959 * entries at the end. Don't allow for setting an ACL that would 5960 * cause chmod(2) to run out of ACL entries. 
5961 */ 5962 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 5963 return (ENOSPC); 5964 5965 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 5966 if (error != 0) 5967 return (error); 5968 5969 vsecattr.vsa_mask = VSA_ACE; 5970 aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t); 5971 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 5972 aaclp = vsecattr.vsa_aclentp; 5973 vsecattr.vsa_aclentsz = aclbsize; 5974 5975 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 5976 error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred); 5977 kmem_free(aaclp, aclbsize); 5978 5979 return (error); 5980 } 5981 5982 #ifndef _SYS_SYSPROTO_H_ 5983 struct vop_aclcheck_args { 5984 struct vnode *vp; 5985 acl_type_t type; 5986 struct acl *aclp; 5987 struct ucred *cred; 5988 struct thread *td; 5989 }; 5990 #endif 5991 5992 static int 5993 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap) 5994 { 5995 5996 return (EOPNOTSUPP); 5997 } 5998 5999 static int 6000 zfs_vptocnp(struct vop_vptocnp_args *ap) 6001 { 6002 vnode_t *covered_vp; 6003 vnode_t *vp = ap->a_vp; 6004 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 6005 znode_t *zp = VTOZ(vp); 6006 int ltype; 6007 int error; 6008 6009 ZFS_ENTER(zfsvfs); 6010 ZFS_VERIFY_ZP(zp); 6011 6012 /* 6013 * If we are a snapshot mounted under .zfs, run the operation 6014 * on the covered vnode. 
6015 */ 6016 if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) { 6017 char name[MAXNAMLEN + 1]; 6018 znode_t *dzp; 6019 size_t len; 6020 6021 error = zfs_znode_parent_and_name(zp, &dzp, name); 6022 if (error == 0) { 6023 len = strlen(name); 6024 if (*ap->a_buflen < len) 6025 error = SET_ERROR(ENOMEM); 6026 } 6027 if (error == 0) { 6028 *ap->a_buflen -= len; 6029 bcopy(name, ap->a_buf + *ap->a_buflen, len); 6030 *ap->a_vpp = ZTOV(dzp); 6031 } 6032 ZFS_EXIT(zfsvfs); 6033 return (error); 6034 } 6035 ZFS_EXIT(zfsvfs); 6036 6037 covered_vp = vp->v_mount->mnt_vnodecovered; 6038 #if __FreeBSD_version >= 1300045 6039 enum vgetstate vs = vget_prep(covered_vp); 6040 #else 6041 vhold(covered_vp); 6042 #endif 6043 ltype = VOP_ISLOCKED(vp); 6044 VOP_UNLOCK1(vp); 6045 #if __FreeBSD_version >= 1300045 6046 error = vget_finish(covered_vp, LK_SHARED, vs); 6047 #else 6048 error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread); 6049 #endif 6050 if (error == 0) { 6051 #if __FreeBSD_version >= 1300123 6052 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf, 6053 ap->a_buflen); 6054 #else 6055 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, 6056 ap->a_buf, ap->a_buflen); 6057 #endif 6058 vput(covered_vp); 6059 } 6060 vn_lock(vp, ltype | LK_RETRY); 6061 if (VN_IS_DOOMED(vp)) 6062 error = SET_ERROR(ENOENT); 6063 return (error); 6064 } 6065 6066 #if __FreeBSD_version >= 1400032 6067 static int 6068 zfs_deallocate(struct vop_deallocate_args *ap) 6069 { 6070 znode_t *zp = VTOZ(ap->a_vp); 6071 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6072 zilog_t *zilog; 6073 off_t off, len, file_sz; 6074 int error; 6075 6076 ZFS_ENTER(zfsvfs); 6077 ZFS_VERIFY_ZP(zp); 6078 6079 /* 6080 * Callers might not be able to detect properly that we are read-only, 6081 * so check it explicitly here. 
6082 */ 6083 if (zfs_is_readonly(zfsvfs)) { 6084 ZFS_EXIT(zfsvfs); 6085 return (SET_ERROR(EROFS)); 6086 } 6087 6088 zilog = zfsvfs->z_log; 6089 off = *ap->a_offset; 6090 len = *ap->a_len; 6091 file_sz = zp->z_size; 6092 if (off + len > file_sz) 6093 len = file_sz - off; 6094 /* Fast path for out-of-range request. */ 6095 if (len <= 0) { 6096 *ap->a_len = 0; 6097 ZFS_EXIT(zfsvfs); 6098 return (0); 6099 } 6100 6101 error = zfs_freesp(zp, off, len, O_RDWR, TRUE); 6102 if (error == 0) { 6103 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS || 6104 (ap->a_ioflag & IO_SYNC) != 0) 6105 zil_commit(zilog, zp->z_id); 6106 *ap->a_offset = off + len; 6107 *ap->a_len = 0; 6108 } 6109 6110 ZFS_EXIT(zfsvfs); 6111 return (error); 6112 } 6113 #endif 6114 6115 struct vop_vector zfs_vnodeops; 6116 struct vop_vector zfs_fifoops; 6117 struct vop_vector zfs_shareops; 6118 6119 struct vop_vector zfs_vnodeops = { 6120 .vop_default = &default_vnodeops, 6121 .vop_inactive = zfs_freebsd_inactive, 6122 #if __FreeBSD_version >= 1300042 6123 .vop_need_inactive = zfs_freebsd_need_inactive, 6124 #endif 6125 .vop_reclaim = zfs_freebsd_reclaim, 6126 #if __FreeBSD_version >= 1300102 6127 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, 6128 #endif 6129 #if __FreeBSD_version >= 1300139 6130 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, 6131 #endif 6132 .vop_access = zfs_freebsd_access, 6133 .vop_allocate = VOP_EINVAL, 6134 #if __FreeBSD_version >= 1400032 6135 .vop_deallocate = zfs_deallocate, 6136 #endif 6137 .vop_lookup = zfs_cache_lookup, 6138 .vop_cachedlookup = zfs_freebsd_cachedlookup, 6139 .vop_getattr = zfs_freebsd_getattr, 6140 .vop_setattr = zfs_freebsd_setattr, 6141 .vop_create = zfs_freebsd_create, 6142 .vop_mknod = (vop_mknod_t *)zfs_freebsd_create, 6143 .vop_mkdir = zfs_freebsd_mkdir, 6144 .vop_readdir = zfs_freebsd_readdir, 6145 .vop_fsync = zfs_freebsd_fsync, 6146 .vop_open = zfs_freebsd_open, 6147 .vop_close = zfs_freebsd_close, 6148 .vop_rmdir = zfs_freebsd_rmdir, 6149 
.vop_ioctl = zfs_freebsd_ioctl, 6150 .vop_link = zfs_freebsd_link, 6151 .vop_symlink = zfs_freebsd_symlink, 6152 .vop_readlink = zfs_freebsd_readlink, 6153 .vop_read = zfs_freebsd_read, 6154 .vop_write = zfs_freebsd_write, 6155 .vop_remove = zfs_freebsd_remove, 6156 .vop_rename = zfs_freebsd_rename, 6157 .vop_pathconf = zfs_freebsd_pathconf, 6158 .vop_bmap = zfs_freebsd_bmap, 6159 .vop_fid = zfs_freebsd_fid, 6160 .vop_getextattr = zfs_getextattr, 6161 .vop_deleteextattr = zfs_deleteextattr, 6162 .vop_setextattr = zfs_setextattr, 6163 .vop_listextattr = zfs_listextattr, 6164 .vop_getacl = zfs_freebsd_getacl, 6165 .vop_setacl = zfs_freebsd_setacl, 6166 .vop_aclcheck = zfs_freebsd_aclcheck, 6167 .vop_getpages = zfs_freebsd_getpages, 6168 .vop_putpages = zfs_freebsd_putpages, 6169 .vop_vptocnp = zfs_vptocnp, 6170 #if __FreeBSD_version >= 1300064 6171 .vop_lock1 = vop_lock, 6172 .vop_unlock = vop_unlock, 6173 .vop_islocked = vop_islocked, 6174 #endif 6175 .vop_add_writecount = vop_stdadd_writecount_nomsync, 6176 }; 6177 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops); 6178 6179 struct vop_vector zfs_fifoops = { 6180 .vop_default = &fifo_specops, 6181 .vop_fsync = zfs_freebsd_fsync, 6182 #if __FreeBSD_version >= 1300102 6183 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, 6184 #endif 6185 #if __FreeBSD_version >= 1300139 6186 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, 6187 #endif 6188 .vop_access = zfs_freebsd_access, 6189 .vop_getattr = zfs_freebsd_getattr, 6190 .vop_inactive = zfs_freebsd_inactive, 6191 .vop_read = VOP_PANIC, 6192 .vop_reclaim = zfs_freebsd_reclaim, 6193 .vop_setattr = zfs_freebsd_setattr, 6194 .vop_write = VOP_PANIC, 6195 .vop_pathconf = zfs_freebsd_pathconf, 6196 .vop_fid = zfs_freebsd_fid, 6197 .vop_getacl = zfs_freebsd_getacl, 6198 .vop_setacl = zfs_freebsd_setacl, 6199 .vop_aclcheck = zfs_freebsd_aclcheck, 6200 .vop_add_writecount = vop_stdadd_writecount_nomsync, 6201 }; 6202 VFS_VOP_VECTOR_REGISTER(zfs_fifoops); 6203 6204 /* 6205 * special 
share hidden files vnode operations template
 *
 * default_vnodeops is the fallback vector.  Both fplookup hooks are set to
 * VOP_EAGAIN here instead of the zfs_freebsd_fplookup_* handlers.
 */
struct vop_vector zfs_shareops = {
	.vop_default =		&default_vnodeops,

	/* Lookup hooks. */
#if __FreeBSD_version >= 1300121
	.vop_fplookup_vexec =	VOP_EAGAIN,
#endif
#if __FreeBSD_version >= 1300139
	.vop_fplookup_symlink =	VOP_EAGAIN,
#endif

	/* Access checks and limits. */
	.vop_access =		zfs_freebsd_access,
	.vop_pathconf =		zfs_freebsd_pathconf,

	/* Vnode lifecycle. */
	.vop_inactive =		zfs_freebsd_inactive,
	.vop_reclaim =		zfs_freebsd_reclaim,

	/* File identifier and write accounting. */
	.vop_fid =		zfs_freebsd_fid,
	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
};
VFS_VOP_VECTOR_REGISTER(zfs_shareops);