1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 25 * Copyright (c) 2014 Integros [integros.com] 26 * Copyright 2017 Nexenta Systems, Inc. 
27 */ 28 29 /* Portions Copyright 2007 Jeremy Teo */ 30 /* Portions Copyright 2010 Robert Milkowski */ 31 32 33 #include <sys/types.h> 34 #include <sys/param.h> 35 #include <sys/time.h> 36 #include <sys/systm.h> 37 #include <sys/sysmacros.h> 38 #include <sys/resource.h> 39 #include <sys/vfs.h> 40 #include <sys/endian.h> 41 #include <sys/vm.h> 42 #include <sys/vnode.h> 43 #if __FreeBSD_version >= 1300102 44 #include <sys/smr.h> 45 #endif 46 #include <sys/dirent.h> 47 #include <sys/file.h> 48 #include <sys/stat.h> 49 #include <sys/kmem.h> 50 #include <sys/taskq.h> 51 #include <sys/uio.h> 52 #include <sys/atomic.h> 53 #include <sys/namei.h> 54 #include <sys/mman.h> 55 #include <sys/cmn_err.h> 56 #include <sys/kdb.h> 57 #include <sys/sysproto.h> 58 #include <sys/errno.h> 59 #include <sys/unistd.h> 60 #include <sys/zfs_dir.h> 61 #include <sys/zfs_ioctl.h> 62 #include <sys/fs/zfs.h> 63 #include <sys/dmu.h> 64 #include <sys/dmu_objset.h> 65 #include <sys/spa.h> 66 #include <sys/txg.h> 67 #include <sys/dbuf.h> 68 #include <sys/zap.h> 69 #include <sys/sa.h> 70 #include <sys/policy.h> 71 #include <sys/sunddi.h> 72 #include <sys/filio.h> 73 #include <sys/sid.h> 74 #include <sys/zfs_ctldir.h> 75 #include <sys/zfs_fuid.h> 76 #include <sys/zfs_quota.h> 77 #include <sys/zfs_sa.h> 78 #include <sys/zfs_rlock.h> 79 #include <sys/extdirent.h> 80 #include <sys/bio.h> 81 #include <sys/buf.h> 82 #include <sys/sched.h> 83 #include <sys/acl.h> 84 #include <sys/vmmeter.h> 85 #include <vm/vm_param.h> 86 #include <sys/zil.h> 87 #include <sys/zfs_vnops.h> 88 89 #include <vm/vm_object.h> 90 91 #include <sys/extattr.h> 92 #include <sys/priv.h> 93 94 #ifndef VN_OPEN_INVFS 95 #define VN_OPEN_INVFS 0x0 96 #endif 97 98 VFS_SMR_DECLARE; 99 100 #if __FreeBSD_version >= 1300047 101 #define vm_page_wire_lock(pp) 102 #define vm_page_wire_unlock(pp) 103 #else 104 #define vm_page_wire_lock(pp) vm_page_lock(pp) 105 #define vm_page_wire_unlock(pp) vm_page_unlock(pp) 106 #endif 107 108 #ifdef DEBUG_VFS_LOCKS 
/*
 * On DEBUG_VFS_LOCKS kernels, assert that the vnode carries both a hold
 * reference and a use reference; compiles to nothing otherwise.
 */
#define	VNCHECKREF(vp)	\
	VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp,	\
	    ("%s: wrong ref counts", __func__));
#else
#define	VNCHECKREF(vp)
#endif

/*
 * Programming rules.
 *
 * Each vnode op performs some logical unit of work.  To do this, the ZPL must
 * properly lock its in-core state, create a DMU transaction, do the work,
 * record this work in the intent log (ZIL), commit the DMU transaction,
 * and wait for the intent log to commit if it is a synchronous operation.
 * Moreover, the vnode ops must work in both normal and log replay context.
 * The ordering of events is important to avoid deadlocks and references
 * to freed memory.  The example below illustrates the following Big Rules:
 *
 *  (1)	A check must be made in each zfs thread for a mounted file system.
 *	This is done, avoiding races, by using ZFS_ENTER(zfsvfs).
 *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
 *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
 *	can return EIO from the calling function.
 *
 *  (2)	VN_RELE() should always be the last thing except for zil_commit()
 *	(if necessary) and ZFS_EXIT(). This is for 3 reasons:
 *	First, if it's the last reference, the vnode/znode
 *	can be freed, so the zp may point to freed memory.  Second, the last
 *	reference will call zfs_zinactive(), which may induce a lot of work --
 *	pushing cached pages (which acquires range locks) and syncing out
 *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
 *	which could deadlock the system if you were already holding one.
 *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 *  (4)	If ZPL locks are held, pass TXG_NOWAIT as the second argument to
 *	dmu_tx_assign().  This is critical because we don't want to block
 *	while holding locks.
 *
 *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
 *	reduces lock contention and CPU usage when we must wait (note that if
 *	throughput is constrained by the storage, nearly every transaction
 *	must wait).
 *
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing
 *	to use a non-blocking assign can deadlock the system.  The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
 *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
 *	to indicate that this operation has already called dmu_tx_wait().
 *	This will ensure that we don't retry forever, waiting a short bit
 *	each time.
 *
 *  (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
 *	During ZIL replay the zfs_log_* functions will update the sequence
 *	number to indicate the zil transaction has replayed.
 *
 *  (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	ZFS_ENTER(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
 *	rw_enter(...);			// grab any other locks you need
 *	tx = dmu_tx_create(...);	// get DMU tx
 *	dmu_tx_hold_*();		// hold each object you might modify
 *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 *	if (error) {
 *		rw_exit(...);		// drop locks
 *		zfs_dirent_unlock(dl);	// unlock directory entry
 *		VN_RELE(...);		// release held vnodes
 *		if (error == ERESTART) {
 *			waited = B_TRUE;
 *			dmu_tx_wait(tx);
 *			dmu_tx_abort(tx);
 *			goto top;
 *		}
 *		dmu_tx_abort(tx);	// abort DMU tx
 *		ZFS_EXIT(zfsvfs);	// finished in zfs
 *		return (error);		// really out of space
 *	}
 *	error = do_real_work();		// do whatever this VOP does
 *	if (error == 0)
 *		zfs_log_*(...);		// on success, make ZIL entry
 *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
 *	rw_exit(...);			// drop locks
 *	zfs_dirent_unlock(dl);		// unlock directory entry
 *	VN_RELE(...);			// release held vnodes
 *	zil_commit(zilog, foid);	// synchronous when necessary
 *	ZFS_EXIT(zfsvfs);		// finished in zfs
 *	return (error);			// done, report error
 */

/*
 * Open a ZFS file.
 *
 * Write opens of append-only files are rejected with EPERM unless the
 * open is itself an append (FAPPEND).  Synchronous opens (FSYNC/FDSYNC)
 * are counted in z_sync_cnt; the matching decrement is in zfs_close().
 */
/* ARGSUSED */
static int
zfs_open(vnode_t **vpp, int flag, cred_t *cr)
{
	znode_t	*zp = VTOZ(*vpp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
	    ((flag & FAPPEND) == 0)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* Keep a count of the synchronous opens in the znode */
	if (flag & (FSYNC | FDSYNC))
		atomic_inc_32(&zp->z_sync_cnt);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Close a ZFS file.  Undoes the synchronous-open accounting performed
 * by zfs_open() when the last open of a sync-opened file goes away.
 */
/* ARGSUSED */
static int
zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
{
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Decrement the synchronous opens in the znode */
	if ((flag & (FSYNC | FDSYNC)) && (count == 1))
		atomic_dec_32(&zp->z_sync_cnt);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * ioctl entry point.  _FIOFFS/_FIOGDIO/_FIOSDIO are accepted as no-ops;
 * F_SEEK_DATA/F_SEEK_HOLE translate to zfs_holey() with the offset
 * passed in and out through *data.  Anything else returns ENOTTY.
 */
/* ARGSUSED */
static int
zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
    int *rvalp)
{
	loff_t off;
	int error;

	switch (com) {
	case _FIOFFS:
	{
		return (0);

		/*
		 * The following two ioctls are used by bfu.  Faking out,
		 * necessary to avoid bfu errors.
		 */
	}
	case _FIOGDIO:
	case _FIOSDIO:
	{
		return (0);
	}

	case F_SEEK_DATA:
	case F_SEEK_HOLE:
	{
		off = *(offset_t *)data;
		/* offset parameter is in/out */
		error = zfs_holey(VTOZ(vp), com, &off);
		if (error)
			return (error);
		*(offset_t *)data = off;
		return (0);
	}
	}
	return (SET_ERROR(ENOTTY));
}

/*
 * Shared-busy the resident, fully-valid page backing object offset 'start'
 * of vp's VM object, in preparation for overwriting its contents from the
 * DMU.  On success a paging-in-progress reference is added to the object,
 * write access is removed from all mappings of the page, and the page's
 * dirty bits are cleared for the DEV_BSIZE-aligned portion of
 * [off, off + nbytes).  Returns NULL if no valid page is resident at
 * 'start'.  The caller releases the page with page_unbusy().
 */
static vm_page_t
page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
{
	vm_object_t obj;
	vm_page_t pp;
	int64_t end;

	/*
	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
	 * aligned boundaries, if the range is not aligned.  As a result a
	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
	 * the whole page would be considered clean despite have some
	 * dirty data.
	 * For this reason we should shrink the range to DEV_BSIZE aligned
	 * boundaries before calling vm_page_clear_dirty.
	 */
	end = rounddown2(off + nbytes, DEV_BSIZE);
	off = roundup2(off, DEV_BSIZE);
	nbytes = end - off;

	obj = vp->v_object;
	zfs_vmobject_assert_wlocked_12(obj);
#if __FreeBSD_version < 1300050
	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    pp->valid) {
			if (vm_page_xbusied(pp)) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_reference(pp);
				vm_page_lock(pp);
				zfs_vmobject_wunlock(obj);
				vm_page_busy_sleep(pp, "zfsmwb", true);
				zfs_vmobject_wlock(obj);
				continue;
			}
			vm_page_sbusy(pp);
		} else if (pp != NULL) {
			/* Resident but not valid: treat as absent. */
			ASSERT(!pp->valid);
			pp = NULL;
		}
		if (pp != NULL) {
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_object_pip_add(obj, 1);
			pmap_remove_write(pp);
			if (nbytes != 0)
				vm_page_clear_dirty(pp, off, nbytes);
		}
		break;
	}
#else
	/* Newer kernels: grab + sbusy an existing valid page in one call. */
	vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
	    VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
	    VM_ALLOC_IGN_SBUSY);
	if (pp != NULL) {
		ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
		vm_object_pip_add(obj, 1);
		pmap_remove_write(pp);
		if (nbytes != 0)
			vm_page_clear_dirty(pp, off, nbytes);
	}
#endif
	return (pp);
}

/*
 * Release a page returned by page_busy(): drop the shared busy and the
 * paging-in-progress reference on its object.
 */
static void
page_unbusy(vm_page_t pp)
{

	vm_page_sunbusy(pp);
#if __FreeBSD_version >= 1300041
	vm_object_pip_wakeup(pp->object);
#else
	vm_object_pip_subtract(pp->object, 1);
#endif
}

#if __FreeBSD_version > 1300051
/*
 * Pin the resident, fully-valid page at object offset 'start' by wiring
 * it (no busy is held on return).  Returns NULL if no such page exists.
 * Undone by page_unhold().
 */
static vm_page_t
page_hold(vnode_t *vp, int64_t start)
{
	vm_object_t obj;
	vm_page_t m;

	obj = vp->v_object;
	vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
	    VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
	    VM_ALLOC_NOBUSY);
	return (m);
}
#else
/*
 * Legacy variant for older kernels: hold (rather than wire) the resident,
 * fully-valid page at object offset 'start', sleeping out any exclusive
 * busy holder.  Returns NULL if the page is absent or not valid.
 */
static vm_page_t
page_hold(vnode_t *vp, int64_t start)
{
	vm_object_t obj;
	vm_page_t pp;

	obj = vp->v_object;
	zfs_vmobject_assert_wlocked(obj);

	for (;;) {
		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    pp->valid) {
			if (vm_page_xbusied(pp)) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_reference(pp);
				vm_page_lock(pp);
				zfs_vmobject_wunlock(obj);
				vm_page_busy_sleep(pp, "zfsmwb", true);
				zfs_vmobject_wlock(obj);
				continue;
			}

			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_wire_lock(pp);
			vm_page_hold(pp);
			vm_page_wire_unlock(pp);

		} else
			pp = NULL;
		break;
	}
	return (pp);
}
#endif

/*
 * Release a page pinned by page_hold().  Newer kernels unwire the page
 * back to the active queue; older kernels drop the hold count.
 */
static void
page_unhold(vm_page_t pp)
{

	vm_page_wire_lock(pp);
#if __FreeBSD_version >= 1300035
	vm_page_unwire(pp, PQ_ACTIVE);
#else
	vm_page_unhold(pp);
#endif
	vm_page_wire_unlock(pp);
}

/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Write:	If we find a memory mapped page, we write to *both*
 *		the page and the dmu buffer.
 */
void
update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
{
	vm_object_t obj;
	struct sf_buf *sf;
	vnode_t *vp = ZTOV(zp);
	caddr_t va;
	int off;

	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);

	off = start & PAGEOFFSET;
	zfs_vmobject_wlock_12(obj);
#if __FreeBSD_version >= 1300041
	vm_object_pip_add(obj, 1);
#endif
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		int nbytes = imin(PAGESIZE - off, len);

		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
			zfs_vmobject_wunlock_12(obj);

			/*
			 * Copy the freshly written data back out of the
			 * DMU into the resident page, keeping the page
			 * cache coherent with the file contents.
			 */
			va = zfs_map_page(pp, &sf);
			(void) dmu_read(os, zp->z_id, start + off, nbytes,
			    va + off, DMU_READ_PREFETCH);
			zfs_unmap_page(sf);

			zfs_vmobject_wlock_12(obj);
			page_unbusy(pp);
		}
		len -= nbytes;
		off = 0;	/* only the first page can start unaligned */
	}
#if __FreeBSD_version >= 1300041
	vm_object_pip_wakeup(obj);
#else
	vm_object_pip_wakeupn(obj, 0);
#endif
	zfs_vmobject_wunlock_12(obj);
}

/*
 * Read with UIO_NOCOPY flag means that sendfile(2) requests
 * ZFS to populate a range of page cache pages with data.
 *
 * NOTE: this function could be optimized to pre-allocate
 * all pages in advance, drain exclusive busy on all of them,
 * map them into contiguous KVA region and populate them
 * in one single dmu_read() call.
 */
int
mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = zp->z_zfsvfs->z_os;
	struct sf_buf *sf;
	vm_object_t obj;
	vm_page_t pp;
	int64_t start;
	caddr_t va;
	int len = nbytes;
	int error = 0;

	ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);
	/* sendfile requests are always page aligned */
	ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);

	zfs_vmobject_wlock_12(obj);
	for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
		int bytes = MIN(PAGESIZE, len);

		pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
		    VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
		if (vm_page_none_valid(pp)) {
			/* Page has no valid data: fill it from the DMU. */
			zfs_vmobject_wunlock_12(obj);
			va = zfs_map_page(pp, &sf);
			error = dmu_read(os, zp->z_id, start, bytes, va,
			    DMU_READ_PREFETCH);
			if (bytes != PAGESIZE && error == 0)
				bzero(va + bytes, PAGESIZE - bytes);
			zfs_unmap_page(sf);
			zfs_vmobject_wlock_12(obj);
#if __FreeBSD_version >= 1300081
			if (error == 0) {
				vm_page_valid(pp);
				vm_page_activate(pp);
				vm_page_do_sunbusy(pp);
			} else {
				/*
				 * On error, free the page if nobody else
				 * is using it; otherwise just drop our busy.
				 */
				zfs_vmobject_wlock(obj);
				if (!vm_page_wired(pp) && pp->valid == 0 &&
				    vm_page_busy_tryupgrade(pp))
					vm_page_free(pp);
				else
					vm_page_sunbusy(pp);
				zfs_vmobject_wunlock(obj);
			}
#else
			vm_page_do_sunbusy(pp);
			vm_page_lock(pp);
			if (error) {
				if (pp->wire_count == 0 && pp->valid == 0 &&
				    !vm_page_busied(pp))
					vm_page_free(pp);
			} else {
				pp->valid = VM_PAGE_BITS_ALL;
				vm_page_activate(pp);
			}
			vm_page_unlock(pp);
#endif
		} else {
			/* Page already valid: nothing to populate. */
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_do_sunbusy(pp);
		}
		if (error)
			break;
		zfs_uio_advance(uio, bytes);
		len -= bytes;
	}
	zfs_vmobject_wunlock_12(obj);
	return (error);
}

/*
 * When a file is memory mapped, we must keep the IO data
 * synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Read:	We "read" preferentially from memory mapped pages,
 *		else we default from the dmu buffer.
 *
 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
 *	the file is memory mapped.
 */
int
mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	vm_object_t obj;
	int64_t start;
	int len = nbytes;
	int off;
	int error = 0;

	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);

	start = zfs_uio_offset(uio);
	off = start & PAGEOFFSET;
	zfs_vmobject_wlock_12(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		uint64_t bytes = MIN(PAGESIZE - off, len);

		if ((pp = page_hold(vp, start))) {
			/* Resident page: copy directly from it. */
			struct sf_buf *sf;
			caddr_t va;

			zfs_vmobject_wunlock_12(obj);
			va = zfs_map_page(pp, &sf);
			error = vn_io_fault_uiomove(va + off, bytes,
			    GET_UIO_STRUCT(uio));
			zfs_unmap_page(sf);
			zfs_vmobject_wlock_12(obj);
			page_unhold(pp);
		} else {
			/* Not resident: fall back to reading from the DMU. */
			zfs_vmobject_wunlock_12(obj);
			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, bytes);
			zfs_vmobject_wlock_12(obj);
		}
		len -= bytes;
		off = 0;	/* only the first page can start unaligned */
		if (error)
			break;
	}
	zfs_vmobject_wunlock_12(obj);
	return (error);
}

/*
 * Convenience wrapper for in-kernel synchronous writes to a znode via
 * vn_rdwr().  If 'presid' is NULL a short write is converted into EIO;
 * otherwise the unwritten residual count is returned through it.
 */
int
zfs_write_simple(znode_t *zp, const void *data, size_t len,
    loff_t pos, size_t *presid)
{
	int error = 0;
	ssize_t resid;

	error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
	    UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);

	if (error) {
		return (SET_ERROR(error));
	} else if (presid == NULL) {
		if (resid != 0) {
			error = SET_ERROR(EIO);
		}
	} else {
		*presid = resid;
	}
	return (error);
}

/*
 * Release the vnode asynchronously on the pool's zrele taskq, for use in
 * contexts where the final release must not run inline (see Big Rule (2)
 * above: the last reference may trigger zfs_zinactive() and a new tx).
 */
void
zfs_zrele_async(znode_t *zp)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = ITOZSB(vp)->z_os;

	VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
}

/*
 * Callback for vn_vget_ino_gen() used by the ".." case in
 * zfs_lookup_lock(): lock the already-held parent vnode passed in 'arg',
 * dropping the reference on failure.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	int error;

	*vpp = arg;
	error = vn_lock(*vpp, lkflags);
	if (error != 0)
		vrele(*vpp);
	return (error);
}

/*
 * Lock the vnode 'vp' produced by looking up 'name' under 'dvp', honoring
 * the lock-order rules for the three cases: "" or "." (same vnode —
 * upgrade/downgrade dvp's lock in place), ".." (parent — use
 * vn_vget_ino_gen() to avoid the child-before-parent deadlock), and a
 * regular child (plain vn_lock()).  On failure the reference on vp (or
 * dvp for the "." case) is dropped.
 */
static int
zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
{
	znode_t *zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
	int error;
	int ltype;

	if (zfsvfs->z_replay == B_FALSE)
		ASSERT_VOP_LOCKED(dvp, __func__);

	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
		ASSERT3P(dvp, ==, vp);
		vref(dvp);
		ltype = lkflags & LK_TYPE_MASK;
		if (ltype != VOP_ISLOCKED(dvp)) {
			if (ltype == LK_EXCLUSIVE)
				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
			else /* if (ltype == LK_SHARED) */
				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);

			/*
			 * Relock for the "." case could leave us with
			 * reclaimed vnode.
			 */
			if (VN_IS_DOOMED(dvp)) {
				vrele(dvp);
				return (SET_ERROR(ENOENT));
			}
		}
		return (0);
	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
		/*
		 * Note that in this case, dvp is the child vnode, and we
		 * are looking up the parent vnode - exactly reverse from
		 * normal operation.  Unlocking dvp requires some rather
		 * tricky unlock/relock dance to prevent mp from being freed;
		 * use vn_vget_ino_gen() which takes care of all that.
		 *
		 * XXX Note that there is a time window when both vnodes are
		 * unlocked.  It is possible, although highly unlikely, that
		 * during that window the parent-child relationship between
		 * the vnodes may change, for example, get reversed.
		 * In that case we would have a wrong lock order for the vnodes.
		 * All other filesystems seem to ignore this problem, so we
		 * do the same here.
		 * A potential solution could be implemented as follows:
		 * - using LK_NOWAIT when locking the second vnode and retrying
		 *   if necessary
		 * - checking that the parent-child relationship still holds
		 *   after locking both vnodes and retrying if it doesn't
		 */
		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
		return (error);
	} else {
		error = vn_lock(vp, lkflags);
		if (error != 0)
			vrele(vp);
		return (error);
	}
}

/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 * IN:	dvp	- vnode of directory to search.
 *	nm	- name of entry to lookup.
 *	pnp	- full pathname to lookup [UNUSED].
 *	flags	- LOOKUP_XATTR set if looking for an attribute.
 *	rdir	- root directory vnode [UNUSED].
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * OUT:	vpp	- vnode of located entry, NULL if not found.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	NA
 */
/* ARGSUSED */
static int
zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
    struct componentname *cnp, int nameiop, cred_t *cr, kthread_t *td,
    int flags, boolean_t cached)
{
	znode_t *zdp = VTOZ(dvp);
	znode_t *zp;
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
#if __FreeBSD_version > 1300124
	seqc_t dvp_seqc;
#endif
	int error = 0;

	/*
	 * Fast path lookup, however we must skip DNLC lookup
	 * for case folding or normalizing lookups because the
	 * DNLC code only stores the passed in name.  This means
	 * creating 'a' and removing 'A' on a case insensitive
	 * file system would work, but DNLC still thinks 'a'
	 * exists and won't let you create it again on the next
	 * pass through fast path.
	 */
	if (!(flags & LOOKUP_XATTR)) {
		if (dvp->v_type != VDIR) {
			return (SET_ERROR(ENOTDIR));
		} else if (zdp->z_sa_hdl == NULL) {
			return (SET_ERROR(EIO));
		}
	}

	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
	    const char *, nm);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zdp);

#if __FreeBSD_version > 1300124
	dvp_seqc = vn_seqc_read_notmodify(dvp);
#endif

	*vpp = NULL;

	if (flags & LOOKUP_XATTR) {
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EOPNOTSUPP));
		}

		/*
		 * We don't allow recursive attributes..
		 * Maybe someday we will.
		 */
		if (zdp->z_pflags & ZFS_XATTR) {
			ZFS_EXIT(zfsvfs);
			return (SET_ERROR(EINVAL));
		}

		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		*vpp = ZTOV(zp);

		/*
		 * Do we have permission to get into attribute directory?
		 */
		error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr);
		if (error) {
			vrele(ZTOV(zp));
		}

		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Check accessibility of directory if we're not coming in via
	 * VOP_CACHEDLOOKUP.
	 */
	if (!cached) {
#ifdef NOEXECCHECK
		if ((cnp->cn_flags & NOEXECCHECK) != 0) {
			cnp->cn_flags &= ~NOEXECCHECK;
		} else
#endif
		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}


	/*
	 * First handle the special cases.
	 */
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * If we are a snapshot mounted under .zfs, return
		 * the vp for the snapshot directory.
		 */
		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
			struct componentname cn;
			vnode_t *zfsctl_vp;
			int ltype;

			ZFS_EXIT(zfsvfs);
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK1(dvp);
			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
			    &zfsctl_vp);
			if (error == 0) {
				cn.cn_nameptr = "snapshot";
				cn.cn_namelen = strlen(cn.cn_nameptr);
				cn.cn_nameiop = cnp->cn_nameiop;
				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
				cn.cn_lkflags = cnp->cn_lkflags;
				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
				vput(zfsctl_vp);
			}
			vn_lock(dvp, ltype | LK_RETRY);
			return (error);
		}
	}
	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
		ZFS_EXIT(zfsvfs);
		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
			return (SET_ERROR(ENOTSUP));
		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
		return (error);
	}

	/*
	 * This loop retries the lookup if the parent-child relationship
	 * changes during the dot-dot locking complexities.
	 */
	for (;;) {
		uint64_t parent;

		error = zfs_dirlook(zdp, nm, &zp);
		if (error == 0)
			*vpp = ZTOV(zp);

		ZFS_EXIT(zfsvfs);
		if (error != 0)
			break;

		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
		if (error != 0) {
			/*
			 * If we've got a locking error, then the vnode
			 * got reclaimed because of a force unmount.
			 * We never enter doomed vnodes into the name cache.
			 */
			*vpp = NULL;
			return (error);
		}

		if ((cnp->cn_flags & ISDOTDOT) == 0)
			break;

		/*
		 * For ".." verify that zdp's recorded parent still matches
		 * the vnode we found; retry the lookup otherwise.
		 */
		ZFS_ENTER(zfsvfs);
		if (zdp->z_sa_hdl == NULL) {
			error = SET_ERROR(EIO);
		} else {
			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
			    &parent, sizeof (parent));
		}
		if (error != 0) {
			ZFS_EXIT(zfsvfs);
			vput(ZTOV(zp));
			break;
		}
		if (zp->z_id == parent) {
			ZFS_EXIT(zfsvfs);
			break;
		}
		vput(ZTOV(zp));
	}

	if (error != 0)
		*vpp = NULL;

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			if (error == ENOENT) {
				error = EJUSTRETURN;
				cnp->cn_flags |= SAVENAME;
				break;
			}
			fallthrough;
		case DELETE:
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
			break;
		}
	}

#if __FreeBSD_version > 1300124
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
		 * handle races. In particular different callers may end up
		 * with different vnodes and will try to add conflicting
		 * entries to the namecache.
		 *
		 * While finding different result may be acceptable in face
		 * of concurrent modification, adding conflicting entries
		 * trips over an assert in the namecache.
		 *
		 * Ultimately let an entry through once everything settles.
		 */
		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
			cnp->cn_flags &= ~MAKEENTRY;
		}
	}
#endif

	/* Insert name into cache (as non-existent) if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(dvp, NULL, cnp);

	/* Insert name into cache if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
		if (!(cnp->cn_flags & ISLASTCN) ||
		    (nameiop != DELETE && nameiop != RENAME)) {
			cache_enter(dvp, *vpp, cnp);
		}
	}

	return (error);
}

/*
 * Attempt to create a new entry in a directory.  If the entry
 * already exists, truncate the file if permissible, else return
 * an error.  Return the vp of the created or trunc'd file.
 *
 * IN:	dvp	- vnode of directory to put new file entry in.
 *	name	- name of new file entry.
 *	vap	- attributes of new file.
 *	excl	- flag indicating exclusive or non-exclusive mode.
 *	mode	- mode to open file with.
 *	cr	- credentials of caller.
 *	flag	- large file flag [UNUSED].
 *	ct	- caller context
 *	vsecp	- ACL to be set
 *
 * OUT:	vpp	- vnode of created or trunc'd entry.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated if new entry created
 *	 vp - ctime|mtime always, atime if new
 */

/* ARGSUSED */
int
zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
    znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
{
	znode_t *zp;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	objset_t *os;
	dmu_tx_t *tx;
	int error;
	ksid_t *ksid;
	uid_t uid;
	gid_t gid = crgetgid(cr);
	uint64_t projid = ZFS_DEFAULT_PROJID;
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;
	uint64_t txtype;
#ifdef DEBUG_VFS_LOCKS
	vnode_t *dvp = ZTOV(dzp);
#endif

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp
*/ 997 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay && 998 error == 0 && (cnp->cn_flags & MAKEENTRY)) { 999 if (!(cnp->cn_flags & ISLASTCN) || 1000 (nameiop != DELETE && nameiop != RENAME)) { 1001 cache_enter(dvp, *vpp, cnp); 1002 } 1003 } 1004 1005 return (error); 1006 } 1007 1008 /* 1009 * Attempt to create a new entry in a directory. If the entry 1010 * already exists, truncate the file if permissible, else return 1011 * an error. Return the vp of the created or trunc'd file. 1012 * 1013 * IN: dvp - vnode of directory to put new file entry in. 1014 * name - name of new file entry. 1015 * vap - attributes of new file. 1016 * excl - flag indicating exclusive or non-exclusive mode. 1017 * mode - mode to open file with. 1018 * cr - credentials of caller. 1019 * flag - large file flag [UNUSED]. 1020 * ct - caller context 1021 * vsecp - ACL to be set 1022 * 1023 * OUT: vpp - vnode of created or trunc'd entry. 1024 * 1025 * RETURN: 0 on success, error code on failure. 1026 * 1027 * Timestamps: 1028 * dvp - ctime|mtime updated if new entry created 1029 * vp - ctime|mtime always, atime if new 1030 */ 1031 1032 /* ARGSUSED */ 1033 int 1034 zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode, 1035 znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp) 1036 { 1037 znode_t *zp; 1038 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1039 zilog_t *zilog; 1040 objset_t *os; 1041 dmu_tx_t *tx; 1042 int error; 1043 ksid_t *ksid; 1044 uid_t uid; 1045 gid_t gid = crgetgid(cr); 1046 uint64_t projid = ZFS_DEFAULT_PROJID; 1047 zfs_acl_ids_t acl_ids; 1048 boolean_t fuid_dirtied; 1049 uint64_t txtype; 1050 #ifdef DEBUG_VFS_LOCKS 1051 vnode_t *dvp = ZTOV(dzp); 1052 #endif 1053 1054 /* 1055 * If we have an ephemeral id, ACL, or XVATTR then 1056 * make sure file system is at proper version 1057 */ 1058 1059 ksid = crgetsid(cr, KSID_OWNER); 1060 if (ksid) 1061 uid = ksid_getid(ksid); 1062 else 1063 uid = crgetuid(cr); 1064 1065 if (zfsvfs->z_use_fuids == B_FALSE && 1066 (vsecp 
|| (vap->va_mask & AT_XVATTR) || 1067 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1068 return (SET_ERROR(EINVAL)); 1069 1070 ZFS_ENTER(zfsvfs); 1071 ZFS_VERIFY_ZP(dzp); 1072 os = zfsvfs->z_os; 1073 zilog = zfsvfs->z_log; 1074 1075 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1076 NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1077 ZFS_EXIT(zfsvfs); 1078 return (SET_ERROR(EILSEQ)); 1079 } 1080 1081 if (vap->va_mask & AT_XVATTR) { 1082 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap, 1083 crgetuid(cr), cr, vap->va_type)) != 0) { 1084 ZFS_EXIT(zfsvfs); 1085 return (error); 1086 } 1087 } 1088 1089 *zpp = NULL; 1090 1091 if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr)) 1092 vap->va_mode &= ~S_ISVTX; 1093 1094 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW); 1095 if (error) { 1096 ZFS_EXIT(zfsvfs); 1097 return (error); 1098 } 1099 ASSERT3P(zp, ==, NULL); 1100 1101 /* 1102 * Create a new file object and update the directory 1103 * to reference it. 1104 */ 1105 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { 1106 goto out; 1107 } 1108 1109 /* 1110 * We only support the creation of regular files in 1111 * extended attribute directories. 
1112 */ 1113 1114 if ((dzp->z_pflags & ZFS_XATTR) && 1115 (vap->va_type != VREG)) { 1116 error = SET_ERROR(EINVAL); 1117 goto out; 1118 } 1119 1120 if ((error = zfs_acl_ids_create(dzp, 0, vap, 1121 cr, vsecp, &acl_ids)) != 0) 1122 goto out; 1123 1124 if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) 1125 projid = zfs_inherit_projid(dzp); 1126 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) { 1127 zfs_acl_ids_free(&acl_ids); 1128 error = SET_ERROR(EDQUOT); 1129 goto out; 1130 } 1131 1132 getnewvnode_reserve_(); 1133 1134 tx = dmu_tx_create(os); 1135 1136 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 1137 ZFS_SA_BASE_ATTR_SIZE); 1138 1139 fuid_dirtied = zfsvfs->z_fuid_dirty; 1140 if (fuid_dirtied) 1141 zfs_fuid_txhold(zfsvfs, tx); 1142 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 1143 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 1144 if (!zfsvfs->z_use_sa && 1145 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1146 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1147 0, acl_ids.z_aclp->z_acl_bytes); 1148 } 1149 error = dmu_tx_assign(tx, TXG_WAIT); 1150 if (error) { 1151 zfs_acl_ids_free(&acl_ids); 1152 dmu_tx_abort(tx); 1153 getnewvnode_drop_reserve(); 1154 ZFS_EXIT(zfsvfs); 1155 return (error); 1156 } 1157 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1158 if (fuid_dirtied) 1159 zfs_fuid_sync(zfsvfs, tx); 1160 1161 (void) zfs_link_create(dzp, name, zp, tx, ZNEW); 1162 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1163 zfs_log_create(zilog, tx, txtype, dzp, zp, name, 1164 vsecp, acl_ids.z_fuidp, vap); 1165 zfs_acl_ids_free(&acl_ids); 1166 dmu_tx_commit(tx); 1167 1168 getnewvnode_drop_reserve(); 1169 1170 out: 1171 VNCHECKREF(dvp); 1172 if (error == 0) { 1173 *zpp = zp; 1174 } 1175 1176 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1177 zil_commit(zilog, 0); 1178 1179 ZFS_EXIT(zfsvfs); 1180 return (error); 1181 } 1182 1183 /* 1184 * Remove an entry from a directory. 1185 * 1186 * IN: dvp - vnode of directory to remove entry from. 
 * name	- name of entry to remove.
 * cr	- credentials of caller.
 * ct	- caller context
 * flags	- case flags
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime
 *	vp - ctime (if nlink > 0)
 */

/*
 * Worker for zfs_remove(): removes the directory entry `name` from dvp,
 * where vp is the already-looked-up vnode for that entry.  Directories
 * are rejected (callers must use rmdir).  Both vnodes are expected to be
 * held/locked by the caller (zfs_remove() obtains them via
 * zfs_lookup_internal() and vput()s vp afterwards).
 */
/*ARGSUSED*/
static int
zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp;
	znode_t		*xzp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	xattr_obj;
	uint64_t	obj = 0;
	dmu_tx_t	*tx;
	boolean_t	unlinked;
	uint64_t	txtype;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zp = VTOZ(vp);
	ZFS_VERIFY_ZP(zp);
	zilog = zfsvfs->z_log;

	xattr_obj = 0;
	xzp = NULL;

	/* Delete permission is checked against both the dir and the file. */
	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (vp->v_type == VDIR) {
		error = SET_ERROR(EPERM);
		goto out;
	}

	/*
	 * NOTE(review): `ct` is not declared in this function; this only
	 * compiles if vnevent_remove() is a no-op macro that discards its
	 * arguments on this platform — confirm against the compat headers.
	 */
	vnevent_remove(vp, dvp, name, ct);

	/* Remember the object id for the TX_REMOVE log record below. */
	obj = zp->z_id;

	/* are there any extended attributes? */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
		ASSERT0(error);
	}

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);

	if (xzp) {
		/* The file's SA may grow (xattr link) and xzp is modified. */
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	/*
	 * Mark this transaction as typically resulting in a net free of space
	 */
	dmu_tx_mark_netfree(tx);

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);

	if (error) {
		dmu_tx_commit(tx);
		goto out;
	}

	if (unlinked) {
		/*
		 * The znode went onto the unlinked set; suppress further
		 * syncing of its pages since the file is going away.
		 */
		zfs_unlinked_add(zp, tx);
		vp->v_vflag |= VV_NOSYNC;
	}
	/* XXX check changes to linux vnops */
	txtype = TX_REMOVE;
	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);

	dmu_tx_commit(tx);
out:

	/* Drop the hold taken by zfs_zget() on the xattr dir, if any. */
	if (xzp)
		vrele(ZTOV(xzp));

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);


	ZFS_EXIT(zfsvfs);
	return (error);
}


/*
 * Look up `name` under dzp, filling in *vpp and the caller-supplied
 * componentname for the given nameiop (e.g. DELETE).  Goes through the
 * VFS name cache when enabled and not replaying the ZIL; otherwise calls
 * zfs_lookup() directly.  On success *vpp is returned locked (cn_lkflags
 * requests LK_EXCLUSIVE | LK_RETRY); callers vput() it when done.
 */
static int
zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
    struct componentname *cnp, int nameiop)
{
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	int error;

	cnp->cn_nameptr = __DECONST(char *, name);
	cnp->cn_namelen = strlen(name);
	cnp->cn_nameiop = nameiop;
	cnp->cn_flags = ISLASTCN | SAVENAME;
	cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
	/* Internal callers act with kernel credentials. */
	cnp->cn_cred = kcred;
	cnp->cn_thread = curthread;

	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
		struct vop_lookup_args a;

		a.a_gen.a_desc = &vop_lookup_desc;
		a.a_dvp = ZTOV(dzp);
		a.a_vpp = vpp;
		a.a_cnp = cnp;
		error = vfs_cache_lookup(&a);
	} else {
		error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred,
		    curthread, 0, B_FALSE);
	}
#ifdef ZFS_DEBUG
	if (error) {
		printf("got error %d on name %s on op %d\n", error, name,
		    nameiop);
		kdb_backtrace();
	}
#endif
	return (error);
}

/*
 * Public entry point: look up `name` in dzp and remove it.
 * `flags` is accepted for interface compatibility but not used here.
 */
int
zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
{
	vnode_t	*vp;
	int error;
	struct componentname cn;

	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
		return (error);

	error = zfs_remove_(ZTOV(dzp), vp, name, cr);
	vput(vp);
	return (error);
}
/*
 * Create a new directory and insert it into dvp using the name
 * provided.  Return a pointer to the inserted directory.
 *
 * IN:	dvp	- vnode of directory to add subdir to.
 *	dirname	- name of new directory.
 *	vap	- attributes of new directory.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *	vsecp	- ACL to be set
 *
 * OUT:	vpp	- vnode of created directory.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 *	vp - ctime|mtime|atime updated
 */
/*ARGSUSED*/
int
zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
    cred_t *cr, int flags, vsecattr_t *vsecp)
{
	znode_t		*zp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	txtype;
	dmu_tx_t	*tx;
	int		error;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;

	ASSERT3U(vap->va_type, ==, VDIR);

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    ((vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* Directories may not be created inside extended attribute dirs. */
	if (dzp->z_pflags & ZFS_XATTR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/* Reject names that are not valid UTF-8 on utf8only datasets. */
	if (zfsvfs->z_utf8 && u8_validate(dirname,
	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
	    NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * First make sure the new directory doesn't exist.
	 *
	 * Existence is checked first to make sure we don't return
	 * EACCES instead of EEXIST which can cause some applications
	 * to fail.
	 */
	*zpp = NULL;

	if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	ASSERT3P(zp, ==, NULL);

	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * Add a new entry to the directory.
	 */
	getnewvnode_reserve_();
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		/* ACL too large for the SA area; held as a spill write. */
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}

	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE);

	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create new node.
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/*
	 * Now put new name in parent dir.
	 */
	(void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);

	*zpp = zp;

	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
	    acl_ids.z_fuidp, vap);

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (0);
}

#if __FreeBSD_version < 1300124
/*
 * Compatibility shim for older FreeBSD: purge name-cache entries for
 * both the parent and the removed directory (newer kernels provide
 * cache_vop_rmdir() natively).
 */
static void
cache_vop_rmdir(struct vnode *dvp, struct vnode *vp)
{

	cache_purge(dvp);
	cache_purge(vp);
}
#endif

/*
 * Remove a directory subdir entry.  If the current working
 * directory is the same as the subdir to be removed, the
 * remove will fail.
 *
 * IN:	dvp	- vnode of directory to remove from.
 *	name	- name of directory to be removed.
 *	cwd	- vnode of current working directory.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	ZFS_VERIFY_ZP(zp);
	zilog = zfsvfs->z_log;


	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
		goto out;
	}

	/* Only directories may be removed by rmdir. */
	if (vp->v_type != VDIR) {
		error = SET_ERROR(ENOTDIR);
		goto out;
	}

	/*
	 * NOTE(review): `ct` is not declared in this function; this only
	 * compiles if vnevent_rmdir() is a no-op macro that discards its
	 * arguments on this platform — confirm against the compat headers.
	 */
	vnevent_rmdir(vp, dvp, name, ct);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);
	/* Removals typically free space overall. */
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);

	if (error == 0) {
		uint64_t txtype = TX_RMDIR;
		zfs_log_remove(zilog, tx, txtype, dzp, name,
		    ZFS_NO_OBJECT, B_FALSE);
	}

	dmu_tx_commit(tx);

	/* Invalidate stale name-cache entries for both vnodes. */
	cache_vop_rmdir(dvp, vp);
out:
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Public entry point: look up `name` in dzp and remove the directory.
 * `cwd` and `flags` are accepted for interface compatibility; they are
 * not used by this implementation.
 */
int
zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
{
	struct componentname cn;
	vnode_t *vp;
	int error;

	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
		return (error);

	error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
	vput(vp);
	return (error);
}

/*
 * Read as many directory entries as will fit into the provided
 * buffer from the
given directory cursor position (specified in
 * the uio structure).
 *
 * IN:	vp	- vnode of directory to read.
 *	uio	- structure supplying read location, range info,
 *		  and return buffer.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * OUT:	uio	- updated offset and range, buffer filled.
 *	eofp	- set to true if end-of-file detected.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 *
 * Note that the low 4 bits of the cookie returned by zap is always zero.
 * This allows us to use the low range for "special" directory entries:
 * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
 * we use the offset 2 for the '.zfs' directory.
 */
/* ARGSUSED */
static int
zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
    int *ncookies, ulong_t **cookies)
{
	znode_t	*zp = VTOZ(vp);
	iovec_t	*iovp;
	edirent_t	*eodp;
	dirent64_t	*odp;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os;
	caddr_t	outbuf;		/* bounce buffer; NULL on direct path */
	size_t	bufsize;
	zap_cursor_t	zc;
	zap_attribute_t	zap;
	uint_t	bytes_wanted;
	uint64_t	offset; /* must be unsigned; checks for < 1 */
	uint64_t	parent;
	int	local_eof;
	int	outcount;
	int	error;
	uint8_t	prefetch;
	boolean_t	check_sysattrs;
	uint8_t	type;
	int	ncooks;
	ulong_t	*cooks = NULL;
	int	flags = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* Parent object id is needed to synthesize the ".." entry. */
	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (parent))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * If we are not given an eof variable,
	 * use a local one.
	 */
	if (eofp == NULL)
		eofp = &local_eof;

	/*
	 * Check for valid iov_len.
	 */
	if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Quit if directory has been removed (posix)
	 */
	if ((*eofp = zp->z_unlinked) != 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	error = 0;
	os = zfsvfs->z_os;
	offset = zfs_uio_offset(uio);
	prefetch = zp->z_zn_prefetch;

	/*
	 * Initialize the iterator cursor.
	 */
	if (offset <= 3) {
		/*
		 * Start iteration from the beginning of the directory.
		 */
		zap_cursor_init(&zc, os, zp->z_id);
	} else {
		/*
		 * The offset is a serialized cursor.
		 */
		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
	}

	/*
	 * Get space to change directory entries into fs independent format.
	 * A kernel-space single-iovec uio is filled directly; anything else
	 * goes through a bounce buffer and zfs_uiomove() at the end.
	 */
	iovp = GET_UIO_STRUCT(uio)->uio_iov;
	bytes_wanted = iovp->iov_len;
	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
		bufsize = bytes_wanted;
		outbuf = kmem_alloc(bufsize, KM_SLEEP);
		odp = (struct dirent64 *)outbuf;
	} else {
		bufsize = bytes_wanted;
		outbuf = NULL;
		odp = (struct dirent64 *)iovp->iov_base;
	}
	eodp = (struct edirent *)odp;

	if (ncookies != NULL) {
		/*
		 * Minimum entry size is dirent size and 1 byte for a file
		 * name; over-allocate cookies based on that lower bound and
		 * subtract the unused ones at the end.
		 */
		ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
		    sizeof (((struct dirent *)NULL)->d_name) + 1);
		cooks = malloc(ncooks * sizeof (ulong_t), M_TEMP, M_WAITOK);
		*cookies = cooks;
		*ncookies = ncooks;
	}
	/*
	 * If this VFS supports the system attribute view interface; and
	 * we're looking at an extended attribute directory; and we care
	 * about normalization conflicts on this vfs; then we must check
	 * for normalization conflicts with the sysattr name space.
	 */
#ifdef TODO
	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
	    (flags & V_RDDIR_ENTFLAGS);
#else
	check_sysattrs = 0;
#endif

	/*
	 * Transform to file-system independent format
	 */
	outcount = 0;
	while (outcount < bytes_wanted) {
		ino64_t objnum;
		ushort_t reclen;
		off64_t *next = NULL;

		/*
		 * Special case `.', `..', and `.zfs'.
		 */
		if (offset == 0) {
			(void) strcpy(zap.za_name, ".");
			zap.za_normalization_conflict = 0;
			objnum = zp->z_id;
			type = DT_DIR;
		} else if (offset == 1) {
			(void) strcpy(zap.za_name, "..");
			zap.za_normalization_conflict = 0;
			objnum = parent;
			type = DT_DIR;
		} else if (offset == 2 && zfs_show_ctldir(zp)) {
			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
			zap.za_normalization_conflict = 0;
			objnum = ZFSCTL_INO_ROOT;
			type = DT_DIR;
		} else {
			/*
			 * Grab next entry.
			 */
			if ((error = zap_cursor_retrieve(&zc, &zap))) {
				/* ENOENT from the cursor means end of dir. */
				if ((*eofp = (error == ENOENT)) != 0)
					break;
				else
					goto update;
			}

			if (zap.za_integer_length != 8 ||
			    zap.za_num_integers != 1) {
				cmn_err(CE_WARN, "zap_readdir: bad directory "
				    "entry, obj = %lld, offset = %lld\n",
				    (u_longlong_t)zp->z_id,
				    (u_longlong_t)offset);
				error = SET_ERROR(ENXIO);
				goto update;
			}

			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
			/*
			 * MacOS X can extract the object type here such as:
			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
			 */
			type = ZFS_DIRENT_TYPE(zap.za_first_integer);

			if (check_sysattrs && !zap.za_normalization_conflict) {
#ifdef TODO
				zap.za_normalization_conflict =
				    xattr_sysattr_casechk(zap.za_name);
#else
				panic("%s:%u: TODO", __func__, __LINE__);
#endif
			}
		}

		if (flags & V_RDDIR_ACCFILTER) {
			/*
			 * If we have no access at all, don't include
			 * this entry in the returned information
			 */
			znode_t *ezp;
			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
				goto skip_entry;
			if (!zfs_has_access(ezp, cr)) {
				vrele(ZTOV(ezp));
				goto skip_entry;
			}
			vrele(ZTOV(ezp));
		}

		if (flags & V_RDDIR_ENTFLAGS)
			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
		else
			reclen = DIRENT64_RECLEN(strlen(zap.za_name));

		/*
		 * Will this entry fit in the buffer?
		 */
		if (outcount + reclen > bufsize) {
			/*
			 * Did we manage to fit anything in the buffer?
			 */
			if (!outcount) {
				error = SET_ERROR(EINVAL);
				goto update;
			}
			break;
		}
		if (flags & V_RDDIR_ENTFLAGS) {
			/*
			 * Add extended flag entry:
			 */
			eodp->ed_ino = objnum;
			eodp->ed_reclen = reclen;
			/* NOTE: ed_off is the offset for the *next* entry */
			next = &(eodp->ed_off);
			eodp->ed_eflags = zap.za_normalization_conflict ?
			    ED_CASE_CONFLICT : 0;
			(void) strncpy(eodp->ed_name, zap.za_name,
			    EDIRENT_NAMELEN(reclen));
			eodp = (edirent_t *)((intptr_t)eodp + reclen);
		} else {
			/*
			 * Add normal entry:
			 */
			odp->d_ino = objnum;
			odp->d_reclen = reclen;
			odp->d_namlen = strlen(zap.za_name);
			/* NOTE: d_off is the offset for the *next* entry. */
			next = &odp->d_off;
			strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
			odp->d_type = type;
			dirent_terminate(odp);
			odp = (dirent64_t *)((intptr_t)odp + reclen);
		}
		outcount += reclen;

		ASSERT3S(outcount, <=, bufsize);

		/* Prefetch znode */
		if (prefetch)
			dmu_prefetch(os, objnum, 0, 0, 0,
			    ZIO_PRIORITY_SYNC_READ);

	skip_entry:
		/*
		 * Move to the next entry, fill in the previous offset.
		 */
		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
			zap_cursor_advance(&zc);
			offset = zap_cursor_serialize(&zc);
		} else {
			/* Still within the synthesized ./../.zfs entries. */
			offset += 1;
		}

		/* Fill the offset right after advancing the cursor. */
		if (next != NULL)
			*next = offset;
		if (cooks != NULL) {
			*cooks++ = offset;
			ncooks--;
			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
		}
	}
	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */

	/* Subtract unused cookies */
	if (ncookies != NULL)
		*ncookies -= ncooks;

	if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
		/* Direct path: entries were written into the iovec itself. */
		iovp->iov_base += outcount;
		iovp->iov_len -= outcount;
		zfs_uio_resid(uio) -= outcount;
	} else if ((error =
	    zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
		/*
		 * Reset the pointer.
		 */
		offset = zfs_uio_offset(uio);
	}

update:
	zap_cursor_fini(&zc);
	/* The bounce buffer exists only on the non-direct path. */
	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
		kmem_free(outbuf, bufsize);

	if (error == ENOENT)
		error = 0;

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	zfs_uio_setoffset(uio, offset);
	ZFS_EXIT(zfsvfs);
	/* On error the caller must not use the cookies; free them here. */
	if (error != 0 && cookies != NULL) {
		free(*cookies, M_TEMP);
		*cookies = NULL;
		*ncookies = 0;
	}
	return (error);
}

/*
 * Get the requested file attributes and place them in the provided
 * vattr structure.
 *
 * IN:	vp	- vnode of file.
 *	vap	- va_mask identifies requested attributes.
 *		  If AT_XVATTR set, then optional attrs are requested
 *	flags	- ATTR_NOACLCHECK (CIFS server context)
 *	cr	- credentials of caller.
 *
 * OUT:	vap	- attribute values.
 *
 * RETURN:	0 (always succeeds).
 */
/* ARGSUSED */
static int
zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int	error = 0;
	uint32_t blksize;
	u_longlong_t nblocks;
	uint64_t mtime[2], ctime[2], crtime[2], rdev;
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t *xoap = NULL;
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	sa_bulk_attr_t bulk[4];
	int count = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);

	/* Fetch the timestamps (and rdev for devices) in one SA lookup. */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
		    &rdev, 8);

	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
	 * Also, if we are the owner don't bother, since owner should
	 * always be allowed to read basic attributes of file.
	 */
	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
	    (vap->va_uid != crgetuid(cr))) {
		if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
		    skipaclchk, cr))) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * Return all attributes.  It's cheaper to provide the answer
	 * than to determine whether we were asked the question.
	 */

	vap->va_type = IFTOVT(zp->z_mode);
	vap->va_mode = zp->z_mode & ~S_IFMT;
	vn_fsid(vp, vap);
	vap->va_nodeid = zp->z_id;
	vap->va_nlink = zp->z_links;
	/* Account for the virtual .zfs directory on the fs root. */
	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
	    zp->z_links < ZFS_LINK_MAX)
		vap->va_nlink++;
	vap->va_size = zp->z_size;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		vap->va_rdev = zfs_cmpldev(rdev);
	vap->va_seq = zp->z_seq;
	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */
	vap->va_filerev = zp->z_seq;

	/*
	 * Add in any requested optional attributes and the create time.
	 * Also set the corresponding bits in the returned attribute bitmap.
	 */
	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
			xoap->xoa_archive =
			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
			XVA_SET_RTN(xvap, XAT_ARCHIVE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
			xoap->xoa_readonly =
			    ((zp->z_pflags & ZFS_READONLY) != 0);
			XVA_SET_RTN(xvap, XAT_READONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
			xoap->xoa_system =
			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
			XVA_SET_RTN(xvap, XAT_SYSTEM);
		}

		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
			xoap->xoa_hidden =
			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
			XVA_SET_RTN(xvap, XAT_HIDDEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			xoap->xoa_nounlink =
			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
			XVA_SET_RTN(xvap, XAT_NOUNLINK);
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			xoap->xoa_immutable =
			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			xoap->xoa_appendonly =
			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
			XVA_SET_RTN(xvap, XAT_APPENDONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			xoap->xoa_nodump =
			    ((zp->z_pflags & ZFS_NODUMP) != 0);
			XVA_SET_RTN(xvap, XAT_NODUMP);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
			xoap->xoa_opaque =
			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
			XVA_SET_RTN(xvap, XAT_OPAQUE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			xoap->xoa_av_quarantined =
			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			xoap->xoa_av_modified =
			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
		    vp->v_type == VREG) {
			zfs_sa_get_scanstamp(zp, xvap);
		}

		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
			XVA_SET_RTN(xvap, XAT_REPARSE);
		}
		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
			xoap->xoa_generation = zp->z_gen;
			XVA_SET_RTN(xvap, XAT_GEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
			xoap->xoa_offline =
			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
			XVA_SET_RTN(xvap, XAT_OFFLINE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
			xoap->xoa_sparse =
			    ((zp->z_pflags & ZFS_SPARSE) != 0);
			XVA_SET_RTN(xvap, XAT_SPARSE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
			xoap->xoa_projinherit =
			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
			XVA_SET_RTN(xvap, XAT_PROJINHERIT);
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
			xoap->xoa_projid = zp->z_projid;
			XVA_SET_RTN(xvap, XAT_PROJID);
		}
	}

	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
	ZFS_TIME_DECODE(&vap->va_birthtime, crtime);


	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
	vap->va_blksize = blksize;
	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */

	if (zp->z_blksz == 0) {
		/*
		 * Block size hasn't been set; suggest maximal I/O transfers.
		 */
		vap->va_blksize = zfsvfs->z_max_blksz;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Set the file attributes to the values contained in the
 * vattr structure.
 *
 * IN:	zp	- znode of file to be modified.
 *	vap	- new attribute values.
 *		  If AT_XVATTR set, then optional attrs are being set
 *	flags	- ATTR_UTIME set if non-default time values provided.
 *		- ATTR_NOACLCHECK (CIFS context only).
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * RETURN:	0 on success, error code on failure.
2199 * 2200 * Timestamps: 2201 * vp - ctime updated, mtime updated if size changed. 2202 */ 2203 /* ARGSUSED */ 2204 int 2205 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr) 2206 { 2207 vnode_t *vp = ZTOV(zp); 2208 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2209 objset_t *os = zfsvfs->z_os; 2210 zilog_t *zilog; 2211 dmu_tx_t *tx; 2212 vattr_t oldva; 2213 xvattr_t tmpxvattr; 2214 uint_t mask = vap->va_mask; 2215 uint_t saved_mask = 0; 2216 uint64_t saved_mode; 2217 int trim_mask = 0; 2218 uint64_t new_mode; 2219 uint64_t new_uid, new_gid; 2220 uint64_t xattr_obj; 2221 uint64_t mtime[2], ctime[2]; 2222 uint64_t projid = ZFS_INVALID_PROJID; 2223 znode_t *attrzp; 2224 int need_policy = FALSE; 2225 int err, err2; 2226 zfs_fuid_info_t *fuidp = NULL; 2227 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2228 xoptattr_t *xoap; 2229 zfs_acl_t *aclp; 2230 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2231 boolean_t fuid_dirtied = B_FALSE; 2232 sa_bulk_attr_t bulk[7], xattr_bulk[7]; 2233 int count = 0, xattr_count = 0; 2234 2235 if (mask == 0) 2236 return (0); 2237 2238 if (mask & AT_NOSET) 2239 return (SET_ERROR(EINVAL)); 2240 2241 ZFS_ENTER(zfsvfs); 2242 ZFS_VERIFY_ZP(zp); 2243 2244 zilog = zfsvfs->z_log; 2245 2246 /* 2247 * Make sure that if we have ephemeral uid/gid or xvattr specified 2248 * that file system is at proper version level 2249 */ 2250 2251 if (zfsvfs->z_use_fuids == B_FALSE && 2252 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2253 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2254 (mask & AT_XVATTR))) { 2255 ZFS_EXIT(zfsvfs); 2256 return (SET_ERROR(EINVAL)); 2257 } 2258 2259 if (mask & AT_SIZE && vp->v_type == VDIR) { 2260 ZFS_EXIT(zfsvfs); 2261 return (SET_ERROR(EISDIR)); 2262 } 2263 2264 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2265 ZFS_EXIT(zfsvfs); 2266 return (SET_ERROR(EINVAL)); 2267 } 2268 2269 /* 2270 * If this is an xvattr_t, then get a pointer to the structure of 2271 * 
optional attributes. If this is NULL, then we have a vattr_t. 2272 */ 2273 xoap = xva_getxoptattr(xvap); 2274 2275 xva_init(&tmpxvattr); 2276 2277 /* 2278 * Immutable files can only alter immutable bit and atime 2279 */ 2280 if ((zp->z_pflags & ZFS_IMMUTABLE) && 2281 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2282 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2283 ZFS_EXIT(zfsvfs); 2284 return (SET_ERROR(EPERM)); 2285 } 2286 2287 /* 2288 * Note: ZFS_READONLY is handled in zfs_zaccess_common. 2289 */ 2290 2291 /* 2292 * Verify timestamps doesn't overflow 32 bits. 2293 * ZFS can handle large timestamps, but 32bit syscalls can't 2294 * handle times greater than 2039. This check should be removed 2295 * once large timestamps are fully supported. 2296 */ 2297 if (mask & (AT_ATIME | AT_MTIME)) { 2298 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2299 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2300 ZFS_EXIT(zfsvfs); 2301 return (SET_ERROR(EOVERFLOW)); 2302 } 2303 } 2304 if (xoap != NULL && (mask & AT_XVATTR)) { 2305 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) && 2306 TIMESPEC_OVERFLOW(&vap->va_birthtime)) { 2307 ZFS_EXIT(zfsvfs); 2308 return (SET_ERROR(EOVERFLOW)); 2309 } 2310 2311 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) { 2312 if (!dmu_objset_projectquota_enabled(os) || 2313 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) { 2314 ZFS_EXIT(zfsvfs); 2315 return (SET_ERROR(EOPNOTSUPP)); 2316 } 2317 2318 projid = xoap->xoa_projid; 2319 if (unlikely(projid == ZFS_INVALID_PROJID)) { 2320 ZFS_EXIT(zfsvfs); 2321 return (SET_ERROR(EINVAL)); 2322 } 2323 2324 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID) 2325 projid = ZFS_INVALID_PROJID; 2326 else 2327 need_policy = TRUE; 2328 } 2329 2330 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) && 2331 (xoap->xoa_projinherit != 2332 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) && 2333 (!dmu_objset_projectquota_enabled(os) || 2334 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) { 2335 
ZFS_EXIT(zfsvfs); 2336 return (SET_ERROR(EOPNOTSUPP)); 2337 } 2338 } 2339 2340 attrzp = NULL; 2341 aclp = NULL; 2342 2343 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2344 ZFS_EXIT(zfsvfs); 2345 return (SET_ERROR(EROFS)); 2346 } 2347 2348 /* 2349 * First validate permissions 2350 */ 2351 2352 if (mask & AT_SIZE) { 2353 /* 2354 * XXX - Note, we are not providing any open 2355 * mode flags here (like FNDELAY), so we may 2356 * block if there are locks present... this 2357 * should be addressed in openat(). 2358 */ 2359 /* XXX - would it be OK to generate a log record here? */ 2360 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2361 if (err) { 2362 ZFS_EXIT(zfsvfs); 2363 return (err); 2364 } 2365 } 2366 2367 if (mask & (AT_ATIME|AT_MTIME) || 2368 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2369 XVA_ISSET_REQ(xvap, XAT_READONLY) || 2370 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2371 XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2372 XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2373 XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 2374 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2375 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2376 skipaclchk, cr); 2377 } 2378 2379 if (mask & (AT_UID|AT_GID)) { 2380 int idmask = (mask & (AT_UID|AT_GID)); 2381 int take_owner; 2382 int take_group; 2383 2384 /* 2385 * NOTE: even if a new mode is being set, 2386 * we may clear S_ISUID/S_ISGID bits. 2387 */ 2388 2389 if (!(mask & AT_MODE)) 2390 vap->va_mode = zp->z_mode; 2391 2392 /* 2393 * Take ownership or chgrp to group we are a member of 2394 */ 2395 2396 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2397 take_group = (mask & AT_GID) && 2398 zfs_groupmember(zfsvfs, vap->va_gid, cr); 2399 2400 /* 2401 * If both AT_UID and AT_GID are set then take_owner and 2402 * take_group must both be set in order to allow taking 2403 * ownership. 
2404 * 2405 * Otherwise, send the check through secpolicy_vnode_setattr() 2406 * 2407 */ 2408 2409 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2410 ((idmask == AT_UID) && take_owner) || 2411 ((idmask == AT_GID) && take_group)) { 2412 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2413 skipaclchk, cr) == 0) { 2414 /* 2415 * Remove setuid/setgid for non-privileged users 2416 */ 2417 secpolicy_setid_clear(vap, vp, cr); 2418 trim_mask = (mask & (AT_UID|AT_GID)); 2419 } else { 2420 need_policy = TRUE; 2421 } 2422 } else { 2423 need_policy = TRUE; 2424 } 2425 } 2426 2427 oldva.va_mode = zp->z_mode; 2428 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2429 if (mask & AT_XVATTR) { 2430 /* 2431 * Update xvattr mask to include only those attributes 2432 * that are actually changing. 2433 * 2434 * the bits will be restored prior to actually setting 2435 * the attributes so the caller thinks they were set. 2436 */ 2437 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2438 if (xoap->xoa_appendonly != 2439 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 2440 need_policy = TRUE; 2441 } else { 2442 XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2443 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2444 } 2445 } 2446 2447 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { 2448 if (xoap->xoa_projinherit != 2449 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) { 2450 need_policy = TRUE; 2451 } else { 2452 XVA_CLR_REQ(xvap, XAT_PROJINHERIT); 2453 XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT); 2454 } 2455 } 2456 2457 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2458 if (xoap->xoa_nounlink != 2459 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 2460 need_policy = TRUE; 2461 } else { 2462 XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2463 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2464 } 2465 } 2466 2467 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2468 if (xoap->xoa_immutable != 2469 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 2470 need_policy = TRUE; 2471 } else { 2472 XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2473 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 
2474 } 2475 } 2476 2477 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2478 if (xoap->xoa_nodump != 2479 ((zp->z_pflags & ZFS_NODUMP) != 0)) { 2480 need_policy = TRUE; 2481 } else { 2482 XVA_CLR_REQ(xvap, XAT_NODUMP); 2483 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 2484 } 2485 } 2486 2487 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2488 if (xoap->xoa_av_modified != 2489 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 2490 need_policy = TRUE; 2491 } else { 2492 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2493 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2494 } 2495 } 2496 2497 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2498 if ((vp->v_type != VREG && 2499 xoap->xoa_av_quarantined) || 2500 xoap->xoa_av_quarantined != 2501 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 2502 need_policy = TRUE; 2503 } else { 2504 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2505 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2506 } 2507 } 2508 2509 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2510 ZFS_EXIT(zfsvfs); 2511 return (SET_ERROR(EPERM)); 2512 } 2513 2514 if (need_policy == FALSE && 2515 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 2516 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2517 need_policy = TRUE; 2518 } 2519 } 2520 2521 if (mask & AT_MODE) { 2522 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 2523 err = secpolicy_setid_setsticky_clear(vp, vap, 2524 &oldva, cr); 2525 if (err) { 2526 ZFS_EXIT(zfsvfs); 2527 return (err); 2528 } 2529 trim_mask |= AT_MODE; 2530 } else { 2531 need_policy = TRUE; 2532 } 2533 } 2534 2535 if (need_policy) { 2536 /* 2537 * If trim_mask is set then take ownership 2538 * has been granted or write_acl is present and user 2539 * has the ability to modify mode. In that case remove 2540 * UID|GID and or MODE from mask so that 2541 * secpolicy_vnode_setattr() doesn't revoke it. 
2542 */ 2543 2544 if (trim_mask) { 2545 saved_mask = vap->va_mask; 2546 vap->va_mask &= ~trim_mask; 2547 if (trim_mask & AT_MODE) { 2548 /* 2549 * Save the mode, as secpolicy_vnode_setattr() 2550 * will overwrite it with ova.va_mode. 2551 */ 2552 saved_mode = vap->va_mode; 2553 } 2554 } 2555 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2556 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2557 if (err) { 2558 ZFS_EXIT(zfsvfs); 2559 return (err); 2560 } 2561 2562 if (trim_mask) { 2563 vap->va_mask |= saved_mask; 2564 if (trim_mask & AT_MODE) { 2565 /* 2566 * Recover the mode after 2567 * secpolicy_vnode_setattr(). 2568 */ 2569 vap->va_mode = saved_mode; 2570 } 2571 } 2572 } 2573 2574 /* 2575 * secpolicy_vnode_setattr, or take ownership may have 2576 * changed va_mask 2577 */ 2578 mask = vap->va_mask; 2579 2580 if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) { 2581 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 2582 &xattr_obj, sizeof (xattr_obj)); 2583 2584 if (err == 0 && xattr_obj) { 2585 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 2586 if (err == 0) { 2587 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE); 2588 if (err != 0) 2589 vrele(ZTOV(attrzp)); 2590 } 2591 if (err) 2592 goto out2; 2593 } 2594 if (mask & AT_UID) { 2595 new_uid = zfs_fuid_create(zfsvfs, 2596 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 2597 if (new_uid != zp->z_uid && 2598 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT, 2599 new_uid)) { 2600 if (attrzp) 2601 vput(ZTOV(attrzp)); 2602 err = SET_ERROR(EDQUOT); 2603 goto out2; 2604 } 2605 } 2606 2607 if (mask & AT_GID) { 2608 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 2609 cr, ZFS_GROUP, &fuidp); 2610 if (new_gid != zp->z_gid && 2611 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT, 2612 new_gid)) { 2613 if (attrzp) 2614 vput(ZTOV(attrzp)); 2615 err = SET_ERROR(EDQUOT); 2616 goto out2; 2617 } 2618 } 2619 2620 if (projid != ZFS_INVALID_PROJID && 2621 zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, 
projid)) { 2622 if (attrzp) 2623 vput(ZTOV(attrzp)); 2624 err = SET_ERROR(EDQUOT); 2625 goto out2; 2626 } 2627 } 2628 tx = dmu_tx_create(os); 2629 2630 if (mask & AT_MODE) { 2631 uint64_t pmode = zp->z_mode; 2632 uint64_t acl_obj; 2633 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2634 2635 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 2636 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 2637 err = SET_ERROR(EPERM); 2638 goto out; 2639 } 2640 2641 if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))) 2642 goto out; 2643 2644 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 2645 /* 2646 * Are we upgrading ACL from old V0 format 2647 * to V1 format? 2648 */ 2649 if (zfsvfs->z_version >= ZPL_VERSION_FUID && 2650 zfs_znode_acl_version(zp) == 2651 ZFS_ACL_VERSION_INITIAL) { 2652 dmu_tx_hold_free(tx, acl_obj, 0, 2653 DMU_OBJECT_END); 2654 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2655 0, aclp->z_acl_bytes); 2656 } else { 2657 dmu_tx_hold_write(tx, acl_obj, 0, 2658 aclp->z_acl_bytes); 2659 } 2660 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2661 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2662 0, aclp->z_acl_bytes); 2663 } 2664 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2665 } else { 2666 if (((mask & AT_XVATTR) && 2667 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) || 2668 (projid != ZFS_INVALID_PROJID && 2669 !(zp->z_pflags & ZFS_PROJID))) 2670 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 2671 else 2672 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2673 } 2674 2675 if (attrzp) { 2676 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 2677 } 2678 2679 fuid_dirtied = zfsvfs->z_fuid_dirty; 2680 if (fuid_dirtied) 2681 zfs_fuid_txhold(zfsvfs, tx); 2682 2683 zfs_sa_upgrade_txholds(tx, zp); 2684 2685 err = dmu_tx_assign(tx, TXG_WAIT); 2686 if (err) 2687 goto out; 2688 2689 count = 0; 2690 /* 2691 * Set each attribute requested. 2692 * We group settings according to the locks they need to acquire. 
2693 * 2694 * Note: you cannot set ctime directly, although it will be 2695 * updated as a side-effect of calling this function. 2696 */ 2697 2698 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) { 2699 /* 2700 * For the existed object that is upgraded from old system, 2701 * its on-disk layout has no slot for the project ID attribute. 2702 * But quota accounting logic needs to access related slots by 2703 * offset directly. So we need to adjust old objects' layout 2704 * to make the project ID to some unified and fixed offset. 2705 */ 2706 if (attrzp) 2707 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid); 2708 if (err == 0) 2709 err = sa_add_projid(zp->z_sa_hdl, tx, projid); 2710 2711 if (unlikely(err == EEXIST)) 2712 err = 0; 2713 else if (err != 0) 2714 goto out; 2715 else 2716 projid = ZFS_INVALID_PROJID; 2717 } 2718 2719 if (mask & (AT_UID|AT_GID|AT_MODE)) 2720 mutex_enter(&zp->z_acl_lock); 2721 2722 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 2723 &zp->z_pflags, sizeof (zp->z_pflags)); 2724 2725 if (attrzp) { 2726 if (mask & (AT_UID|AT_GID|AT_MODE)) 2727 mutex_enter(&attrzp->z_acl_lock); 2728 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2729 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 2730 sizeof (attrzp->z_pflags)); 2731 if (projid != ZFS_INVALID_PROJID) { 2732 attrzp->z_projid = projid; 2733 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2734 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid, 2735 sizeof (attrzp->z_projid)); 2736 } 2737 } 2738 2739 if (mask & (AT_UID|AT_GID)) { 2740 2741 if (mask & AT_UID) { 2742 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 2743 &new_uid, sizeof (new_uid)); 2744 zp->z_uid = new_uid; 2745 if (attrzp) { 2746 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2747 SA_ZPL_UID(zfsvfs), NULL, &new_uid, 2748 sizeof (new_uid)); 2749 attrzp->z_uid = new_uid; 2750 } 2751 } 2752 2753 if (mask & AT_GID) { 2754 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 2755 NULL, &new_gid, sizeof (new_gid)); 2756 
zp->z_gid = new_gid; 2757 if (attrzp) { 2758 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2759 SA_ZPL_GID(zfsvfs), NULL, &new_gid, 2760 sizeof (new_gid)); 2761 attrzp->z_gid = new_gid; 2762 } 2763 } 2764 if (!(mask & AT_MODE)) { 2765 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 2766 NULL, &new_mode, sizeof (new_mode)); 2767 new_mode = zp->z_mode; 2768 } 2769 err = zfs_acl_chown_setattr(zp); 2770 ASSERT0(err); 2771 if (attrzp) { 2772 vn_seqc_write_begin(ZTOV(attrzp)); 2773 err = zfs_acl_chown_setattr(attrzp); 2774 vn_seqc_write_end(ZTOV(attrzp)); 2775 ASSERT0(err); 2776 } 2777 } 2778 2779 if (mask & AT_MODE) { 2780 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 2781 &new_mode, sizeof (new_mode)); 2782 zp->z_mode = new_mode; 2783 ASSERT3P(aclp, !=, NULL); 2784 err = zfs_aclset_common(zp, aclp, cr, tx); 2785 ASSERT0(err); 2786 if (zp->z_acl_cached) 2787 zfs_acl_free(zp->z_acl_cached); 2788 zp->z_acl_cached = aclp; 2789 aclp = NULL; 2790 } 2791 2792 2793 if (mask & AT_ATIME) { 2794 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 2795 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 2796 &zp->z_atime, sizeof (zp->z_atime)); 2797 } 2798 2799 if (mask & AT_MTIME) { 2800 ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 2801 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 2802 mtime, sizeof (mtime)); 2803 } 2804 2805 if (projid != ZFS_INVALID_PROJID) { 2806 zp->z_projid = projid; 2807 SA_ADD_BULK_ATTR(bulk, count, 2808 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid, 2809 sizeof (zp->z_projid)); 2810 } 2811 2812 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 2813 if (mask & AT_SIZE && !(mask & AT_MTIME)) { 2814 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 2815 NULL, mtime, sizeof (mtime)); 2816 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 2817 &ctime, sizeof (ctime)); 2818 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); 2819 } else if (mask != 0) { 2820 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 2821 &ctime, sizeof (ctime)); 2822 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime); 2823 if (attrzp) { 2824 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 2825 SA_ZPL_CTIME(zfsvfs), NULL, 2826 &ctime, sizeof (ctime)); 2827 zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 2828 mtime, ctime); 2829 } 2830 } 2831 2832 /* 2833 * Do this after setting timestamps to prevent timestamp 2834 * update from toggling bit 2835 */ 2836 2837 if (xoap && (mask & AT_XVATTR)) { 2838 2839 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 2840 xoap->xoa_createtime = vap->va_birthtime; 2841 /* 2842 * restore trimmed off masks 2843 * so that return masks can be set for caller. 
2844 */ 2845 2846 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 2847 XVA_SET_REQ(xvap, XAT_APPENDONLY); 2848 } 2849 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 2850 XVA_SET_REQ(xvap, XAT_NOUNLINK); 2851 } 2852 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 2853 XVA_SET_REQ(xvap, XAT_IMMUTABLE); 2854 } 2855 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 2856 XVA_SET_REQ(xvap, XAT_NODUMP); 2857 } 2858 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 2859 XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 2860 } 2861 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 2862 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 2863 } 2864 if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) { 2865 XVA_SET_REQ(xvap, XAT_PROJINHERIT); 2866 } 2867 2868 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 2869 ASSERT3S(vp->v_type, ==, VREG); 2870 2871 zfs_xvattr_set(zp, xvap, tx); 2872 } 2873 2874 if (fuid_dirtied) 2875 zfs_fuid_sync(zfsvfs, tx); 2876 2877 if (mask != 0) 2878 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 2879 2880 if (mask & (AT_UID|AT_GID|AT_MODE)) 2881 mutex_exit(&zp->z_acl_lock); 2882 2883 if (attrzp) { 2884 if (mask & (AT_UID|AT_GID|AT_MODE)) 2885 mutex_exit(&attrzp->z_acl_lock); 2886 } 2887 out: 2888 if (err == 0 && attrzp) { 2889 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 2890 xattr_count, tx); 2891 ASSERT0(err2); 2892 } 2893 2894 if (attrzp) 2895 vput(ZTOV(attrzp)); 2896 2897 if (aclp) 2898 zfs_acl_free(aclp); 2899 2900 if (fuidp) { 2901 zfs_fuid_info_free(fuidp); 2902 fuidp = NULL; 2903 } 2904 2905 if (err) { 2906 dmu_tx_abort(tx); 2907 } else { 2908 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 2909 dmu_tx_commit(tx); 2910 } 2911 2912 out2: 2913 if (os->os_sync == ZFS_SYNC_ALWAYS) 2914 zil_commit(zilog, 0); 2915 2916 ZFS_EXIT(zfsvfs); 2917 return (err); 2918 } 2919 2920 /* 2921 * We acquire all but fdvp locks using non-blocking acquisitions. 
If we
 * fail to acquire any lock in the path we will drop all held locks,
 * acquire the new lock in a blocking fashion, and then release it and
 * restart the rename.  This acquire/release step ensures that we do not
 * spin on a lock waiting for release.  On error release all vnode locks
 * and decrement references the way tmpfs_rename() would do.
 */
static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
    struct vnode *tdvp, struct vnode **tvpp,
    const struct componentname *scnp, const struct componentname *tcnp)
{
	zfsvfs_t	*zfsvfs;
	struct vnode	*nvp, *svp, *tvp;
	znode_t		*sdzp, *tdzp, *szp, *tzp;
	const char	*snm = scnp->cn_nameptr;
	const char	*tnm = tcnp->cn_nameptr;
	int error;

	VOP_UNLOCK1(tdvp);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK1(*tvpp);

	/*
	 * Restart point: no vnode locks are held when control arrives here.
	 * sdvp is always locked first, then tdvp non-blocking.
	 */
relock:
	error = vn_lock(sdvp, LK_EXCLUSIVE);
	if (error)
		goto out;
	sdzp = VTOZ(sdvp);

	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		if (error != EBUSY)
			goto out;
		/* Wait for tdvp to become free, then retry the whole dance. */
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto out;
		VOP_UNLOCK1(tdvp);
		goto relock;
	}
	tdzp = VTOZ(tdvp);

	/*
	 * Before using sdzp and tdzp we must ensure that they are live.
	 * As a porting legacy from illumos we have two things to worry
	 * about.  One is typical for FreeBSD and it is that the vnode is
	 * not reclaimed (doomed).  The other is that the znode is live.
	 * The current code can invalidate the znode without acquiring the
	 * corresponding vnode lock if the object represented by the znode
	 * and vnode is no longer valid after a rollback or receive operation.
	 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
	 * that protects the znodes from the invalidation.
	 */
	zfsvfs = sdzp->z_zfsvfs;
	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
	ZFS_ENTER(zfsvfs);

	/*
	 * We can not use ZFS_VERIFY_ZP() here because it could directly return
	 * bypassing the cleanup code in the case of an error.
	 */
	if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		error = SET_ERROR(EIO);
		goto out;
	}

	/*
	 * Re-resolve svp to be certain it still exists and fetch the
	 * correct vnode.
	 */
	error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS);
	if (error != 0) {
		/* Source entry invalid or not there. */
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
			error = SET_ERROR(EINVAL);
		goto out;
	}
	svp = ZTOV(szp);

	/*
	 * Re-resolve tvp, if it disappeared we just carry on.
	 */
	error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0);
	if (error != 0) {
		ZFS_EXIT(zfsvfs);
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		vrele(svp);
		if ((tcnp->cn_flags & ISDOTDOT) != 0)
			error = SET_ERROR(EINVAL);
		goto out;
	}
	if (tzp != NULL)
		tvp = ZTOV(tzp);
	else
		tvp = NULL;

	/*
	 * At present the vnode locks must be acquired before z_teardown_lock,
	 * although it would be more logical to use the opposite order.
	 */
	ZFS_EXIT(zfsvfs);

	/*
	 * Now try acquire locks on svp and tvp.
	 */
	nvp = svp;
	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK1(sdvp);
		VOP_UNLOCK1(tdvp);
		if (tvp != NULL)
			vrele(tvp);
		if (error != EBUSY) {
			vrele(nvp);
			goto out;
		}
		/* Block until svp is free, then drop it and restart. */
		error = vn_lock(nvp, LK_EXCLUSIVE);
		if (error != 0) {
			vrele(nvp);
			goto out;
		}
		VOP_UNLOCK1(nvp);
		/*
		 * Concurrent rename race.
		 * XXX ?
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = SET_ERROR(EINVAL);
			goto out;
		}
		vrele(*svpp);
		*svpp = nvp;
		goto relock;
	}
	vrele(*svpp);
	*svpp = nvp;

	if (*tvpp != NULL)
		vrele(*tvpp);
	*tvpp = NULL;
	if (tvp != NULL) {
		nvp = tvp;
		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
		if (error != 0) {
			VOP_UNLOCK1(sdvp);
			VOP_UNLOCK1(tdvp);
			VOP_UNLOCK1(*svpp);
			if (error != EBUSY) {
				vrele(nvp);
				goto out;
			}
			error = vn_lock(nvp, LK_EXCLUSIVE);
			if (error != 0) {
				vrele(nvp);
				goto out;
			}
			/* vput() drops both the lock and our reference. */
			vput(nvp);
			goto relock;
		}
		*tvpp = nvp;
	}

	return (0);

out:
	/* Every path to this label has already dropped its vnode locks. */
	return (error);
}

/*
 * Note that we must use VRELE_ASYNC in this function as it walks
 * up the directory tree and vrele may need to acquire an exclusive
 * lock if a last reference to a vnode is dropped.
 */
/*
 * Verify that renaming directory szp (from sdzp) into tdzp would not
 * create a cycle: walk up from tdzp toward the root and fail with
 * EINVAL if szp is found to be an ancestor of tdzp
 * (e.g. moving /usr/a/b to /usr/a/b/c/d).
 */
static int
zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
{
	zfsvfs_t *zfsvfs;
	znode_t *zp, *zp1;
	uint64_t parent;
	int error;

	zfsvfs = tdzp->z_zfsvfs;
	if (tdzp == szp)
		return (SET_ERROR(EINVAL));
	if (tdzp == sdzp)
		return (0);
	if (tdzp->z_id == zfsvfs->z_root)
		return (0);
	zp = tdzp;
	for (;;) {
		ASSERT(!zp->z_unlinked);
		if ((error = sa_lookup(zp->z_sa_hdl,
		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
			break;

		if (parent == szp->z_id) {
			/* szp is an ancestor of tdzp — would form a loop. */
			error = SET_ERROR(EINVAL);
			break;
		}
		if (parent == zfsvfs->z_root)
			break;
		if (parent == sdzp->z_id)
			break;

		error = zfs_zget(zfsvfs, parent, &zp1);
		if (error != 0)
			break;

		if (zp != tdzp)
			VN_RELE_ASYNC(ZTOV(zp),
			    dsl_pool_zrele_taskq(
			    dmu_objset_pool(zfsvfs->z_os)));
		zp = zp1;
	}

	if (error == ENOTDIR)
		panic("checkpath: .. not a directory\n");
	if (zp != tdzp)
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
	return (error);
}

#if __FreeBSD_version < 1300124
/*
 * Compatibility shim for older FreeBSD (< 1300124) that lacks the
 * kernel's cache_vop_rename(): purge the name-cache entries affected
 * by a rename the same way the newer helper does.
 */
static void
cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
    struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp)
{

	cache_purge(fvp);
	if (tvp != NULL)
		cache_purge(tvp);
	cache_purge_negative(tdvp);
}
#endif

/*
 * Move an entry from the provided source directory to the target
 * directory.  Change the entry name as indicated.
 *
 * IN:	sdvp	- Source directory containing the "old entry".
 *	snm	- Old entry name.
 *	tdvp	- Target directory to contain the "new entry".
 *	tnm	- New entry name.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	sdvp,tdvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rename_(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
    cred_t *cr, int log)
{
	/* NOTE(review): the 'log' parameter is not used in this body. */
	zfsvfs_t *zfsvfs;
	znode_t *sdzp, *tdzp, *szp, *tzp;
	zilog_t *zilog = NULL;
	dmu_tx_t *tx;
	const char *snm = scnp->cn_nameptr;
	const char *tnm = tcnp->cn_nameptr;
	int error = 0;
	bool want_seqc_end __maybe_unused = false;

	/* Reject renames across filesystems. */
	if ((*svpp)->v_mount != tdvp->v_mount ||
	    ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	if (zfsctl_is_node(tdvp)) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	/*
	 * Lock all four vnodes to ensure safety and semantics of renaming.
	 */
	error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
	if (error != 0) {
		/* no vnodes are locked in the case of error here */
		return (error);
	}

	tdzp = VTOZ(tdvp);
	sdzp = VTOZ(sdvp);
	zfsvfs = tdzp->z_zfsvfs;
	zilog = zfsvfs->z_log;

	/*
	 * After we re-enter ZFS_ENTER() we will have to revalidate all
	 * znodes involved.
	 */
	ZFS_ENTER(zfsvfs);

	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		error = SET_ERROR(EILSEQ);
		goto unlockout;
	}

	/* If source and target are the same file, there is nothing to do. */
	if ((*svpp) == (*tvpp)) {
		error = 0;
		goto unlockout;
	}

	/* Refuse to move a mount point or rename over one. */
	if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
	    ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
	    (*tvpp)->v_mountedhere != NULL)) {
		error = SET_ERROR(EXDEV);
		goto unlockout;
	}

	/*
	 * We can not use ZFS_VERIFY_ZP() here because it could directly return
	 * bypassing the cleanup code in the case of an error.
	 */
	if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
		error = SET_ERROR(EIO);
		goto unlockout;
	}

	szp = VTOZ(*svpp);
	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
	if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) {
		error = SET_ERROR(EIO);
		goto unlockout;
	}

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/outof an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
		error = SET_ERROR(EINVAL);
		goto unlockout;
	}

	/*
	 * If we are using project inheritance, means if the directory has
	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
	 * not only the project ID, but also the ZFS_PROJINHERIT flag.  Under
	 * such case, we only allow renames into our tree when the project
	 * IDs are the same.
	 */
	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
	    tdzp->z_projid != szp->z_projid) {
		error = SET_ERROR(EXDEV);
		goto unlockout;
	}

	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */
	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
		goto unlockout;

	if ((*svpp)->v_type == VDIR) {
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 */
		if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
		    sdzp == szp ||
		    (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
			error = EINVAL;
			goto unlockout;
		}

		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if ((error = zfs_rename_check(szp, sdzp, tdzp)))
			goto unlockout;
	}

	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if ((*svpp)->v_type == VDIR) {
			if ((*tvpp)->v_type != VDIR) {
				error = SET_ERROR(ENOTDIR);
				goto unlockout;
			} else {
				cache_purge(tdvp);
				if (sdvp != tdvp)
					cache_purge(sdvp);
			}
		} else {
			if ((*tvpp)->v_type == VDIR) {
				error = SET_ERROR(EISDIR);
				goto unlockout;
			}
		}
	}

	vn_seqc_write_begin(*svpp);
	vn_seqc_write_begin(sdvp);
	if (*tvpp != NULL)
		vn_seqc_write_begin(*tvpp);
	if (tdvp != *tvpp)
		vn_seqc_write_begin(tdvp);
	/* seqc tracking only exists on FreeBSD >= 1300102 (see #if below). */
#if __FreeBSD_version >= 1300102
	want_seqc_end = true;
#endif
	/*
	 * NOTE(review): vnevent_*() compile away on FreeBSD; 'ct' has no
	 * local definition in this function — confirm the macros never
	 * expand their arguments on this platform.
	 */
	vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
	if (tzp)
		vnevent_rename_dest(*tvpp, tdvp, tnm, ct);

	/*
	 * notify the target directory if it is not the same
	 * as source directory.
	 */
	if (tdvp != sdvp) {
		vnevent_rename_dest_dir(tdvp, ct);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp) {
		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tdzp);
	}
	if (tzp) {
		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tzp);
	}

	zfs_sa_upgrade_txholds(tx, szp);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		goto unlockout;
	}


	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);

	if (error == 0) {
		error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_pflags |= ZFS_AV_MODIFIED;

			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
			ASSERT0(error);

			error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
			    NULL);
			if (error == 0) {
				zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
				    snm, tdzp, tnm, szp);

				/*
				 * Update path information for the target vnode
				 */
				vn_renamepath(tdvp, *svpp, tnm, strlen(tnm));
			} else {
				/*
				 * At this point, we have successfully created
				 * the target name, but have failed to remove
				 * the source name.  Since the create was done
				 * with the ZRENAMING flag, there are
				 * complications; for one, the link count is
				 * wrong.  The easiest way to deal with this
				 * is to remove the newly created target, and
				 * return the original error.  This must
				 * succeed; fortunately, it is very unlikely to
				 * fail, since we just created it.
				 */
				VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
				    ZRENAMING, NULL));
			}
		}
		if (error == 0) {
			cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
		}
	}

	dmu_tx_commit(tx);

unlockout:			/* all 4 vnodes are locked, ZFS_ENTER called */
	ZFS_EXIT(zfsvfs);
	if (want_seqc_end) {
		vn_seqc_write_end(*svpp);
		vn_seqc_write_end(sdvp);
		if (*tvpp != NULL)
			vn_seqc_write_end(*tvpp);
		if (tdvp != *tvpp)
			vn_seqc_write_end(tdvp);
		want_seqc_end = false;
	}
	VOP_UNLOCK1(*svpp);
	VOP_UNLOCK1(sdvp);

out:				/* original two vnodes are locked */
	MPASS(!want_seqc_end);
	/*
	 * On the early EXDEV paths error != 0, so the short-circuit below
	 * never dereferences the (then-uninitialized) zfsvfs pointer.
	 */
	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	if (*tvpp != NULL)
		VOP_UNLOCK1(*tvpp);
	if (tdvp != *tvpp)
		VOP_UNLOCK1(tdvp);
	return (error);
}

/*
 * Name-based rename entry point: resolve sname in sdzp and tname in
 * tdzp to vnodes, then perform the rename via zfs_rename_().  Any
 * vnode references taken here are released before returning.
 */
int
zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
    cred_t *cr, int flags)
{
	struct componentname scn, tcn;
	vnode_t *sdvp, *tdvp;
	vnode_t *svp, *tvp;
	int error;
	svp = tvp = NULL;

	sdvp = ZTOV(sdzp);
	tdvp = ZTOV(tdzp);
	error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
	/* During ZIL replay the source directory is not locked by the VFS. */
	if (sdzp->z_zfsvfs->z_replay == B_FALSE)
		VOP_UNLOCK1(sdvp);
	if (error != 0)
		goto fail;
	VOP_UNLOCK1(svp);

	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
	error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
	/* EJUSTRETURN means the target name does not exist yet. */
	if (error == EJUSTRETURN)
		tvp = NULL;
	else if (error != 0) {
		VOP_UNLOCK1(tdvp);
		goto fail;
	}

	error = zfs_rename_(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr, 0);
fail:
	if (svp != NULL)
		vrele(svp);
	if (tvp != NULL)
		vrele(tvp);

	return (error);
}

/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 * IN:	dvp	- Directory to contain new symbolic link.
 *	link	- Name for new symlink entry.
 *	vap	- Attributes of new entry.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
int
zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
    const char *link, znode_t **zpp, cred_t *cr, int flags)
{
	znode_t		*zp;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	len = strlen(link);
	int		error;
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	uint64_t	txtype = TX_SYMLINK;

	ASSERT3S(vap->va_type, ==, VLNK);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (len > MAXPATHLEN) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENAMETOOLONG));
	}

	if ((error = zfs_acl_ids_create(dzp, 0,
	    vap, cr, NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
	    0 /* projid */)) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}

	/* Reserve a vnode before the tx; released on every exit path below. */
	getnewvnode_reserve_();
	tx = dmu_tx_create(zfsvfs->z_os);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE + len);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create a new object for the symlink.
	 * for version 4 ZPL datasets the symlink will be an SA attribute
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	if (zp->z_is_sa)
		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
		    __DECONST(void *, link), len, tx);
	else
		zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);

	zp->z_size = len;
	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
	    &zp->z_size, sizeof (zp->z_size), tx);
	/*
	 * Insert the new object into the directory.
	 * NOTE(review): the return value is deliberately discarded here —
	 * confirm that link creation cannot fail after the ZNEW lookup
	 * above succeeded under the held locks.
	 */
	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);

	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
	*zpp = zp;

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Return, in the buffer contained in the provided uio structure,
 * the symbolic path referred to by vp.
 *
 * IN:	vp	- vnode of symbolic link.
 *	uio	- structure to contain the link path.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * OUT:	uio	- structure containing the link path.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	vp - atime updated
 */
/* ARGSUSED */
static int
zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/* SA-backed symlinks store the target inline in the SA attribute. */
	if (zp->z_is_sa)
		error = sa_lookup_uio(zp->z_sa_hdl,
		    SA_ZPL_SYMLINK(zfsvfs), uio);
	else
		error = zfs_sa_readlink(zp, uio);

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Insert a new entry into directory tdvp referencing svp.
 *
 * IN:	tdvp	- Directory to contain new entry.
 *	svp	- vnode of new entry.
 *	name	- name of new entry.
 *	cr	- credentials of caller.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	tdvp - ctime|mtime updated
 *	 svp - ctime updated
 */
/* ARGSUSED */
int
zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
    int flags)
{
	znode_t		*tzp;
	zfsvfs_t	*zfsvfs = tdzp->z_zfsvfs;
	zilog_t		*zilog;
	dmu_tx_t	*tx;
	int		error;
	uint64_t	parent;
	uid_t		owner;

	ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(tdzp);
	zilog = zfsvfs->z_log;

	/*
	 * POSIX dictates that we return EPERM here.
	 * Better choices include ENOTSUP or EISDIR.
	 */
	if (ZTOV(szp)->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	ZFS_VERIFY_ZP(szp);

	/*
	 * If we are using project inheritance, means if the directory has
	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
	 * such case, we only allow hard link creation in our tree when the
	 * project IDs are the same.
	 */
	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
	    tdzp->z_projid != szp->z_projid) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EXDEV));
	}

	/* Immutable/append-only/readonly files may not gain new links. */
	if (szp->z_pflags & (ZFS_APPENDONLY |
	    ZFS_IMMUTABLE | ZFS_READONLY)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	/* Prevent links to .zfs/shares files */

	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	if (parent == zfsvfs->z_shares_dir) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	if (zfsvfs->z_utf8 && u8_validate(name,
	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	/*
	 * We do not support links between attributes and non-attributes
	 * because of the potential security risk of creating links
	 * into "normal" file space in order to circumvent restrictions
	 * imposed in attribute space.
	 */
	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}


	/* Only the owner (or privileged callers) may link to a file. */
	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
	if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
	zfs_sa_upgrade_txholds(tx, szp);
	zfs_sa_upgrade_txholds(tx, tdzp);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_create(tdzp, name, szp, tx, 0);

	if (error == 0) {
		uint64_t txtype = TX_LINK;
		zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
	}

	dmu_tx_commit(tx);

	if (error == 0) {
		/*
		 * NOTE(review): "ct" is not a parameter of this function;
		 * this presumably compiles only because vnevent_link() is
		 * an empty macro in the FreeBSD SPL that discards its
		 * arguments — confirm against sys/vnode.h compat header.
		 */
		vnevent_link(ZTOV(szp), ct);
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Free or allocate space in a file.  Currently, this function only
 * supports the `F_FREESP' command.  However, this command is somewhat
 * misnamed, as its functionality includes the ability to allocate as
 * well as free space.
 *
 *	IN:	ip	- inode of file to free data in.
 *		cmd	- action to take (only F_FREESP supported).
 *		bfp	- section of file to free/alloc.
 *		flag	- current file open mode flags.
 *		offset	- current file offset.
 *		cr	- credentials of caller.
 *
 *	RETURN:	0 on success, error code on failure.
3830 * 3831 * Timestamps: 3832 * ip - ctime|mtime updated 3833 */ 3834 /* ARGSUSED */ 3835 int 3836 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag, 3837 offset_t offset, cred_t *cr) 3838 { 3839 zfsvfs_t *zfsvfs = ZTOZSB(zp); 3840 uint64_t off, len; 3841 int error; 3842 3843 ZFS_ENTER(zfsvfs); 3844 ZFS_VERIFY_ZP(zp); 3845 3846 if (cmd != F_FREESP) { 3847 ZFS_EXIT(zfsvfs); 3848 return (SET_ERROR(EINVAL)); 3849 } 3850 3851 /* 3852 * Callers might not be able to detect properly that we are read-only, 3853 * so check it explicitly here. 3854 */ 3855 if (zfs_is_readonly(zfsvfs)) { 3856 ZFS_EXIT(zfsvfs); 3857 return (SET_ERROR(EROFS)); 3858 } 3859 3860 if (bfp->l_len < 0) { 3861 ZFS_EXIT(zfsvfs); 3862 return (SET_ERROR(EINVAL)); 3863 } 3864 3865 /* 3866 * Permissions aren't checked on Solaris because on this OS 3867 * zfs_space() can only be called with an opened file handle. 3868 * On Linux we can get here through truncate_range() which 3869 * operates directly on inodes, so we need to check access rights. 3870 */ 3871 if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) { 3872 ZFS_EXIT(zfsvfs); 3873 return (error); 3874 } 3875 3876 off = bfp->l_start; 3877 len = bfp->l_len; /* 0 means from off to end of file */ 3878 3879 error = zfs_freesp(zp, off, len, flag, TRUE); 3880 3881 ZFS_EXIT(zfsvfs); 3882 return (error); 3883 } 3884 3885 /*ARGSUSED*/ 3886 static void 3887 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 3888 { 3889 znode_t *zp = VTOZ(vp); 3890 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3891 int error; 3892 3893 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs); 3894 if (zp->z_sa_hdl == NULL) { 3895 /* 3896 * The fs has been unmounted, or we did a 3897 * suspend/resume and this file no longer exists. 3898 */ 3899 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 3900 vrecycle(vp); 3901 return; 3902 } 3903 3904 if (zp->z_unlinked) { 3905 /* 3906 * Fast path to recycle a vnode of a removed file. 
3907 */ 3908 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 3909 vrecycle(vp); 3910 return; 3911 } 3912 3913 if (zp->z_atime_dirty && zp->z_unlinked == 0) { 3914 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 3915 3916 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3917 zfs_sa_upgrade_txholds(tx, zp); 3918 error = dmu_tx_assign(tx, TXG_WAIT); 3919 if (error) { 3920 dmu_tx_abort(tx); 3921 } else { 3922 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 3923 (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 3924 zp->z_atime_dirty = 0; 3925 dmu_tx_commit(tx); 3926 } 3927 } 3928 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 3929 } 3930 3931 3932 CTASSERT(sizeof (struct zfid_short) <= sizeof (struct fid)); 3933 CTASSERT(sizeof (struct zfid_long) <= sizeof (struct fid)); 3934 3935 /*ARGSUSED*/ 3936 static int 3937 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 3938 { 3939 znode_t *zp = VTOZ(vp); 3940 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3941 uint32_t gen; 3942 uint64_t gen64; 3943 uint64_t object = zp->z_id; 3944 zfid_short_t *zfid; 3945 int size, i, error; 3946 3947 ZFS_ENTER(zfsvfs); 3948 ZFS_VERIFY_ZP(zp); 3949 3950 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 3951 &gen64, sizeof (uint64_t))) != 0) { 3952 ZFS_EXIT(zfsvfs); 3953 return (error); 3954 } 3955 3956 gen = (uint32_t)gen64; 3957 3958 size = (zfsvfs->z_parent != zfsvfs) ? 
LONG_FID_LEN : SHORT_FID_LEN; 3959 fidp->fid_len = size; 3960 3961 zfid = (zfid_short_t *)fidp; 3962 3963 zfid->zf_len = size; 3964 3965 for (i = 0; i < sizeof (zfid->zf_object); i++) 3966 zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 3967 3968 /* Must have a non-zero generation number to distinguish from .zfs */ 3969 if (gen == 0) 3970 gen = 1; 3971 for (i = 0; i < sizeof (zfid->zf_gen); i++) 3972 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 3973 3974 if (size == LONG_FID_LEN) { 3975 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 3976 zfid_long_t *zlfid; 3977 3978 zlfid = (zfid_long_t *)fidp; 3979 3980 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 3981 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 3982 3983 /* XXX - this should be the generation number for the objset */ 3984 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 3985 zlfid->zf_setgen[i] = 0; 3986 } 3987 3988 ZFS_EXIT(zfsvfs); 3989 return (0); 3990 } 3991 3992 static int 3993 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 3994 caller_context_t *ct) 3995 { 3996 znode_t *zp; 3997 zfsvfs_t *zfsvfs; 3998 3999 switch (cmd) { 4000 case _PC_LINK_MAX: 4001 *valp = MIN(LONG_MAX, ZFS_LINK_MAX); 4002 return (0); 4003 4004 case _PC_FILESIZEBITS: 4005 *valp = 64; 4006 return (0); 4007 case _PC_MIN_HOLE_SIZE: 4008 *valp = (int)SPA_MINBLOCKSIZE; 4009 return (0); 4010 case _PC_ACL_EXTENDED: 4011 #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */ 4012 zp = VTOZ(vp); 4013 zfsvfs = zp->z_zfsvfs; 4014 ZFS_ENTER(zfsvfs); 4015 ZFS_VERIFY_ZP(zp); 4016 *valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0; 4017 ZFS_EXIT(zfsvfs); 4018 #else 4019 *valp = 0; 4020 #endif 4021 return (0); 4022 4023 case _PC_ACL_NFS4: 4024 zp = VTOZ(vp); 4025 zfsvfs = zp->z_zfsvfs; 4026 ZFS_ENTER(zfsvfs); 4027 ZFS_VERIFY_ZP(zp); 4028 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 
1 : 0; 4029 ZFS_EXIT(zfsvfs); 4030 return (0); 4031 4032 case _PC_ACL_PATH_MAX: 4033 *valp = ACL_MAX_ENTRIES; 4034 return (0); 4035 4036 default: 4037 return (EOPNOTSUPP); 4038 } 4039 } 4040 4041 static int 4042 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, 4043 int *rahead) 4044 { 4045 znode_t *zp = VTOZ(vp); 4046 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4047 objset_t *os = zp->z_zfsvfs->z_os; 4048 zfs_locked_range_t *lr; 4049 vm_object_t object; 4050 off_t start, end, obj_size; 4051 uint_t blksz; 4052 int pgsin_b, pgsin_a; 4053 int error; 4054 4055 ZFS_ENTER(zfsvfs); 4056 ZFS_VERIFY_ZP(zp); 4057 4058 start = IDX_TO_OFF(ma[0]->pindex); 4059 end = IDX_TO_OFF(ma[count - 1]->pindex + 1); 4060 4061 /* 4062 * Lock a range covering all required and optional pages. 4063 * Note that we need to handle the case of the block size growing. 4064 */ 4065 for (;;) { 4066 blksz = zp->z_blksz; 4067 lr = zfs_rangelock_tryenter(&zp->z_rangelock, 4068 rounddown(start, blksz), 4069 roundup(end, blksz) - rounddown(start, blksz), RL_READER); 4070 if (lr == NULL) { 4071 if (rahead != NULL) { 4072 *rahead = 0; 4073 rahead = NULL; 4074 } 4075 if (rbehind != NULL) { 4076 *rbehind = 0; 4077 rbehind = NULL; 4078 } 4079 break; 4080 } 4081 if (blksz == zp->z_blksz) 4082 break; 4083 zfs_rangelock_exit(lr); 4084 } 4085 4086 object = ma[0]->object; 4087 zfs_vmobject_wlock(object); 4088 obj_size = object->un_pager.vnp.vnp_size; 4089 zfs_vmobject_wunlock(object); 4090 if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) { 4091 if (lr != NULL) 4092 zfs_rangelock_exit(lr); 4093 ZFS_EXIT(zfsvfs); 4094 return (zfs_vm_pagerret_bad); 4095 } 4096 4097 pgsin_b = 0; 4098 if (rbehind != NULL) { 4099 pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz)); 4100 pgsin_b = MIN(*rbehind, pgsin_b); 4101 } 4102 4103 pgsin_a = 0; 4104 if (rahead != NULL) { 4105 pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end); 4106 if (end + IDX_TO_OFF(pgsin_a) >= obj_size) 4107 pgsin_a = OFF_TO_IDX(round_page(obj_size) 
- end); 4108 pgsin_a = MIN(*rahead, pgsin_a); 4109 } 4110 4111 /* 4112 * NB: we need to pass the exact byte size of the data that we expect 4113 * to read after accounting for the file size. This is required because 4114 * ZFS will panic if we request DMU to read beyond the end of the last 4115 * allocated block. 4116 */ 4117 error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a, 4118 MIN(end, obj_size) - (end - PAGE_SIZE)); 4119 4120 if (lr != NULL) 4121 zfs_rangelock_exit(lr); 4122 ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4123 ZFS_EXIT(zfsvfs); 4124 4125 if (error != 0) 4126 return (zfs_vm_pagerret_error); 4127 4128 VM_CNT_INC(v_vnodein); 4129 VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a); 4130 if (rbehind != NULL) 4131 *rbehind = pgsin_b; 4132 if (rahead != NULL) 4133 *rahead = pgsin_a; 4134 return (zfs_vm_pagerret_ok); 4135 } 4136 4137 #ifndef _SYS_SYSPROTO_H_ 4138 struct vop_getpages_args { 4139 struct vnode *a_vp; 4140 vm_page_t *a_m; 4141 int a_count; 4142 int *a_rbehind; 4143 int *a_rahead; 4144 }; 4145 #endif 4146 4147 static int 4148 zfs_freebsd_getpages(struct vop_getpages_args *ap) 4149 { 4150 4151 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, 4152 ap->a_rahead)); 4153 } 4154 4155 static int 4156 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, 4157 int *rtvals) 4158 { 4159 znode_t *zp = VTOZ(vp); 4160 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4161 zfs_locked_range_t *lr; 4162 dmu_tx_t *tx; 4163 struct sf_buf *sf; 4164 vm_object_t object; 4165 vm_page_t m; 4166 caddr_t va; 4167 size_t tocopy; 4168 size_t lo_len; 4169 vm_ooffset_t lo_off; 4170 vm_ooffset_t off; 4171 uint_t blksz; 4172 int ncount; 4173 int pcount; 4174 int err; 4175 int i; 4176 4177 ZFS_ENTER(zfsvfs); 4178 ZFS_VERIFY_ZP(zp); 4179 4180 object = vp->v_object; 4181 pcount = btoc(len); 4182 ncount = pcount; 4183 4184 KASSERT(ma[0]->object == object, ("mismatching object")); 4185 KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length")); 
4186 4187 for (i = 0; i < pcount; i++) 4188 rtvals[i] = zfs_vm_pagerret_error; 4189 4190 off = IDX_TO_OFF(ma[0]->pindex); 4191 blksz = zp->z_blksz; 4192 lo_off = rounddown(off, blksz); 4193 lo_len = roundup(len + (off - lo_off), blksz); 4194 lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER); 4195 4196 zfs_vmobject_wlock(object); 4197 if (len + off > object->un_pager.vnp.vnp_size) { 4198 if (object->un_pager.vnp.vnp_size > off) { 4199 int pgoff; 4200 4201 len = object->un_pager.vnp.vnp_size - off; 4202 ncount = btoc(len); 4203 if ((pgoff = (int)len & PAGE_MASK) != 0) { 4204 /* 4205 * If the object is locked and the following 4206 * conditions hold, then the page's dirty 4207 * field cannot be concurrently changed by a 4208 * pmap operation. 4209 */ 4210 m = ma[ncount - 1]; 4211 vm_page_assert_sbusied(m); 4212 KASSERT(!pmap_page_is_write_mapped(m), 4213 ("zfs_putpages: page %p is not read-only", 4214 m)); 4215 vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 4216 pgoff); 4217 } 4218 } else { 4219 len = 0; 4220 ncount = 0; 4221 } 4222 if (ncount < pcount) { 4223 for (i = ncount; i < pcount; i++) { 4224 rtvals[i] = zfs_vm_pagerret_bad; 4225 } 4226 } 4227 } 4228 zfs_vmobject_wunlock(object); 4229 4230 if (ncount == 0) 4231 goto out; 4232 4233 if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) || 4234 zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) || 4235 (zp->z_projid != ZFS_DEFAULT_PROJID && 4236 zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT, 4237 zp->z_projid))) { 4238 goto out; 4239 } 4240 4241 tx = dmu_tx_create(zfsvfs->z_os); 4242 dmu_tx_hold_write(tx, zp->z_id, off, len); 4243 4244 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 4245 zfs_sa_upgrade_txholds(tx, zp); 4246 err = dmu_tx_assign(tx, TXG_WAIT); 4247 if (err != 0) { 4248 dmu_tx_abort(tx); 4249 goto out; 4250 } 4251 4252 if (zp->z_blksz < PAGE_SIZE) { 4253 for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) { 4254 tocopy = len > PAGE_SIZE ? 
PAGE_SIZE : len; 4255 va = zfs_map_page(ma[i], &sf); 4256 dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx); 4257 zfs_unmap_page(sf); 4258 } 4259 } else { 4260 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx); 4261 } 4262 4263 if (err == 0) { 4264 uint64_t mtime[2], ctime[2]; 4265 sa_bulk_attr_t bulk[3]; 4266 int count = 0; 4267 4268 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 4269 &mtime, 16); 4270 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 4271 &ctime, 16); 4272 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4273 &zp->z_pflags, 8); 4274 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); 4275 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 4276 ASSERT0(err); 4277 /* 4278 * XXX we should be passing a callback to undirty 4279 * but that would make the locking messier 4280 */ 4281 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, 4282 len, 0, NULL, NULL); 4283 4284 zfs_vmobject_wlock(object); 4285 for (i = 0; i < ncount; i++) { 4286 rtvals[i] = zfs_vm_pagerret_ok; 4287 vm_page_undirty(ma[i]); 4288 } 4289 zfs_vmobject_wunlock(object); 4290 VM_CNT_INC(v_vnodeout); 4291 VM_CNT_ADD(v_vnodepgsout, ncount); 4292 } 4293 dmu_tx_commit(tx); 4294 4295 out: 4296 zfs_rangelock_exit(lr); 4297 if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 || 4298 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4299 zil_commit(zfsvfs->z_log, zp->z_id); 4300 ZFS_EXIT(zfsvfs); 4301 return (rtvals[0]); 4302 } 4303 4304 #ifndef _SYS_SYSPROTO_H_ 4305 struct vop_putpages_args { 4306 struct vnode *a_vp; 4307 vm_page_t *a_m; 4308 int a_count; 4309 int a_sync; 4310 int *a_rtvals; 4311 }; 4312 #endif 4313 4314 static int 4315 zfs_freebsd_putpages(struct vop_putpages_args *ap) 4316 { 4317 4318 return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, 4319 ap->a_rtvals)); 4320 } 4321 4322 #ifndef _SYS_SYSPROTO_H_ 4323 struct vop_bmap_args { 4324 struct vnode *a_vp; 4325 daddr_t a_bn; 4326 struct bufobj **a_bop; 
4327 daddr_t *a_bnp; 4328 int *a_runp; 4329 int *a_runb; 4330 }; 4331 #endif 4332 4333 static int 4334 zfs_freebsd_bmap(struct vop_bmap_args *ap) 4335 { 4336 4337 if (ap->a_bop != NULL) 4338 *ap->a_bop = &ap->a_vp->v_bufobj; 4339 if (ap->a_bnp != NULL) 4340 *ap->a_bnp = ap->a_bn; 4341 if (ap->a_runp != NULL) 4342 *ap->a_runp = 0; 4343 if (ap->a_runb != NULL) 4344 *ap->a_runb = 0; 4345 4346 return (0); 4347 } 4348 4349 #ifndef _SYS_SYSPROTO_H_ 4350 struct vop_open_args { 4351 struct vnode *a_vp; 4352 int a_mode; 4353 struct ucred *a_cred; 4354 struct thread *a_td; 4355 }; 4356 #endif 4357 4358 static int 4359 zfs_freebsd_open(struct vop_open_args *ap) 4360 { 4361 vnode_t *vp = ap->a_vp; 4362 znode_t *zp = VTOZ(vp); 4363 int error; 4364 4365 error = zfs_open(&vp, ap->a_mode, ap->a_cred); 4366 if (error == 0) 4367 vnode_create_vobject(vp, zp->z_size, ap->a_td); 4368 return (error); 4369 } 4370 4371 #ifndef _SYS_SYSPROTO_H_ 4372 struct vop_close_args { 4373 struct vnode *a_vp; 4374 int a_fflag; 4375 struct ucred *a_cred; 4376 struct thread *a_td; 4377 }; 4378 #endif 4379 4380 static int 4381 zfs_freebsd_close(struct vop_close_args *ap) 4382 { 4383 4384 return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred)); 4385 } 4386 4387 #ifndef _SYS_SYSPROTO_H_ 4388 struct vop_ioctl_args { 4389 struct vnode *a_vp; 4390 ulong_t a_command; 4391 caddr_t a_data; 4392 int a_fflag; 4393 struct ucred *cred; 4394 struct thread *td; 4395 }; 4396 #endif 4397 4398 static int 4399 zfs_freebsd_ioctl(struct vop_ioctl_args *ap) 4400 { 4401 4402 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data, 4403 ap->a_fflag, ap->a_cred, NULL)); 4404 } 4405 4406 static int 4407 ioflags(int ioflags) 4408 { 4409 int flags = 0; 4410 4411 if (ioflags & IO_APPEND) 4412 flags |= FAPPEND; 4413 if (ioflags & IO_NDELAY) 4414 flags |= FNONBLOCK; 4415 if (ioflags & IO_SYNC) 4416 flags |= (FSYNC | FDSYNC | FRSYNC); 4417 4418 return (flags); 4419 } 4420 4421 #ifndef _SYS_SYSPROTO_H_ 4422 struct 
vop_read_args { 4423 struct vnode *a_vp; 4424 struct uio *a_uio; 4425 int a_ioflag; 4426 struct ucred *a_cred; 4427 }; 4428 #endif 4429 4430 static int 4431 zfs_freebsd_read(struct vop_read_args *ap) 4432 { 4433 zfs_uio_t uio; 4434 zfs_uio_init(&uio, ap->a_uio); 4435 return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), 4436 ap->a_cred)); 4437 } 4438 4439 #ifndef _SYS_SYSPROTO_H_ 4440 struct vop_write_args { 4441 struct vnode *a_vp; 4442 struct uio *a_uio; 4443 int a_ioflag; 4444 struct ucred *a_cred; 4445 }; 4446 #endif 4447 4448 static int 4449 zfs_freebsd_write(struct vop_write_args *ap) 4450 { 4451 zfs_uio_t uio; 4452 zfs_uio_init(&uio, ap->a_uio); 4453 return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), 4454 ap->a_cred)); 4455 } 4456 4457 #if __FreeBSD_version >= 1300102 4458 /* 4459 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see 4460 * the comment above cache_fplookup for details. 4461 */ 4462 static int 4463 zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v) 4464 { 4465 vnode_t *vp; 4466 znode_t *zp; 4467 uint64_t pflags; 4468 4469 vp = v->a_vp; 4470 zp = VTOZ_SMR(vp); 4471 if (__predict_false(zp == NULL)) 4472 return (EAGAIN); 4473 pflags = atomic_load_64(&zp->z_pflags); 4474 if (pflags & ZFS_AV_QUARANTINED) 4475 return (EAGAIN); 4476 if (pflags & ZFS_XATTR) 4477 return (EAGAIN); 4478 if ((pflags & ZFS_NO_EXECS_DENIED) == 0) 4479 return (EAGAIN); 4480 return (0); 4481 } 4482 #endif 4483 4484 #if __FreeBSD_version >= 1300139 4485 static int 4486 zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v) 4487 { 4488 vnode_t *vp; 4489 znode_t *zp; 4490 char *target; 4491 4492 vp = v->a_vp; 4493 zp = VTOZ_SMR(vp); 4494 if (__predict_false(zp == NULL)) { 4495 return (EAGAIN); 4496 } 4497 4498 target = atomic_load_consume_ptr(&zp->z_cached_symlink); 4499 if (target == NULL) { 4500 return (EAGAIN); 4501 } 4502 return (cache_symlink_resolve(v->a_fpl, target, strlen(target))); 4503 } 4504 #endif 4505 4506 
#ifndef _SYS_SYSPROTO_H_ 4507 struct vop_access_args { 4508 struct vnode *a_vp; 4509 accmode_t a_accmode; 4510 struct ucred *a_cred; 4511 struct thread *a_td; 4512 }; 4513 #endif 4514 4515 static int 4516 zfs_freebsd_access(struct vop_access_args *ap) 4517 { 4518 vnode_t *vp = ap->a_vp; 4519 znode_t *zp = VTOZ(vp); 4520 accmode_t accmode; 4521 int error = 0; 4522 4523 4524 if (ap->a_accmode == VEXEC) { 4525 if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0) 4526 return (0); 4527 } 4528 4529 /* 4530 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND, 4531 */ 4532 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND); 4533 if (accmode != 0) 4534 error = zfs_access(zp, accmode, 0, ap->a_cred); 4535 4536 /* 4537 * VADMIN has to be handled by vaccess(). 4538 */ 4539 if (error == 0) { 4540 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND); 4541 if (accmode != 0) { 4542 #if __FreeBSD_version >= 1300105 4543 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 4544 zp->z_gid, accmode, ap->a_cred); 4545 #else 4546 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid, 4547 zp->z_gid, accmode, ap->a_cred, NULL); 4548 #endif 4549 } 4550 } 4551 4552 /* 4553 * For VEXEC, ensure that at least one execute bit is set for 4554 * non-directories. 
4555 */ 4556 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR && 4557 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { 4558 error = EACCES; 4559 } 4560 4561 return (error); 4562 } 4563 4564 #ifndef _SYS_SYSPROTO_H_ 4565 struct vop_lookup_args { 4566 struct vnode *a_dvp; 4567 struct vnode **a_vpp; 4568 struct componentname *a_cnp; 4569 }; 4570 #endif 4571 4572 static int 4573 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached) 4574 { 4575 struct componentname *cnp = ap->a_cnp; 4576 char nm[NAME_MAX + 1]; 4577 4578 ASSERT3U(cnp->cn_namelen, <, sizeof (nm)); 4579 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm))); 4580 4581 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, 4582 cnp->cn_cred, cnp->cn_thread, 0, cached)); 4583 } 4584 4585 static int 4586 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap) 4587 { 4588 4589 return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE)); 4590 } 4591 4592 #ifndef _SYS_SYSPROTO_H_ 4593 struct vop_lookup_args { 4594 struct vnode *a_dvp; 4595 struct vnode **a_vpp; 4596 struct componentname *a_cnp; 4597 }; 4598 #endif 4599 4600 static int 4601 zfs_cache_lookup(struct vop_lookup_args *ap) 4602 { 4603 zfsvfs_t *zfsvfs; 4604 4605 zfsvfs = ap->a_dvp->v_mount->mnt_data; 4606 if (zfsvfs->z_use_namecache) 4607 return (vfs_cache_lookup(ap)); 4608 else 4609 return (zfs_freebsd_lookup(ap, B_FALSE)); 4610 } 4611 4612 #ifndef _SYS_SYSPROTO_H_ 4613 struct vop_create_args { 4614 struct vnode *a_dvp; 4615 struct vnode **a_vpp; 4616 struct componentname *a_cnp; 4617 struct vattr *a_vap; 4618 }; 4619 #endif 4620 4621 static int 4622 zfs_freebsd_create(struct vop_create_args *ap) 4623 { 4624 zfsvfs_t *zfsvfs; 4625 struct componentname *cnp = ap->a_cnp; 4626 vattr_t *vap = ap->a_vap; 4627 znode_t *zp = NULL; 4628 int rc, mode; 4629 4630 ASSERT(cnp->cn_flags & SAVENAME); 4631 4632 vattr_init_mask(vap); 4633 mode = vap->va_mode & ALLPERMS; 4634 zfsvfs = 
ap->a_dvp->v_mount->mnt_data; 4635 *ap->a_vpp = NULL; 4636 4637 rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, !EXCL, mode, 4638 &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */); 4639 if (rc == 0) 4640 *ap->a_vpp = ZTOV(zp); 4641 if (zfsvfs->z_use_namecache && 4642 rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0) 4643 cache_enter(ap->a_dvp, *ap->a_vpp, cnp); 4644 4645 return (rc); 4646 } 4647 4648 #ifndef _SYS_SYSPROTO_H_ 4649 struct vop_remove_args { 4650 struct vnode *a_dvp; 4651 struct vnode *a_vp; 4652 struct componentname *a_cnp; 4653 }; 4654 #endif 4655 4656 static int 4657 zfs_freebsd_remove(struct vop_remove_args *ap) 4658 { 4659 4660 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 4661 4662 return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr, 4663 ap->a_cnp->cn_cred)); 4664 } 4665 4666 #ifndef _SYS_SYSPROTO_H_ 4667 struct vop_mkdir_args { 4668 struct vnode *a_dvp; 4669 struct vnode **a_vpp; 4670 struct componentname *a_cnp; 4671 struct vattr *a_vap; 4672 }; 4673 #endif 4674 4675 static int 4676 zfs_freebsd_mkdir(struct vop_mkdir_args *ap) 4677 { 4678 vattr_t *vap = ap->a_vap; 4679 znode_t *zp = NULL; 4680 int rc; 4681 4682 ASSERT(ap->a_cnp->cn_flags & SAVENAME); 4683 4684 vattr_init_mask(vap); 4685 *ap->a_vpp = NULL; 4686 4687 rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp, 4688 ap->a_cnp->cn_cred, 0, NULL); 4689 4690 if (rc == 0) 4691 *ap->a_vpp = ZTOV(zp); 4692 return (rc); 4693 } 4694 4695 #ifndef _SYS_SYSPROTO_H_ 4696 struct vop_rmdir_args { 4697 struct vnode *a_dvp; 4698 struct vnode *a_vp; 4699 struct componentname *a_cnp; 4700 }; 4701 #endif 4702 4703 static int 4704 zfs_freebsd_rmdir(struct vop_rmdir_args *ap) 4705 { 4706 struct componentname *cnp = ap->a_cnp; 4707 4708 ASSERT(cnp->cn_flags & SAVENAME); 4709 4710 return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred)); 4711 } 4712 4713 #ifndef _SYS_SYSPROTO_H_ 4714 struct vop_readdir_args { 4715 struct vnode *a_vp; 4716 struct uio *a_uio; 4717 struct 
ucred *a_cred; 4718 int *a_eofflag; 4719 int *a_ncookies; 4720 ulong_t **a_cookies; 4721 }; 4722 #endif 4723 4724 static int 4725 zfs_freebsd_readdir(struct vop_readdir_args *ap) 4726 { 4727 zfs_uio_t uio; 4728 zfs_uio_init(&uio, ap->a_uio); 4729 return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag, 4730 ap->a_ncookies, ap->a_cookies)); 4731 } 4732 4733 #ifndef _SYS_SYSPROTO_H_ 4734 struct vop_fsync_args { 4735 struct vnode *a_vp; 4736 int a_waitfor; 4737 struct thread *a_td; 4738 }; 4739 #endif 4740 4741 static int 4742 zfs_freebsd_fsync(struct vop_fsync_args *ap) 4743 { 4744 4745 vop_stdfsync(ap); 4746 return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred)); 4747 } 4748 4749 #ifndef _SYS_SYSPROTO_H_ 4750 struct vop_getattr_args { 4751 struct vnode *a_vp; 4752 struct vattr *a_vap; 4753 struct ucred *a_cred; 4754 }; 4755 #endif 4756 4757 static int 4758 zfs_freebsd_getattr(struct vop_getattr_args *ap) 4759 { 4760 vattr_t *vap = ap->a_vap; 4761 xvattr_t xvap; 4762 ulong_t fflags = 0; 4763 int error; 4764 4765 xva_init(&xvap); 4766 xvap.xva_vattr = *vap; 4767 xvap.xva_vattr.va_mask |= AT_XVATTR; 4768 4769 /* Convert chflags into ZFS-type flags. */ 4770 /* XXX: what about SF_SETTABLE?. */ 4771 XVA_SET_REQ(&xvap, XAT_IMMUTABLE); 4772 XVA_SET_REQ(&xvap, XAT_APPENDONLY); 4773 XVA_SET_REQ(&xvap, XAT_NOUNLINK); 4774 XVA_SET_REQ(&xvap, XAT_NODUMP); 4775 XVA_SET_REQ(&xvap, XAT_READONLY); 4776 XVA_SET_REQ(&xvap, XAT_ARCHIVE); 4777 XVA_SET_REQ(&xvap, XAT_SYSTEM); 4778 XVA_SET_REQ(&xvap, XAT_HIDDEN); 4779 XVA_SET_REQ(&xvap, XAT_REPARSE); 4780 XVA_SET_REQ(&xvap, XAT_OFFLINE); 4781 XVA_SET_REQ(&xvap, XAT_SPARSE); 4782 4783 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred); 4784 if (error != 0) 4785 return (error); 4786 4787 /* Convert ZFS xattr into chflags. 
*/ 4788 #define FLAG_CHECK(fflag, xflag, xfield) do { \ 4789 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \ 4790 fflags |= (fflag); \ 4791 } while (0) 4792 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE, 4793 xvap.xva_xoptattrs.xoa_immutable); 4794 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY, 4795 xvap.xva_xoptattrs.xoa_appendonly); 4796 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK, 4797 xvap.xva_xoptattrs.xoa_nounlink); 4798 FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE, 4799 xvap.xva_xoptattrs.xoa_archive); 4800 FLAG_CHECK(UF_NODUMP, XAT_NODUMP, 4801 xvap.xva_xoptattrs.xoa_nodump); 4802 FLAG_CHECK(UF_READONLY, XAT_READONLY, 4803 xvap.xva_xoptattrs.xoa_readonly); 4804 FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM, 4805 xvap.xva_xoptattrs.xoa_system); 4806 FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN, 4807 xvap.xva_xoptattrs.xoa_hidden); 4808 FLAG_CHECK(UF_REPARSE, XAT_REPARSE, 4809 xvap.xva_xoptattrs.xoa_reparse); 4810 FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE, 4811 xvap.xva_xoptattrs.xoa_offline); 4812 FLAG_CHECK(UF_SPARSE, XAT_SPARSE, 4813 xvap.xva_xoptattrs.xoa_sparse); 4814 4815 #undef FLAG_CHECK 4816 *vap = xvap.xva_vattr; 4817 vap->va_flags = fflags; 4818 return (0); 4819 } 4820 4821 #ifndef _SYS_SYSPROTO_H_ 4822 struct vop_setattr_args { 4823 struct vnode *a_vp; 4824 struct vattr *a_vap; 4825 struct ucred *a_cred; 4826 }; 4827 #endif 4828 4829 static int 4830 zfs_freebsd_setattr(struct vop_setattr_args *ap) 4831 { 4832 vnode_t *vp = ap->a_vp; 4833 vattr_t *vap = ap->a_vap; 4834 cred_t *cred = ap->a_cred; 4835 xvattr_t xvap; 4836 ulong_t fflags; 4837 uint64_t zflags; 4838 4839 vattr_init_mask(vap); 4840 vap->va_mask &= ~AT_NOSET; 4841 4842 xva_init(&xvap); 4843 xvap.xva_vattr = *vap; 4844 4845 zflags = VTOZ(vp)->z_pflags; 4846 4847 if (vap->va_flags != VNOVAL) { 4848 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs; 4849 int error; 4850 4851 if (zfsvfs->z_use_fuids == B_FALSE) 4852 return (EOPNOTSUPP); 4853 4854 fflags = vap->va_flags; 4855 /* 4856 * XXX KDM 4857 * We need to figure out whether it makes sense to allow 4858 * 
UF_REPARSE through, since we don't really have other
		 * facilities to handle reparse points and zfs_setattr()
		 * doesn't currently allow setting that attribute anyway.
		 */
		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
		    UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
		    UF_OFFLINE|UF_SPARSE)) != 0)
			return (EOPNOTSUPP);
		/*
		 * Unprivileged processes are not permitted to unset system
		 * flags, or modify flags if any system flags are set.
		 * Privileged non-jail processes may not modify system flags
		 * if securelevel > 0 and any existing system flags are set.
		 * Privileged jail processes behave like privileged non-jail
		 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
		 * otherwise, they behave like unprivileged processes.
		 */
		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
		    spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
				error = securelevel_gt(cred, 0);
				if (error != 0)
					return (error);
			}
		} else {
			/*
			 * Callers may only modify the file flags on
			 * objects they have VADMIN rights for.
			 */
			if ((error = VOP_ACCESS(vp, VADMIN, cred,
			    curthread)) != 0)
				return (error);
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY |
			    ZFS_NOUNLINK)) {
				return (EPERM);
			}
			if (fflags &
			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
				return (EPERM);
			}
		}

/* Request an xvattr flag change only when the bit actually toggles. */
#define	FLAG_CHANGE(fflag, zflag, xflag, xfield) do {		\
	if (((fflags & (fflag)) && !(zflags & (zflag))) ||	\
	    ((zflags & (zflag)) && !(fflags & (fflag)))) {	\
		XVA_SET_REQ(&xvap, (xflag));			\
		(xfield) = ((fflags & (fflag)) != 0);		\
	}							\
} while (0)
		/* Convert chflags into ZFS-type flags. */
		/* XXX: what about SF_SETTABLE?. */
		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
		    xvap.xva_xoptattrs.xoa_immutable);
		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
		    xvap.xva_xoptattrs.xoa_appendonly);
		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
		    xvap.xva_xoptattrs.xoa_nounlink);
		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
		    xvap.xva_xoptattrs.xoa_archive);
		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
		    xvap.xva_xoptattrs.xoa_nodump);
		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
		    xvap.xva_xoptattrs.xoa_readonly);
		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
		    xvap.xva_xoptattrs.xoa_system);
		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
		    xvap.xva_xoptattrs.xoa_hidden);
		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
		    xvap.xva_xoptattrs.xoa_reparse);
		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
		    xvap.xva_xoptattrs.xoa_offline);
		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
		    xvap.xva_xoptattrs.xoa_sparse);
#undef	FLAG_CHANGE
	}
	if (vap->va_birthtime.tv_sec != VNOVAL) {
		xvap.xva_vattr.va_mask |= AT_XVATTR;
		XVA_SET_REQ(&xvap, XAT_CREATETIME);
	}
	return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_rename_args {
	struct vnode *a_fdvp;
	struct vnode *a_fvp;
	struct componentname *a_fcnp;
	struct vnode *a_tdvp;
	struct vnode *a_tvp;
	struct componentname *a_tcnp;
};
#endif

/*
 * VOP_RENAME(9) entry point: perform the rename through the common
 * zfs_rename_() code, then release the vnode references the VFS
 * transferred to us.
 */
static int
zfs_freebsd_rename(struct vop_rename_args *ap)
{
	vnode_t *fdvp = ap->a_fdvp;
	vnode_t *fvp = ap->a_fvp;
	vnode_t *tdvp = ap->a_tdvp;
	vnode_t *tvp = ap->a_tvp;
	int error;

	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));

	error = zfs_rename_(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
	    ap->a_tcnp, ap->a_fcnp->cn_cred, 1);

vrele(fdvp);
	vrele(fvp);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_symlink_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
	struct vattr *a_vap;
	char *a_target;
};
#endif

/*
 * VOP_SYMLINK(9) entry point: create the symlink through zfs_symlink()
 * and, on newer FreeBSD, seed the namecache's cached symlink target.
 */
static int
zfs_freebsd_symlink(struct vop_symlink_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;
	znode_t *zp = NULL;
#if __FreeBSD_version >= 1300139
	char *symlink;
	size_t symlink_len;
#endif
	int rc;

	ASSERT(cnp->cn_flags & SAVENAME);

	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode. */
	vattr_init_mask(vap);
	*ap->a_vpp = NULL;

	rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
	    ap->a_target, &zp, cnp->cn_cred, 0 /* flags */);
	if (rc == 0) {
		*ap->a_vpp = ZTOV(zp);
		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
#if __FreeBSD_version >= 1300139
		MPASS(zp->z_cached_symlink == NULL);
		symlink_len = strlen(ap->a_target);
		symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
		if (symlink != NULL) {
			memcpy(symlink, ap->a_target, symlink_len);
			symlink[symlink_len] = '\0';
			atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
			    (uintptr_t)symlink);
		}
#endif
	}
	return (rc);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_readlink_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_READLINK(9) entry point: read the link target and, when the read
 * landed in a single kernel-space buffer, opportunistically populate the
 * namecache symlink cache from it.
 */
static int
zfs_freebsd_readlink(struct vop_readlink_args *ap)
{
	zfs_uio_t uio;
	int error;
#if __FreeBSD_version >= 1300139
	znode_t *zp = VTOZ(ap->a_vp);
	char *symlink, *base;
	size_t symlink_len;
	bool trycache;
#endif

	zfs_uio_init(&uio, ap->a_uio);
#if __FreeBSD_version >= 1300139
	trycache = false;
	if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
	    zfs_uio_iovcnt(&uio) == 1) {
		base = zfs_uio_iovbase(&uio, 0);
		symlink_len = zfs_uio_iovlen(&uio, 0);
		trycache = true;
	}
#endif
	error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
#if __FreeBSD_version >= 1300139
	if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
	    error != 0 || !trycache) {
		return (error);
	}
	symlink_len -= zfs_uio_resid(&uio);
	symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
	if (symlink != NULL) {
		memcpy(symlink, base, symlink_len);
		symlink[symlink_len] = '\0';
		/* Another thread may have raced us; free our copy if so. */
		if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
		    (uintptr_t)NULL, (uintptr_t)symlink)) {
			cache_symlink_free(symlink, symlink_len + 1);
		}
	}
#endif
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_link_args {
	struct vnode *a_tdvp;
	struct vnode *a_vp;
	struct componentname *a_cnp;
};
#endif

/*
 * VOP_LINK(9) entry point: refuse cross-mount links, then create the
 * hard link through the common zfs_link() code.
 */
static int
zfs_freebsd_link(struct vop_link_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	vnode_t *vp = ap->a_vp;
	vnode_t *tdvp = ap->a_tdvp;

	if (tdvp->v_mount != vp->v_mount)
		return (EXDEV);

	ASSERT(cnp->cn_flags & SAVENAME);

	return (zfs_link(VTOZ(tdvp), VTOZ(vp),
	    cnp->cn_nameptr, cnp->cn_cred, 0));
}

#ifndef _SYS_SYSPROTO_H_
struct vop_inactive_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

/*
 * VOP_INACTIVE(9) entry point: let the common code decide whether the
 * znode can be freed now or must be kept around.
 */
static int
zfs_freebsd_inactive(struct vop_inactive_args *ap)
{
	vnode_t *vp = ap->a_vp;

#if __FreeBSD_version >= 1300123
	zfs_inactive(vp, curthread->td_ucred, NULL);
#else
	zfs_inactive(vp, ap->a_td->td_ucred, NULL);
#endif
	return (0);
}

#if __FreeBSD_version >= 1300042
#ifndef _SYS_SYSPROTO_H_
struct vop_need_inactive_args {
	struct vnode *a_vp;
	struct thread *a_td;
5124 }; 5125 #endif 5126 5127 static int 5128 zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap) 5129 { 5130 vnode_t *vp = ap->a_vp; 5131 znode_t *zp = VTOZ(vp); 5132 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5133 int need; 5134 5135 if (vn_need_pageq_flush(vp)) 5136 return (1); 5137 5138 if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs)) 5139 return (1); 5140 need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty); 5141 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 5142 5143 return (need); 5144 } 5145 #endif 5146 5147 #ifndef _SYS_SYSPROTO_H_ 5148 struct vop_reclaim_args { 5149 struct vnode *a_vp; 5150 struct thread *a_td; 5151 }; 5152 #endif 5153 5154 static int 5155 zfs_freebsd_reclaim(struct vop_reclaim_args *ap) 5156 { 5157 vnode_t *vp = ap->a_vp; 5158 znode_t *zp = VTOZ(vp); 5159 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5160 5161 ASSERT3P(zp, !=, NULL); 5162 5163 #if __FreeBSD_version < 1300042 5164 /* Destroy the vm object and flush associated pages. */ 5165 vnode_destroy_vobject(vp); 5166 #endif 5167 /* 5168 * z_teardown_inactive_lock protects from a race with 5169 * zfs_znode_dmu_fini in zfsvfs_teardown during 5170 * force unmount. 
5171 */ 5172 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs); 5173 if (zp->z_sa_hdl == NULL) 5174 zfs_znode_free(zp); 5175 else 5176 zfs_zinactive(zp); 5177 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs); 5178 5179 vp->v_data = NULL; 5180 return (0); 5181 } 5182 5183 #ifndef _SYS_SYSPROTO_H_ 5184 struct vop_fid_args { 5185 struct vnode *a_vp; 5186 struct fid *a_fid; 5187 }; 5188 #endif 5189 5190 static int 5191 zfs_freebsd_fid(struct vop_fid_args *ap) 5192 { 5193 5194 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL)); 5195 } 5196 5197 5198 #ifndef _SYS_SYSPROTO_H_ 5199 struct vop_pathconf_args { 5200 struct vnode *a_vp; 5201 int a_name; 5202 register_t *a_retval; 5203 } *ap; 5204 #endif 5205 5206 static int 5207 zfs_freebsd_pathconf(struct vop_pathconf_args *ap) 5208 { 5209 ulong_t val; 5210 int error; 5211 5212 error = zfs_pathconf(ap->a_vp, ap->a_name, &val, 5213 curthread->td_ucred, NULL); 5214 if (error == 0) { 5215 *ap->a_retval = val; 5216 return (error); 5217 } 5218 if (error != EOPNOTSUPP) 5219 return (error); 5220 5221 switch (ap->a_name) { 5222 case _PC_NAME_MAX: 5223 *ap->a_retval = NAME_MAX; 5224 return (0); 5225 #if __FreeBSD_version >= 1400032 5226 case _PC_DEALLOC_PRESENT: 5227 *ap->a_retval = 1; 5228 return (0); 5229 #endif 5230 case _PC_PIPE_BUF: 5231 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) { 5232 *ap->a_retval = PIPE_BUF; 5233 return (0); 5234 } 5235 return (EINVAL); 5236 default: 5237 return (vop_stdpathconf(ap)); 5238 } 5239 } 5240 5241 /* 5242 * FreeBSD's extended attributes namespace defines file name prefix for ZFS' 5243 * extended attribute name: 5244 * 5245 * NAMESPACE PREFIX 5246 * system freebsd:system: 5247 * user (none, can be used to access ZFS fsattr(5) attributes 5248 * created on Solaris) 5249 */ 5250 static int 5251 zfs_create_attrname(int attrnamespace, const char *name, char *attrname, 5252 size_t size) 5253 { 5254 const char *namespace, *prefix, *suffix; 5255 5256 /* We don't allow '/' character in attribute name. 
 */
	if (strchr(name, '/') != NULL)
		return (SET_ERROR(EINVAL));
	/* We don't allow attribute names that start with "freebsd:" string. */
	if (strncmp(name, "freebsd:", 8) == 0)
		return (SET_ERROR(EINVAL));

	bzero(attrname, size);

	switch (attrnamespace) {
	case EXTATTR_NAMESPACE_USER:
#if 0
		prefix = "freebsd:";
		namespace = EXTATTR_NAMESPACE_USER_STRING;
		suffix = ":";
#else
		/*
		 * This is the default namespace by which we can access all
		 * attributes created on Solaris.
		 */
		prefix = namespace = suffix = "";
#endif
		break;
	case EXTATTR_NAMESPACE_SYSTEM:
		prefix = "freebsd:";
		namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
		suffix = ":";
		break;
	case EXTATTR_NAMESPACE_EMPTY:
	default:
		return (SET_ERROR(EINVAL));
	}
	if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
	    name) >= size) {
		return (SET_ERROR(ENAMETOOLONG));
	}
	return (0);
}

/*
 * Make sure the SA-based xattr nvlist is loaded into z_xattr_cached.
 * Called with z_xattr_lock held (reader or writer); a reader may be
 * temporarily upgraded to a writer to perform the load, then downgraded.
 */
static int
zfs_ensure_xattr_cached(znode_t *zp)
{
	int error = 0;

	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));

	if (zp->z_xattr_cached != NULL)
		return (0);

	if (rw_write_held(&zp->z_xattr_lock))
		return (zfs_sa_get_xattr(zp));

	if (!rw_tryupgrade(&zp->z_xattr_lock)) {
		rw_exit(&zp->z_xattr_lock);
		rw_enter(&zp->z_xattr_lock, RW_WRITER);
	}
	/* Re-check: another writer may have loaded it while we dropped. */
	if (zp->z_xattr_cached == NULL)
		error = zfs_sa_get_xattr(zp);
	rw_downgrade(&zp->z_xattr_lock);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
#endif

/*
 * Read a directory-based extended attribute: open the entry named
 * 'attrname' in the hidden xattr directory and either report its size
 * or copy its contents into the caller's uio.
 */
static int
zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
{
	struct thread *td = ap->a_td;
	struct nameidata nd;
	struct vattr va;
	vnode_t *xvp = NULL, *vp;
	int error, flags;

	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td,
	    LOOKUP_XATTR, B_FALSE);
	if (error != 0)
		return (error);

	flags = FREAD;
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
	    xvp, td);
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
	vp = nd.ni_vp;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (error != 0)
		return (SET_ERROR(error));

	if (ap->a_size != NULL) {
		error = VOP_GETATTR(vp, &va, ap->a_cred);
		if (error == 0)
			*ap->a_size = (size_t)va.va_size;
	} else if (ap->a_uio != NULL)
		error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);

	VOP_UNLOCK1(vp);
	vn_close(vp, flags, ap->a_cred, td);
	return (error);
}

/*
 * Read a System Attribute (SA) based extended attribute from the cached
 * nvlist.  Returns ENOENT when the attribute is not present there.
 */
static int
zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
{
	znode_t *zp = VTOZ(ap->a_vp);
	uchar_t *nv_value;
	uint_t nv_size;
	int error;

	error = zfs_ensure_xattr_cached(zp);
	if (error != 0)
		return (error);

	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
	ASSERT3P(zp->z_xattr_cached, !=, NULL);

	error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
	    &nv_value, &nv_size);
	if (error != 0)
		return (SET_ERROR(error));

	if (ap->a_size != NULL)
		*ap->a_size = nv_size;
	else if (ap->a_uio != NULL)
		error = uiomove(nv_value, nv_size, ap->a_uio);
	if (error != 0)
		return (SET_ERROR(error));

	return (0);
}

/*
 * Vnode operation to retrieve a named extended attribute.
5397 */ 5398 static int 5399 zfs_getextattr(struct vop_getextattr_args *ap) 5400 { 5401 znode_t *zp = VTOZ(ap->a_vp); 5402 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5403 char attrname[EXTATTR_MAXNAMELEN+1]; 5404 int error; 5405 5406 /* 5407 * If the xattr property is off, refuse the request. 5408 */ 5409 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5410 return (SET_ERROR(EOPNOTSUPP)); 5411 5412 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5413 ap->a_cred, ap->a_td, VREAD); 5414 if (error != 0) 5415 return (SET_ERROR(error)); 5416 5417 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5418 sizeof (attrname)); 5419 if (error != 0) 5420 return (error); 5421 5422 error = ENOENT; 5423 ZFS_ENTER(zfsvfs); 5424 ZFS_VERIFY_ZP(zp) 5425 rw_enter(&zp->z_xattr_lock, RW_READER); 5426 if (zfsvfs->z_use_sa && zp->z_is_sa) 5427 error = zfs_getextattr_sa(ap, attrname); 5428 if (error == ENOENT) 5429 error = zfs_getextattr_dir(ap, attrname); 5430 rw_exit(&zp->z_xattr_lock); 5431 ZFS_EXIT(zfsvfs); 5432 if (error == ENOENT) 5433 error = SET_ERROR(ENOATTR); 5434 return (error); 5435 } 5436 5437 #ifndef _SYS_SYSPROTO_H_ 5438 struct vop_deleteextattr { 5439 IN struct vnode *a_vp; 5440 IN int a_attrnamespace; 5441 IN const char *a_name; 5442 IN struct ucred *a_cred; 5443 IN struct thread *a_td; 5444 }; 5445 #endif 5446 5447 static int 5448 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname) 5449 { 5450 struct thread *td = ap->a_td; 5451 struct nameidata nd; 5452 vnode_t *xvp = NULL, *vp; 5453 int error; 5454 5455 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5456 LOOKUP_XATTR, B_FALSE); 5457 if (error != 0) 5458 return (error); 5459 5460 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF, 5461 UIO_SYSSPACE, attrname, xvp, td); 5462 error = namei(&nd); 5463 vp = nd.ni_vp; 5464 if (error != 0) { 5465 NDFREE(&nd, NDF_ONLY_PNBUF); 5466 return (SET_ERROR(error)); 5467 } 5468 5469 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 
NDFREE(&nd, NDF_ONLY_PNBUF);

	vput(nd.ni_dvp);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);

	return (error);
}

/*
 * Remove a System Attribute (SA) based extended attribute from the
 * cached nvlist and write the updated nvlist back.  On failure the
 * cached nvlist is dropped so that it will be re-read from disk.
 */
static int
zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
{
	znode_t *zp = VTOZ(ap->a_vp);
	nvlist_t *nvl;
	int error;

	error = zfs_ensure_xattr_cached(zp);
	if (error != 0)
		return (error);

	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
	ASSERT3P(zp->z_xattr_cached, !=, NULL);

	nvl = zp->z_xattr_cached;
	error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
	if (error != 0)
		error = SET_ERROR(error);
	else
		error = zfs_sa_set_xattr(zp);
	if (error != 0) {
		/* The cached copy may be stale now; drop it. */
		zp->z_xattr_cached = NULL;
		nvlist_free(nvl);
	}
	return (error);
}

/*
 * Vnode operation to remove a named attribute.
 */
static int
zfs_deleteextattr(struct vop_deleteextattr_args *ap)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	char attrname[EXTATTR_MAXNAMELEN+1];
	int error;

	/*
	 * If the xattr property is off, refuse the request.
5521 */ 5522 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5523 return (SET_ERROR(EOPNOTSUPP)); 5524 5525 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5526 ap->a_cred, ap->a_td, VWRITE); 5527 if (error != 0) 5528 return (SET_ERROR(error)); 5529 5530 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5531 sizeof (attrname)); 5532 if (error != 0) 5533 return (error); 5534 5535 size_t size = 0; 5536 struct vop_getextattr_args vga = { 5537 .a_vp = ap->a_vp, 5538 .a_size = &size, 5539 .a_cred = ap->a_cred, 5540 .a_td = ap->a_td, 5541 }; 5542 error = ENOENT; 5543 ZFS_ENTER(zfsvfs); 5544 ZFS_VERIFY_ZP(zp); 5545 rw_enter(&zp->z_xattr_lock, RW_WRITER); 5546 if (zfsvfs->z_use_sa && zp->z_is_sa) { 5547 error = zfs_getextattr_sa(&vga, attrname); 5548 if (error == 0) 5549 error = zfs_deleteextattr_sa(ap, attrname); 5550 } 5551 if (error == ENOENT) { 5552 error = zfs_getextattr_dir(&vga, attrname); 5553 if (error == 0) 5554 error = zfs_deleteextattr_dir(ap, attrname); 5555 } 5556 rw_exit(&zp->z_xattr_lock); 5557 ZFS_EXIT(zfsvfs); 5558 if (error == ENOENT) 5559 error = SET_ERROR(ENOATTR); 5560 return (error); 5561 } 5562 5563 #ifndef _SYS_SYSPROTO_H_ 5564 struct vop_setextattr { 5565 IN struct vnode *a_vp; 5566 IN int a_attrnamespace; 5567 IN const char *a_name; 5568 INOUT struct uio *a_uio; 5569 IN struct ucred *a_cred; 5570 IN struct thread *a_td; 5571 }; 5572 #endif 5573 5574 static int 5575 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname) 5576 { 5577 struct thread *td = ap->a_td; 5578 struct nameidata nd; 5579 struct vattr va; 5580 vnode_t *xvp = NULL, *vp; 5581 int error, flags; 5582 5583 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5584 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE); 5585 if (error != 0) 5586 return (error); 5587 5588 flags = FFLAGS(O_WRONLY | O_CREAT); 5589 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td); 5590 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, 
ap->a_cred, 5591 NULL); 5592 vp = nd.ni_vp; 5593 NDFREE(&nd, NDF_ONLY_PNBUF); 5594 if (error != 0) 5595 return (SET_ERROR(error)); 5596 5597 VATTR_NULL(&va); 5598 va.va_size = 0; 5599 error = VOP_SETATTR(vp, &va, ap->a_cred); 5600 if (error == 0) 5601 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred); 5602 5603 VOP_UNLOCK1(vp); 5604 vn_close(vp, flags, ap->a_cred, td); 5605 return (error); 5606 } 5607 5608 static int 5609 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname) 5610 { 5611 znode_t *zp = VTOZ(ap->a_vp); 5612 nvlist_t *nvl; 5613 size_t sa_size; 5614 int error; 5615 5616 error = zfs_ensure_xattr_cached(zp); 5617 if (error != 0) 5618 return (error); 5619 5620 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock)); 5621 ASSERT3P(zp->z_xattr_cached, !=, NULL); 5622 5623 nvl = zp->z_xattr_cached; 5624 size_t entry_size = ap->a_uio->uio_resid; 5625 if (entry_size > DXATTR_MAX_ENTRY_SIZE) 5626 return (SET_ERROR(EFBIG)); 5627 error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); 5628 if (error != 0) 5629 return (SET_ERROR(error)); 5630 if (sa_size > DXATTR_MAX_SA_SIZE) 5631 return (SET_ERROR(EFBIG)); 5632 uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP); 5633 error = uiomove(buf, entry_size, ap->a_uio); 5634 if (error != 0) { 5635 error = SET_ERROR(error); 5636 } else { 5637 error = nvlist_add_byte_array(nvl, attrname, buf, entry_size); 5638 if (error != 0) 5639 error = SET_ERROR(error); 5640 } 5641 kmem_free(buf, entry_size); 5642 if (error == 0) 5643 error = zfs_sa_set_xattr(zp); 5644 if (error != 0) { 5645 zp->z_xattr_cached = NULL; 5646 nvlist_free(nvl); 5647 } 5648 return (error); 5649 } 5650 5651 /* 5652 * Vnode operation to set a named attribute. 5653 */ 5654 static int 5655 zfs_setextattr(struct vop_setextattr_args *ap) 5656 { 5657 znode_t *zp = VTOZ(ap->a_vp); 5658 zfsvfs_t *zfsvfs = ZTOZSB(zp); 5659 char attrname[EXTATTR_MAXNAMELEN+1]; 5660 int error; 5661 5662 /* 5663 * If the xattr property is off, refuse the request. 
5664 */ 5665 if (!(zfsvfs->z_flags & ZSB_XATTR)) 5666 return (SET_ERROR(EOPNOTSUPP)); 5667 5668 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, 5669 ap->a_cred, ap->a_td, VWRITE); 5670 if (error != 0) 5671 return (SET_ERROR(error)); 5672 5673 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname, 5674 sizeof (attrname)); 5675 if (error != 0) 5676 return (error); 5677 5678 struct vop_deleteextattr_args vda = { 5679 .a_vp = ap->a_vp, 5680 .a_cred = ap->a_cred, 5681 .a_td = ap->a_td, 5682 }; 5683 error = ENOENT; 5684 ZFS_ENTER(zfsvfs); 5685 ZFS_VERIFY_ZP(zp); 5686 rw_enter(&zp->z_xattr_lock, RW_WRITER); 5687 if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) { 5688 error = zfs_setextattr_sa(ap, attrname); 5689 if (error == 0) 5690 /* 5691 * Successfully put into SA, we need to clear the one 5692 * in dir if present. 5693 */ 5694 zfs_deleteextattr_dir(&vda, attrname); 5695 } 5696 if (error) { 5697 error = zfs_setextattr_dir(ap, attrname); 5698 if (error == 0 && zp->z_is_sa) 5699 /* 5700 * Successfully put into dir, we need to clear the one 5701 * in SA if present. 
5702 */ 5703 zfs_deleteextattr_sa(&vda, attrname); 5704 } 5705 rw_exit(&zp->z_xattr_lock); 5706 ZFS_EXIT(zfsvfs); 5707 return (error); 5708 } 5709 5710 #ifndef _SYS_SYSPROTO_H_ 5711 struct vop_listextattr { 5712 IN struct vnode *a_vp; 5713 IN int a_attrnamespace; 5714 INOUT struct uio *a_uio; 5715 OUT size_t *a_size; 5716 IN struct ucred *a_cred; 5717 IN struct thread *a_td; 5718 }; 5719 #endif 5720 5721 static int 5722 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix) 5723 { 5724 struct thread *td = ap->a_td; 5725 struct nameidata nd; 5726 uint8_t dirbuf[sizeof (struct dirent)]; 5727 struct iovec aiov; 5728 struct uio auio; 5729 vnode_t *xvp = NULL, *vp; 5730 int error, eof; 5731 5732 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred, td, 5733 LOOKUP_XATTR, B_FALSE); 5734 if (error != 0) { 5735 /* 5736 * ENOATTR means that the EA directory does not yet exist, 5737 * i.e. there are no extended attributes there. 5738 */ 5739 if (error == ENOATTR) 5740 error = 0; 5741 return (error); 5742 } 5743 5744 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED, 5745 UIO_SYSSPACE, ".", xvp, td); 5746 error = namei(&nd); 5747 vp = nd.ni_vp; 5748 NDFREE(&nd, NDF_ONLY_PNBUF); 5749 if (error != 0) 5750 return (SET_ERROR(error)); 5751 5752 auio.uio_iov = &aiov; 5753 auio.uio_iovcnt = 1; 5754 auio.uio_segflg = UIO_SYSSPACE; 5755 auio.uio_td = td; 5756 auio.uio_rw = UIO_READ; 5757 auio.uio_offset = 0; 5758 5759 size_t plen = strlen(attrprefix); 5760 5761 do { 5762 aiov.iov_base = (void *)dirbuf; 5763 aiov.iov_len = sizeof (dirbuf); 5764 auio.uio_resid = sizeof (dirbuf); 5765 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL); 5766 if (error != 0) 5767 break; 5768 int done = sizeof (dirbuf) - auio.uio_resid; 5769 for (int pos = 0; pos < done; ) { 5770 struct dirent *dp = (struct dirent *)(dirbuf + pos); 5771 pos += dp->d_reclen; 5772 /* 5773 * XXX: Temporarily we also accept DT_UNKNOWN, as this 5774 * is what we get when attribute 
was created on Solaris.
			 */
			if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
				continue;
			else if (plen == 0 &&
			    strncmp(dp->d_name, "freebsd:", 8) == 0)
				continue;
			else if (strncmp(dp->d_name, attrprefix, plen) != 0)
				continue;
			uint8_t nlen = dp->d_namlen - plen;
			if (ap->a_size != NULL) {
				*ap->a_size += 1 + nlen;
			} else if (ap->a_uio != NULL) {
				/*
				 * Format of extattr name entry is one byte for
				 * length and the rest for name.
				 */
				error = uiomove(&nlen, 1, ap->a_uio);
				if (error == 0) {
					char *namep = dp->d_name + plen;
					error = uiomove(namep, nlen, ap->a_uio);
				}
				if (error != 0) {
					error = SET_ERROR(error);
					break;
				}
			}
		}
	} while (!eof && error == 0);

	vput(vp);
	return (error);
}

/*
 * List System Attribute (SA) based extended attributes from the cached
 * nvlist, using the same one-byte-length-prefixed output format as the
 * directory-based variant.
 */
static int
zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
{
	znode_t *zp = VTOZ(ap->a_vp);
	int error;

	error = zfs_ensure_xattr_cached(zp);
	if (error != 0)
		return (error);

	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
	ASSERT3P(zp->z_xattr_cached, !=, NULL);

	size_t plen = strlen(attrprefix);
	nvpair_t *nvp = NULL;
	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);

		const char *name = nvpair_name(nvp);
		if (plen == 0 && strncmp(name, "freebsd:", 8) == 0)
			continue;
		else if (strncmp(name, attrprefix, plen) != 0)
			continue;
		uint8_t nlen = strlen(name) - plen;
		if (ap->a_size != NULL) {
			*ap->a_size += 1 + nlen;
		} else if (ap->a_uio != NULL) {
			/*
			 * Format of extattr name entry is one byte for
			 * length and the rest for name.
			 */
			error = uiomove(&nlen, 1, ap->a_uio);
			if (error == 0) {
				char *namep = __DECONST(char *, name) + plen;
				error = uiomove(namep, nlen, ap->a_uio);
			}
			if (error != 0) {
				error = SET_ERROR(error);
				break;
			}
		}
	}

	return (error);
}

/*
 * Vnode operation to retrieve extended attributes on a vnode.
 */
static int
zfs_listextattr(struct vop_listextattr_args *ap)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	char attrprefix[16];
	int error;

	if (ap->a_size != NULL)
		*ap->a_size = 0;

	/*
	 * If the xattr property is off, refuse the request.
	 */
	if (!(zfsvfs->z_flags & ZSB_XATTR))
		return (SET_ERROR(EOPNOTSUPP));

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error != 0)
		return (SET_ERROR(error));

	error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
	    sizeof (attrprefix));
	if (error != 0)
		return (error);

	/* List both backends: SA-based attrs first, then the xattr dir. */
	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	rw_enter(&zp->z_xattr_lock, RW_READER);
	if (zfsvfs->z_use_sa && zp->z_is_sa)
		error = zfs_listextattr_sa(ap, attrprefix);
	if (error == 0)
		error = zfs_listextattr_dir(ap, attrprefix);
	rw_exit(&zp->z_xattr_lock);
	ZFS_EXIT(zfsvfs);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_getacl_args {
	struct vnode *vp;
	acl_type_t type;
	struct acl *aclp;
	struct ucred *cred;
	struct thread *td;
};
#endif

/*
 * VOP_GETACL(9) entry point: fetch the NFSv4 ACL from ZFS and convert
 * it to the FreeBSD struct acl representation.
 */
static int
zfs_freebsd_getacl(struct vop_getacl_args *ap)
{
	int error;
	vsecattr_t vsecattr;

	if (ap->a_type != ACL_TYPE_NFS4)
		return (EINVAL);

	vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
	if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
	    &vsecattr, 0, ap->a_cred)))
		return (error);

	error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
	    vsecattr.vsa_aclcnt);
	if (vsecattr.vsa_aclentp != NULL)
		kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct vop_setacl_args {
	struct vnode *vp;
	acl_type_t type;
	struct acl *aclp;
	struct ucred *cred;
	struct thread *td;
};
#endif

/*
 * VOP_SETACL(9) entry point: validate the NFSv4 ACL, convert it to ZFS
 * ACEs and store it.
 */
static int
zfs_freebsd_setacl(struct vop_setacl_args *ap)
{
	int error;
	vsecattr_t vsecattr;
	int aclbsize;	/* size of acl list in bytes */
	aclent_t *aaclp;

	if (ap->a_type != ACL_TYPE_NFS4)
		return (EINVAL);

	if (ap->a_aclp == NULL)
		return (EINVAL);

	if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
		return (EINVAL);

	/*
	 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
	 * splitting every entry into two and appending "canonical six"
	 * entries at the end. Don't allow for setting an ACL that would
	 * cause chmod(2) to run out of ACL entries.
5960 */ 5961 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES) 5962 return (ENOSPC); 5963 5964 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR); 5965 if (error != 0) 5966 return (error); 5967 5968 vsecattr.vsa_mask = VSA_ACE; 5969 aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t); 5970 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP); 5971 aaclp = vsecattr.vsa_aclentp; 5972 vsecattr.vsa_aclentsz = aclbsize; 5973 5974 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp); 5975 error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred); 5976 kmem_free(aaclp, aclbsize); 5977 5978 return (error); 5979 } 5980 5981 #ifndef _SYS_SYSPROTO_H_ 5982 struct vop_aclcheck_args { 5983 struct vnode *vp; 5984 acl_type_t type; 5985 struct acl *aclp; 5986 struct ucred *cred; 5987 struct thread *td; 5988 }; 5989 #endif 5990 5991 static int 5992 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap) 5993 { 5994 5995 return (EOPNOTSUPP); 5996 } 5997 5998 static int 5999 zfs_vptocnp(struct vop_vptocnp_args *ap) 6000 { 6001 vnode_t *covered_vp; 6002 vnode_t *vp = ap->a_vp; 6003 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 6004 znode_t *zp = VTOZ(vp); 6005 int ltype; 6006 int error; 6007 6008 ZFS_ENTER(zfsvfs); 6009 ZFS_VERIFY_ZP(zp); 6010 6011 /* 6012 * If we are a snapshot mounted under .zfs, run the operation 6013 * on the covered vnode. 
6014 */ 6015 if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) { 6016 char name[MAXNAMLEN + 1]; 6017 znode_t *dzp; 6018 size_t len; 6019 6020 error = zfs_znode_parent_and_name(zp, &dzp, name); 6021 if (error == 0) { 6022 len = strlen(name); 6023 if (*ap->a_buflen < len) 6024 error = SET_ERROR(ENOMEM); 6025 } 6026 if (error == 0) { 6027 *ap->a_buflen -= len; 6028 bcopy(name, ap->a_buf + *ap->a_buflen, len); 6029 *ap->a_vpp = ZTOV(dzp); 6030 } 6031 ZFS_EXIT(zfsvfs); 6032 return (error); 6033 } 6034 ZFS_EXIT(zfsvfs); 6035 6036 covered_vp = vp->v_mount->mnt_vnodecovered; 6037 #if __FreeBSD_version >= 1300045 6038 enum vgetstate vs = vget_prep(covered_vp); 6039 #else 6040 vhold(covered_vp); 6041 #endif 6042 ltype = VOP_ISLOCKED(vp); 6043 VOP_UNLOCK1(vp); 6044 #if __FreeBSD_version >= 1300045 6045 error = vget_finish(covered_vp, LK_SHARED, vs); 6046 #else 6047 error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread); 6048 #endif 6049 if (error == 0) { 6050 #if __FreeBSD_version >= 1300123 6051 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf, 6052 ap->a_buflen); 6053 #else 6054 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, 6055 ap->a_buf, ap->a_buflen); 6056 #endif 6057 vput(covered_vp); 6058 } 6059 vn_lock(vp, ltype | LK_RETRY); 6060 if (VN_IS_DOOMED(vp)) 6061 error = SET_ERROR(ENOENT); 6062 return (error); 6063 } 6064 6065 #if __FreeBSD_version >= 1400032 6066 static int 6067 zfs_deallocate(struct vop_deallocate_args *ap) 6068 { 6069 znode_t *zp = VTOZ(ap->a_vp); 6070 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 6071 zilog_t *zilog; 6072 off_t off, len, file_sz; 6073 int error; 6074 6075 ZFS_ENTER(zfsvfs); 6076 ZFS_VERIFY_ZP(zp); 6077 6078 /* 6079 * Callers might not be able to detect properly that we are read-only, 6080 * so check it explicitly here. 
6081 */ 6082 if (zfs_is_readonly(zfsvfs)) { 6083 ZFS_EXIT(zfsvfs); 6084 return (SET_ERROR(EROFS)); 6085 } 6086 6087 zilog = zfsvfs->z_log; 6088 off = *ap->a_offset; 6089 len = *ap->a_len; 6090 file_sz = zp->z_size; 6091 if (off + len > file_sz) 6092 len = file_sz - off; 6093 /* Fast path for out-of-range request. */ 6094 if (len <= 0) { 6095 *ap->a_len = 0; 6096 ZFS_EXIT(zfsvfs); 6097 return (0); 6098 } 6099 6100 error = zfs_freesp(zp, off, len, O_RDWR, TRUE); 6101 if (error == 0) { 6102 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS || 6103 (ap->a_ioflag & IO_SYNC) != 0) 6104 zil_commit(zilog, zp->z_id); 6105 *ap->a_offset = off + len; 6106 *ap->a_len = 0; 6107 } 6108 6109 ZFS_EXIT(zfsvfs); 6110 return (error); 6111 } 6112 #endif 6113 6114 struct vop_vector zfs_vnodeops; 6115 struct vop_vector zfs_fifoops; 6116 struct vop_vector zfs_shareops; 6117 6118 struct vop_vector zfs_vnodeops = { 6119 .vop_default = &default_vnodeops, 6120 .vop_inactive = zfs_freebsd_inactive, 6121 #if __FreeBSD_version >= 1300042 6122 .vop_need_inactive = zfs_freebsd_need_inactive, 6123 #endif 6124 .vop_reclaim = zfs_freebsd_reclaim, 6125 #if __FreeBSD_version >= 1300102 6126 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, 6127 #endif 6128 #if __FreeBSD_version >= 1300139 6129 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, 6130 #endif 6131 .vop_access = zfs_freebsd_access, 6132 .vop_allocate = VOP_EINVAL, 6133 #if __FreeBSD_version >= 1400032 6134 .vop_deallocate = zfs_deallocate, 6135 #endif 6136 .vop_lookup = zfs_cache_lookup, 6137 .vop_cachedlookup = zfs_freebsd_cachedlookup, 6138 .vop_getattr = zfs_freebsd_getattr, 6139 .vop_setattr = zfs_freebsd_setattr, 6140 .vop_create = zfs_freebsd_create, 6141 .vop_mknod = (vop_mknod_t *)zfs_freebsd_create, 6142 .vop_mkdir = zfs_freebsd_mkdir, 6143 .vop_readdir = zfs_freebsd_readdir, 6144 .vop_fsync = zfs_freebsd_fsync, 6145 .vop_open = zfs_freebsd_open, 6146 .vop_close = zfs_freebsd_close, 6147 .vop_rmdir = zfs_freebsd_rmdir, 6148 
.vop_ioctl = zfs_freebsd_ioctl, 6149 .vop_link = zfs_freebsd_link, 6150 .vop_symlink = zfs_freebsd_symlink, 6151 .vop_readlink = zfs_freebsd_readlink, 6152 .vop_read = zfs_freebsd_read, 6153 .vop_write = zfs_freebsd_write, 6154 .vop_remove = zfs_freebsd_remove, 6155 .vop_rename = zfs_freebsd_rename, 6156 .vop_pathconf = zfs_freebsd_pathconf, 6157 .vop_bmap = zfs_freebsd_bmap, 6158 .vop_fid = zfs_freebsd_fid, 6159 .vop_getextattr = zfs_getextattr, 6160 .vop_deleteextattr = zfs_deleteextattr, 6161 .vop_setextattr = zfs_setextattr, 6162 .vop_listextattr = zfs_listextattr, 6163 .vop_getacl = zfs_freebsd_getacl, 6164 .vop_setacl = zfs_freebsd_setacl, 6165 .vop_aclcheck = zfs_freebsd_aclcheck, 6166 .vop_getpages = zfs_freebsd_getpages, 6167 .vop_putpages = zfs_freebsd_putpages, 6168 .vop_vptocnp = zfs_vptocnp, 6169 #if __FreeBSD_version >= 1300064 6170 .vop_lock1 = vop_lock, 6171 .vop_unlock = vop_unlock, 6172 .vop_islocked = vop_islocked, 6173 #endif 6174 }; 6175 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops); 6176 6177 struct vop_vector zfs_fifoops = { 6178 .vop_default = &fifo_specops, 6179 .vop_fsync = zfs_freebsd_fsync, 6180 #if __FreeBSD_version >= 1300102 6181 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec, 6182 #endif 6183 #if __FreeBSD_version >= 1300139 6184 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink, 6185 #endif 6186 .vop_access = zfs_freebsd_access, 6187 .vop_getattr = zfs_freebsd_getattr, 6188 .vop_inactive = zfs_freebsd_inactive, 6189 .vop_read = VOP_PANIC, 6190 .vop_reclaim = zfs_freebsd_reclaim, 6191 .vop_setattr = zfs_freebsd_setattr, 6192 .vop_write = VOP_PANIC, 6193 .vop_pathconf = zfs_freebsd_pathconf, 6194 .vop_fid = zfs_freebsd_fid, 6195 .vop_getacl = zfs_freebsd_getacl, 6196 .vop_setacl = zfs_freebsd_setacl, 6197 .vop_aclcheck = zfs_freebsd_aclcheck, 6198 }; 6199 VFS_VOP_VECTOR_REGISTER(zfs_fifoops); 6200 6201 /* 6202 * special share hidden files vnode operations template 6203 */ 6204 struct vop_vector zfs_shareops = { 6205 .vop_default = 
&default_vnodeops, 6206 #if __FreeBSD_version >= 1300121 6207 .vop_fplookup_vexec = VOP_EAGAIN, 6208 #endif 6209 #if __FreeBSD_version >= 1300139 6210 .vop_fplookup_symlink = VOP_EAGAIN, 6211 #endif 6212 .vop_access = zfs_freebsd_access, 6213 .vop_inactive = zfs_freebsd_inactive, 6214 .vop_reclaim = zfs_freebsd_reclaim, 6215 .vop_fid = zfs_freebsd_fid, 6216 .vop_pathconf = zfs_freebsd_pathconf, 6217 }; 6218 VFS_VOP_VECTOR_REGISTER(zfs_shareops); 6219