1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 
24 * Copyright (c) 2014 Integros [integros.com] 25 */ 26 27 /* Portions Copyright 2007 Jeremy Teo */ 28 /* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ 29 30 #ifdef _KERNEL 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/time.h> 34 #include <sys/systm.h> 35 #include <sys/sysmacros.h> 36 #include <sys/resource.h> 37 #include <sys/mntent.h> 38 #include <sys/u8_textprep.h> 39 #include <sys/dsl_dataset.h> 40 #include <sys/vfs.h> 41 #include <sys/vnode.h> 42 #include <sys/file.h> 43 #include <sys/kmem.h> 44 #include <sys/errno.h> 45 #include <sys/unistd.h> 46 #include <sys/atomic.h> 47 #include <sys/zfs_dir.h> 48 #include <sys/zfs_acl.h> 49 #include <sys/zfs_ioctl.h> 50 #include <sys/zfs_rlock.h> 51 #include <sys/zfs_fuid.h> 52 #include <sys/dnode.h> 53 #include <sys/fs/zfs.h> 54 #endif /* _KERNEL */ 55 56 #include <sys/dmu.h> 57 #include <sys/dmu_objset.h> 58 #include <sys/dmu_tx.h> 59 #include <sys/zfs_refcount.h> 60 #include <sys/stat.h> 61 #include <sys/zap.h> 62 #include <sys/zfs_znode.h> 63 #include <sys/sa.h> 64 #include <sys/zfs_sa.h> 65 #include <sys/zfs_stat.h> 66 67 #include "zfs_prop.h" 68 #include "zfs_comutil.h" 69 70 /* Used by fstat(1). */ 71 SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, 72 SYSCTL_NULL_INT_PTR, sizeof (znode_t), "sizeof(znode_t)"); 73 74 /* 75 * Define ZNODE_STATS to turn on statistic gathering. By default, it is only 76 * turned on when DEBUG is also defined. 77 */ 78 #ifdef ZFS_DEBUG 79 #define ZNODE_STATS 80 #endif /* DEBUG */ 81 82 #ifdef ZNODE_STATS 83 #define ZNODE_STAT_ADD(stat) ((stat)++) 84 #else 85 #define ZNODE_STAT_ADD(stat) /* nothing */ 86 #endif /* ZNODE_STATS */ 87 88 /* 89 * Functions needed for userland (ie: libzpool) are not put under 90 * #ifdef_KERNEL; the rest of the functions have dependencies 91 * (such as VFS logic) that will not compile easily in userland. 
92 */ 93 #ifdef _KERNEL 94 #if !defined(KMEM_DEBUG) && __FreeBSD_version >= 1300102 95 #define _ZFS_USE_SMR 96 static uma_zone_t znode_uma_zone; 97 #else 98 static kmem_cache_t *znode_cache = NULL; 99 #endif 100 101 extern struct vop_vector zfs_vnodeops; 102 extern struct vop_vector zfs_fifoops; 103 extern struct vop_vector zfs_shareops; 104 105 106 /* 107 * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on 108 * z_rangelock. It will modify the offset and length of the lock to reflect 109 * znode-specific information, and convert RL_APPEND to RL_WRITER. This is 110 * called with the rangelock_t's rl_lock held, which avoids races. 111 */ 112 static void 113 zfs_rangelock_cb(zfs_locked_range_t *new, void *arg) 114 { 115 znode_t *zp = arg; 116 117 /* 118 * If in append mode, convert to writer and lock starting at the 119 * current end of file. 120 */ 121 if (new->lr_type == RL_APPEND) { 122 new->lr_offset = zp->z_size; 123 new->lr_type = RL_WRITER; 124 } 125 126 /* 127 * If we need to grow the block size then lock the whole file range. 
128 */ 129 uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length); 130 if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) || 131 zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) { 132 new->lr_offset = 0; 133 new->lr_length = UINT64_MAX; 134 } 135 } 136 137 static int 138 zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) 139 { 140 znode_t *zp = buf; 141 142 POINTER_INVALIDATE(&zp->z_zfsvfs); 143 144 list_link_init(&zp->z_link_node); 145 146 mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); 147 148 zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp); 149 150 zp->z_acl_cached = NULL; 151 zp->z_vnode = NULL; 152 zp->z_moved = 0; 153 return (0); 154 } 155 156 /*ARGSUSED*/ 157 static void 158 zfs_znode_cache_destructor(void *buf, void *arg) 159 { 160 znode_t *zp = buf; 161 162 ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); 163 ASSERT3P(zp->z_vnode, ==, NULL); 164 ASSERT(!list_link_active(&zp->z_link_node)); 165 mutex_destroy(&zp->z_acl_lock); 166 zfs_rangelock_fini(&zp->z_rangelock); 167 168 ASSERT(zp->z_acl_cached == NULL); 169 } 170 171 172 #ifdef _ZFS_USE_SMR 173 VFS_SMR_DECLARE; 174 175 static int 176 zfs_znode_cache_constructor_smr(void *mem, int size __unused, void *private, 177 int flags) 178 { 179 180 return (zfs_znode_cache_constructor(mem, private, flags)); 181 } 182 183 static void 184 zfs_znode_cache_destructor_smr(void *mem, int size __unused, void *private) 185 { 186 187 zfs_znode_cache_destructor(mem, private); 188 } 189 190 void 191 zfs_znode_init(void) 192 { 193 /* 194 * Initialize zcache 195 */ 196 ASSERT(znode_uma_zone == NULL); 197 znode_uma_zone = uma_zcreate("zfs_znode_cache", 198 sizeof (znode_t), zfs_znode_cache_constructor_smr, 199 zfs_znode_cache_destructor_smr, NULL, NULL, 0, 0); 200 VFS_SMR_ZONE_SET(znode_uma_zone); 201 } 202 203 static znode_t * 204 zfs_znode_alloc_kmem(int flags) 205 { 206 207 return (uma_zalloc_smr(znode_uma_zone, flags)); 208 } 209 210 static void 211 zfs_znode_free_kmem(znode_t *zp) 212 { 213 214 
uma_zfree_smr(znode_uma_zone, zp); 215 } 216 #else 217 void 218 zfs_znode_init(void) 219 { 220 /* 221 * Initialize zcache 222 */ 223 ASSERT(znode_cache == NULL); 224 znode_cache = kmem_cache_create("zfs_znode_cache", 225 sizeof (znode_t), 0, zfs_znode_cache_constructor, 226 zfs_znode_cache_destructor, NULL, NULL, NULL, 0); 227 } 228 229 static znode_t * 230 zfs_znode_alloc_kmem(int flags) 231 { 232 233 return (kmem_cache_alloc(znode_cache, flags)); 234 } 235 236 static void 237 zfs_znode_free_kmem(znode_t *zp) 238 { 239 240 kmem_cache_free(znode_cache, zp); 241 } 242 #endif 243 244 void 245 zfs_znode_fini(void) 246 { 247 /* 248 * Cleanup zcache 249 */ 250 #ifdef _ZFS_USE_SMR 251 if (znode_uma_zone) { 252 uma_zdestroy(znode_uma_zone); 253 znode_uma_zone = NULL; 254 } 255 #else 256 if (znode_cache) { 257 kmem_cache_destroy(znode_cache); 258 znode_cache = NULL; 259 } 260 #endif 261 } 262 263 264 static int 265 zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) 266 { 267 zfs_acl_ids_t acl_ids; 268 vattr_t vattr; 269 znode_t *sharezp; 270 znode_t *zp; 271 int error; 272 273 vattr.va_mask = AT_MODE|AT_UID|AT_GID; 274 vattr.va_type = VDIR; 275 vattr.va_mode = S_IFDIR|0555; 276 vattr.va_uid = crgetuid(kcred); 277 vattr.va_gid = crgetgid(kcred); 278 279 sharezp = zfs_znode_alloc_kmem(KM_SLEEP); 280 ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs)); 281 sharezp->z_moved = 0; 282 sharezp->z_unlinked = 0; 283 sharezp->z_atime_dirty = 0; 284 sharezp->z_zfsvfs = zfsvfs; 285 sharezp->z_is_sa = zfsvfs->z_use_sa; 286 287 VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, 288 kcred, NULL, &acl_ids)); 289 zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids); 290 ASSERT3P(zp, ==, sharezp); 291 POINTER_INVALIDATE(&sharezp->z_zfsvfs); 292 error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 293 ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx); 294 zfsvfs->z_shares_dir = sharezp->z_id; 295 296 zfs_acl_ids_free(&acl_ids); 297 sa_handle_destroy(sharezp->z_sa_hdl); 298 
zfs_znode_free_kmem(sharezp); 299 300 return (error); 301 } 302 303 /* 304 * define a couple of values we need available 305 * for both 64 and 32 bit environments. 306 */ 307 #ifndef NBITSMINOR64 308 #define NBITSMINOR64 32 309 #endif 310 #ifndef MAXMAJ64 311 #define MAXMAJ64 0xffffffffUL 312 #endif 313 #ifndef MAXMIN64 314 #define MAXMIN64 0xffffffffUL 315 #endif 316 317 /* 318 * Create special expldev for ZFS private use. 319 * Can't use standard expldev since it doesn't do 320 * what we want. The standard expldev() takes a 321 * dev32_t in LP64 and expands it to a long dev_t. 322 * We need an interface that takes a dev32_t in ILP32 323 * and expands it to a long dev_t. 324 */ 325 static uint64_t 326 zfs_expldev(dev_t dev) 327 { 328 return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev)); 329 } 330 /* 331 * Special cmpldev for ZFS private use. 332 * Can't use standard cmpldev since it takes 333 * a long dev_t and compresses it to dev32_t in 334 * LP64. We need to do a compaction of a long dev_t 335 * to a dev32_t in ILP32. 336 */ 337 dev_t 338 zfs_cmpldev(uint64_t dev) 339 { 340 return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64))); 341 } 342 343 static void 344 zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, 345 dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl) 346 { 347 ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs)); 348 ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id))); 349 350 ASSERT(zp->z_sa_hdl == NULL); 351 ASSERT(zp->z_acl_cached == NULL); 352 if (sa_hdl == NULL) { 353 VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, 354 SA_HDL_SHARED, &zp->z_sa_hdl)); 355 } else { 356 zp->z_sa_hdl = sa_hdl; 357 sa_set_userp(sa_hdl, zp); 358 } 359 360 zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE; 361 362 /* 363 * Slap on VROOT if we are the root znode unless we are the root 364 * node of a snapshot mounted under .zfs. 
365 */ 366 if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent == zfsvfs) 367 ZTOV(zp)->v_flag |= VROOT; 368 369 vn_exists(ZTOV(zp)); 370 } 371 372 void 373 zfs_znode_dmu_fini(znode_t *zp) 374 { 375 ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || 376 zp->z_unlinked || 377 ZFS_TEARDOWN_INACTIVE_WLOCKED(zp->z_zfsvfs)); 378 379 sa_handle_destroy(zp->z_sa_hdl); 380 zp->z_sa_hdl = NULL; 381 } 382 383 static void 384 zfs_vnode_forget(vnode_t *vp) 385 { 386 387 /* copied from insmntque_stddtr */ 388 vp->v_data = NULL; 389 vp->v_op = &dead_vnodeops; 390 vgone(vp); 391 vput(vp); 392 } 393 394 /* 395 * Construct a new znode/vnode and initialize. 396 * 397 * This does not do a call to dmu_set_user() that is 398 * up to the caller to do, in case you don't want to 399 * return the znode 400 */ 401 static znode_t * 402 zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, 403 dmu_object_type_t obj_type, sa_handle_t *hdl) 404 { 405 znode_t *zp; 406 vnode_t *vp; 407 uint64_t mode; 408 uint64_t parent; 409 #ifdef notyet 410 uint64_t mtime[2], ctime[2]; 411 #endif 412 uint64_t projid = ZFS_DEFAULT_PROJID; 413 sa_bulk_attr_t bulk[9]; 414 int count = 0; 415 int error; 416 417 zp = zfs_znode_alloc_kmem(KM_SLEEP); 418 419 #ifndef _ZFS_USE_SMR 420 KASSERT((zfsvfs->z_parent->z_vfs->mnt_kern_flag & MNTK_FPLOOKUP) == 0, 421 ("%s: fast path lookup enabled without smr", __func__)); 422 #endif 423 424 #if __FreeBSD_version >= 1300076 425 KASSERT(curthread->td_vp_reserved != NULL, 426 ("zfs_znode_alloc: getnewvnode without any vnodes reserved")); 427 #else 428 KASSERT(curthread->td_vp_reserv > 0, 429 ("zfs_znode_alloc: getnewvnode without any vnodes reserved")); 430 #endif 431 error = getnewvnode("zfs", zfsvfs->z_parent->z_vfs, &zfs_vnodeops, &vp); 432 if (error != 0) { 433 zfs_znode_free_kmem(zp); 434 return (NULL); 435 } 436 zp->z_vnode = vp; 437 vp->v_data = zp; 438 439 ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); 440 zp->z_moved = 0; 441 442 zp->z_sa_hdl = NULL; 443 
zp->z_unlinked = 0; 444 zp->z_atime_dirty = 0; 445 zp->z_mapcnt = 0; 446 zp->z_id = db->db_object; 447 zp->z_blksz = blksz; 448 zp->z_seq = 0x7A4653; 449 zp->z_sync_cnt = 0; 450 451 vp = ZTOV(zp); 452 453 zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); 454 455 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); 456 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8); 457 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 458 &zp->z_size, 8); 459 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 460 &zp->z_links, 8); 461 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 462 &zp->z_pflags, 8); 463 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8); 464 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 465 &zp->z_atime, 16); 466 #ifdef notyet 467 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 468 &mtime, 16); 469 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 470 &ctime, 16); 471 #endif 472 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 473 &zp->z_uid, 8); 474 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, 475 &zp->z_gid, 8); 476 477 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0 || 478 (dmu_objset_projectquota_enabled(zfsvfs->z_os) && 479 (zp->z_pflags & ZFS_PROJID) && 480 sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) { 481 if (hdl == NULL) 482 sa_handle_destroy(zp->z_sa_hdl); 483 zfs_vnode_forget(vp); 484 zp->z_vnode = NULL; 485 zfs_znode_free_kmem(zp); 486 return (NULL); 487 } 488 489 zp->z_projid = projid; 490 zp->z_mode = mode; 491 492 /* Cache the xattr parent id */ 493 if (zp->z_pflags & ZFS_XATTR) 494 zp->z_xattr_parent = parent; 495 496 vp->v_type = IFTOVT((mode_t)mode); 497 498 switch (vp->v_type) { 499 case VDIR: 500 zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ 501 break; 502 case VFIFO: 503 vp->v_op = &zfs_fifoops; 504 break; 505 case VREG: 506 if (parent == 
zfsvfs->z_shares_dir) { 507 ASSERT(zp->z_uid == 0 && zp->z_gid == 0); 508 vp->v_op = &zfs_shareops; 509 } 510 break; 511 default: 512 break; 513 } 514 515 mutex_enter(&zfsvfs->z_znodes_lock); 516 list_insert_tail(&zfsvfs->z_all_znodes, zp); 517 zfsvfs->z_nr_znodes++; 518 zp->z_zfsvfs = zfsvfs; 519 mutex_exit(&zfsvfs->z_znodes_lock); 520 521 /* 522 * Acquire vnode lock before making it available to the world. 523 */ 524 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 525 VN_LOCK_AREC(vp); 526 if (vp->v_type != VFIFO) 527 VN_LOCK_ASHARE(vp); 528 529 return (zp); 530 } 531 532 static uint64_t empty_xattr; 533 static uint64_t pad[4]; 534 static zfs_acl_phys_t acl_phys; 535 /* 536 * Create a new DMU object to hold a zfs znode. 537 * 538 * IN: dzp - parent directory for new znode 539 * vap - file attributes for new znode 540 * tx - dmu transaction id for zap operations 541 * cr - credentials of caller 542 * flag - flags: 543 * IS_ROOT_NODE - new object will be root 544 * IS_XATTR - new object is an attribute 545 * bonuslen - length of bonus buffer 546 * setaclp - File/Dir initial ACL 547 * fuidp - Tracks fuid allocation. 
548 * 549 * OUT: zpp - allocated znode 550 * 551 */ 552 void 553 zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, 554 uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids) 555 { 556 uint64_t crtime[2], atime[2], mtime[2], ctime[2]; 557 uint64_t mode, size, links, parent, pflags; 558 uint64_t dzp_pflags = 0; 559 uint64_t rdev = 0; 560 zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 561 dmu_buf_t *db; 562 timestruc_t now; 563 uint64_t gen, obj; 564 int err; 565 int bonuslen; 566 int dnodesize; 567 sa_handle_t *sa_hdl; 568 dmu_object_type_t obj_type; 569 sa_bulk_attr_t *sa_attrs; 570 int cnt = 0; 571 zfs_acl_locator_cb_t locate = { 0 }; 572 573 ASSERT(vap && ((vap->va_mask & AT_MODE) == AT_MODE)); 574 575 if (zfsvfs->z_replay) { 576 obj = vap->va_nodeid; 577 now = vap->va_ctime; /* see zfs_replay_create() */ 578 gen = vap->va_nblocks; /* ditto */ 579 dnodesize = vap->va_fsid; /* ditto */ 580 } else { 581 obj = 0; 582 vfs_timestamp(&now); 583 gen = dmu_tx_get_txg(tx); 584 dnodesize = dmu_objset_dnodesize(zfsvfs->z_os); 585 } 586 587 if (dnodesize == 0) 588 dnodesize = DNODE_MIN_SIZE; 589 590 obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; 591 bonuslen = (obj_type == DMU_OT_SA) ? 592 DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE; 593 594 /* 595 * Create a new DMU object. 596 */ 597 /* 598 * There's currently no mechanism for pre-reading the blocks that will 599 * be needed to allocate a new object, so we accept the small chance 600 * that there will be an i/o error and we will fail one of the 601 * assertions below. 
602 */ 603 if (vap->va_type == VDIR) { 604 if (zfsvfs->z_replay) { 605 VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj, 606 zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, 607 obj_type, bonuslen, dnodesize, tx)); 608 } else { 609 obj = zap_create_norm_dnsize(zfsvfs->z_os, 610 zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, 611 obj_type, bonuslen, dnodesize, tx); 612 } 613 } else { 614 if (zfsvfs->z_replay) { 615 VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj, 616 DMU_OT_PLAIN_FILE_CONTENTS, 0, 617 obj_type, bonuslen, dnodesize, tx)); 618 } else { 619 obj = dmu_object_alloc_dnsize(zfsvfs->z_os, 620 DMU_OT_PLAIN_FILE_CONTENTS, 0, 621 obj_type, bonuslen, dnodesize, tx); 622 } 623 } 624 625 ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); 626 VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db)); 627 628 /* 629 * If this is the root, fix up the half-initialized parent pointer 630 * to reference the just-allocated physical data area. 631 */ 632 if (flag & IS_ROOT_NODE) { 633 dzp->z_id = obj; 634 } else { 635 dzp_pflags = dzp->z_pflags; 636 } 637 638 /* 639 * If parent is an xattr, so am I. 640 */ 641 if (dzp_pflags & ZFS_XATTR) { 642 flag |= IS_XATTR; 643 } 644 645 if (zfsvfs->z_use_fuids) 646 pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; 647 else 648 pflags = 0; 649 650 if (vap->va_type == VDIR) { 651 size = 2; /* contents ("." and "..") */ 652 links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; 653 } else { 654 size = links = 0; 655 } 656 657 if (vap->va_type == VBLK || vap->va_type == VCHR) { 658 rdev = zfs_expldev(vap->va_rdev); 659 } 660 661 parent = dzp->z_id; 662 mode = acl_ids->z_mode; 663 if (flag & IS_XATTR) 664 pflags |= ZFS_XATTR; 665 666 /* 667 * No execs denied will be determined when zfs_mode_compute() is called. 
668 */ 669 pflags |= acl_ids->z_aclp->z_hints & 670 (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| 671 ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); 672 673 ZFS_TIME_ENCODE(&now, crtime); 674 ZFS_TIME_ENCODE(&now, ctime); 675 676 if (vap->va_mask & AT_ATIME) { 677 ZFS_TIME_ENCODE(&vap->va_atime, atime); 678 } else { 679 ZFS_TIME_ENCODE(&now, atime); 680 } 681 682 if (vap->va_mask & AT_MTIME) { 683 ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 684 } else { 685 ZFS_TIME_ENCODE(&now, mtime); 686 } 687 688 /* Now add in all of the "SA" attributes */ 689 VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, 690 &sa_hdl)); 691 692 /* 693 * Setup the array of attributes to be replaced/set on the new file 694 * 695 * order for DMU_OT_ZNODE is critical since it needs to be constructed 696 * in the old znode_phys_t format. Don't change this ordering 697 */ 698 sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP); 699 700 if (obj_type == DMU_OT_ZNODE) { 701 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), 702 NULL, &atime, 16); 703 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), 704 NULL, &mtime, 16); 705 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), 706 NULL, &ctime, 16); 707 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), 708 NULL, &crtime, 16); 709 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), 710 NULL, &gen, 8); 711 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), 712 NULL, &mode, 8); 713 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), 714 NULL, &size, 8); 715 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), 716 NULL, &parent, 8); 717 } else { 718 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), 719 NULL, &mode, 8); 720 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), 721 NULL, &size, 8); 722 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), 723 NULL, &gen, 8); 724 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), 725 NULL, &acl_ids->z_fuid, 8); 726 SA_ADD_BULK_ATTR(sa_attrs, cnt, 
SA_ZPL_GID(zfsvfs), 727 NULL, &acl_ids->z_fgid, 8); 728 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), 729 NULL, &parent, 8); 730 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), 731 NULL, &pflags, 8); 732 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), 733 NULL, &atime, 16); 734 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), 735 NULL, &mtime, 16); 736 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), 737 NULL, &ctime, 16); 738 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), 739 NULL, &crtime, 16); 740 } 741 742 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8); 743 744 if (obj_type == DMU_OT_ZNODE) { 745 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL, 746 &empty_xattr, 8); 747 } 748 if (obj_type == DMU_OT_ZNODE || 749 (vap->va_type == VBLK || vap->va_type == VCHR)) { 750 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs), 751 NULL, &rdev, 8); 752 753 } 754 if (obj_type == DMU_OT_ZNODE) { 755 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), 756 NULL, &pflags, 8); 757 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL, 758 &acl_ids->z_fuid, 8); 759 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL, 760 &acl_ids->z_fgid, 8); 761 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad, 762 sizeof (uint64_t) * 4); 763 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, 764 &acl_phys, sizeof (zfs_acl_phys_t)); 765 } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { 766 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL, 767 &acl_ids->z_aclp->z_acl_count, 8); 768 locate.cb_aclp = acl_ids->z_aclp; 769 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs), 770 zfs_acl_data_locator, &locate, 771 acl_ids->z_aclp->z_acl_bytes); 772 mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, 773 acl_ids->z_fuid, acl_ids->z_fgid); 774 } 775 776 VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); 777 778 if (!(flag & IS_ROOT_NODE)) { 
779 *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); 780 ASSERT(*zpp != NULL); 781 } else { 782 /* 783 * If we are creating the root node, the "parent" we 784 * passed in is the znode for the root. 785 */ 786 *zpp = dzp; 787 788 (*zpp)->z_sa_hdl = sa_hdl; 789 } 790 791 (*zpp)->z_pflags = pflags; 792 (*zpp)->z_mode = mode; 793 (*zpp)->z_dnodesize = dnodesize; 794 795 if (vap->va_mask & AT_XVATTR) 796 zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx); 797 798 if (obj_type == DMU_OT_ZNODE || 799 acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { 800 VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); 801 } 802 if (!(flag & IS_ROOT_NODE)) { 803 vnode_t *vp; 804 805 vp = ZTOV(*zpp); 806 vp->v_vflag |= VV_FORCEINSMQ; 807 err = insmntque(vp, zfsvfs->z_vfs); 808 vp->v_vflag &= ~VV_FORCEINSMQ; 809 KASSERT(err == 0, ("insmntque() failed: error %d", err)); 810 } 811 kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END); 812 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); 813 } 814 815 /* 816 * Update in-core attributes. It is assumed the caller will be doing an 817 * sa_bulk_update to push the changes out. 
818 */ 819 void 820 zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) 821 { 822 xoptattr_t *xoap; 823 824 xoap = xva_getxoptattr(xvap); 825 ASSERT(xoap); 826 827 ASSERT_VOP_IN_SEQC(ZTOV(zp)); 828 829 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 830 uint64_t times[2]; 831 ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); 832 (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs), 833 ×, sizeof (times), tx); 834 XVA_SET_RTN(xvap, XAT_CREATETIME); 835 } 836 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 837 ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, 838 zp->z_pflags, tx); 839 XVA_SET_RTN(xvap, XAT_READONLY); 840 } 841 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 842 ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, 843 zp->z_pflags, tx); 844 XVA_SET_RTN(xvap, XAT_HIDDEN); 845 } 846 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 847 ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, 848 zp->z_pflags, tx); 849 XVA_SET_RTN(xvap, XAT_SYSTEM); 850 } 851 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 852 ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, 853 zp->z_pflags, tx); 854 XVA_SET_RTN(xvap, XAT_ARCHIVE); 855 } 856 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 857 ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, 858 zp->z_pflags, tx); 859 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 860 } 861 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 862 ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, 863 zp->z_pflags, tx); 864 XVA_SET_RTN(xvap, XAT_NOUNLINK); 865 } 866 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 867 ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, 868 zp->z_pflags, tx); 869 XVA_SET_RTN(xvap, XAT_APPENDONLY); 870 } 871 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 872 ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, 873 zp->z_pflags, tx); 874 XVA_SET_RTN(xvap, XAT_NODUMP); 875 } 876 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 877 ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, 878 zp->z_pflags, tx); 879 XVA_SET_RTN(xvap, XAT_OPAQUE); 880 } 881 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 882 
ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, 883 xoap->xoa_av_quarantined, zp->z_pflags, tx); 884 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 885 } 886 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 887 ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, 888 zp->z_pflags, tx); 889 XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 890 } 891 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { 892 zfs_sa_set_scanstamp(zp, xvap, tx); 893 XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); 894 } 895 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 896 ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse, 897 zp->z_pflags, tx); 898 XVA_SET_RTN(xvap, XAT_REPARSE); 899 } 900 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 901 ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, 902 zp->z_pflags, tx); 903 XVA_SET_RTN(xvap, XAT_OFFLINE); 904 } 905 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 906 ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse, 907 zp->z_pflags, tx); 908 XVA_SET_RTN(xvap, XAT_SPARSE); 909 } 910 } 911 912 int 913 zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) 914 { 915 dmu_object_info_t doi; 916 dmu_buf_t *db; 917 znode_t *zp; 918 vnode_t *vp; 919 sa_handle_t *hdl; 920 struct thread *td; 921 int locked; 922 int err; 923 924 td = curthread; 925 getnewvnode_reserve_(); 926 again: 927 *zpp = NULL; 928 ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); 929 930 err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); 931 if (err) { 932 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 933 getnewvnode_drop_reserve(); 934 return (err); 935 } 936 937 dmu_object_info_from_db(db, &doi); 938 if (doi.doi_bonus_type != DMU_OT_SA && 939 (doi.doi_bonus_type != DMU_OT_ZNODE || 940 (doi.doi_bonus_type == DMU_OT_ZNODE && 941 doi.doi_bonus_size < sizeof (znode_phys_t)))) { 942 sa_buf_rele(db, NULL); 943 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 944 getnewvnode_drop_reserve(); 945 return (SET_ERROR(EINVAL)); 946 } 947 948 hdl = dmu_buf_get_user(db); 949 if (hdl != NULL) { 950 zp = sa_get_userdata(hdl); 951 952 /* 953 * Since "SA" does immediate eviction we 954 * should never find a 
sa handle that doesn't 955 * know about the znode. 956 */ 957 ASSERT3P(zp, !=, NULL); 958 ASSERT3U(zp->z_id, ==, obj_num); 959 if (zp->z_unlinked) { 960 err = SET_ERROR(ENOENT); 961 } else { 962 vp = ZTOV(zp); 963 /* 964 * Don't let the vnode disappear after 965 * ZFS_OBJ_HOLD_EXIT. 966 */ 967 VN_HOLD(vp); 968 *zpp = zp; 969 err = 0; 970 } 971 972 sa_buf_rele(db, NULL); 973 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 974 975 if (err) { 976 getnewvnode_drop_reserve(); 977 return (err); 978 } 979 980 locked = VOP_ISLOCKED(vp); 981 VI_LOCK(vp); 982 if (VN_IS_DOOMED(vp) && locked != LK_EXCLUSIVE) { 983 /* 984 * The vnode is doomed and this thread doesn't 985 * hold the exclusive lock on it, so the vnode 986 * must be being reclaimed by another thread. 987 * Otherwise the doomed vnode is being reclaimed 988 * by this thread and zfs_zget is called from 989 * ZIL internals. 990 */ 991 VI_UNLOCK(vp); 992 993 /* 994 * XXX vrele() locks the vnode when the last reference 995 * is dropped. Although in this case the vnode is 996 * doomed / dead and so no inactivation is required, 997 * the vnode lock is still acquired. That could result 998 * in a LOR with z_teardown_lock if another thread holds 999 * the vnode's lock and tries to take z_teardown_lock. 1000 * But that is only possible if the other thread peforms 1001 * a ZFS vnode operation on the vnode. That either 1002 * should not happen if the vnode is dead or the thread 1003 * should also have a reference to the vnode and thus 1004 * our reference is not last. 1005 */ 1006 VN_RELE(vp); 1007 goto again; 1008 } 1009 VI_UNLOCK(vp); 1010 getnewvnode_drop_reserve(); 1011 return (err); 1012 } 1013 1014 /* 1015 * Not found create new znode/vnode 1016 * but only if file exists. 1017 * 1018 * There is a small window where zfs_vget() could 1019 * find this object while a file create is still in 1020 * progress. This is checked for in zfs_znode_alloc() 1021 * 1022 * if zfs_znode_alloc() fails it will drop the hold on the 1023 * bonus buffer. 
	 */
	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
	    doi.doi_bonus_type, NULL);
	if (zp == NULL) {
		err = SET_ERROR(ENOENT);
	} else {
		*zpp = zp;
	}
	if (err == 0) {
		vnode_t *vp = ZTOV(zp);

		err = insmntque(vp, zfsvfs->z_vfs);
		if (err == 0) {
			vp->v_hash = obj_num;
			VOP_UNLOCK1(vp);
		} else {
			zp->z_vnode = NULL;
			zfs_znode_dmu_fini(zp);
			zfs_znode_free(zp);
			*zpp = NULL;
		}
	}
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
	getnewvnode_drop_reserve();
	return (err);
}

/*
 * Re-establish the association between an existing znode and its on-disk
 * object and reload the attribute values that are cached in the znode.
 *
 *	IN:	zp	- znode to refresh; its SA handle must already have
 *			  been released (z_sa_hdl == NULL is asserted).
 *
 *	RETURN:	0 on success.  0 is also returned when the object has no
 *		links; in that case the SA handle is released again and the
 *		znode is marked unlinked.
 *		The error from sa_buf_hold() if the bonus buffer cannot be
 *		held, EINVAL if the bonus buffer is neither a valid SA nor a
 *		valid znode_phys_t buffer, and EIO if the attributes cannot
 *		be read or the generation or file type no longer match.
 */
int
zfs_rezget(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	dmu_object_info_t doi;
	dmu_buf_t *db;
	vnode_t *vp;
	uint64_t obj_num = zp->z_id;
	uint64_t mode, size;
	sa_bulk_attr_t bulk[8];
	int err;
	int count = 0;
	uint64_t gen;

	/*
	 * Remove cached pages before reloading the znode, so that they are not
	 * lingering after we run into any error.  Ideally, we should vgone()
	 * the vnode in case of error, but currently we cannot do that
	 * because of the LOR between the vnode lock and z_teardown_lock.
	 * So, instead, we have to "doom" the znode in the illumos style.
	 */
	vp = ZTOV(zp);
	vn_pages_remove(vp, 0, 0);

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	/* Drop the cached ACL; it is not reloaded here. */
	mutex_enter(&zp->z_acl_lock);
	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	mutex_exit(&zp->z_acl_lock);
	ASSERT(zp->z_sa_hdl == NULL);
	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	/*
	 * The bonus buffer must hold either system attributes or an
	 * old-style znode_phys_t that is large enough.
	 */
	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_SA &&
	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
		sa_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EINVAL));
	}

	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
	/* Remember the old size so the pager is only resized on a change. */
	size = zp->z_size;

	/* reload cached values */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
	    &gen, sizeof (gen));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, sizeof (zp->z_size));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &zp->z_links, sizeof (zp->z_links));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    &zp->z_atime, sizeof (zp->z_atime));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
	    &zp->z_uid, sizeof (zp->z_uid));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
	    &zp->z_gid, sizeof (zp->z_gid));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
	    &mode, sizeof (mode));

	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EIO));
	}

	zp->z_mode = mode;

	/* A different generation means this is no longer the same file. */
	if (gen != zp->z_gen) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EIO));
	}

	/*
	 * It is highly improbable but still quite possible that two
	 * objects in different datasets are created with the same
	 * object numbers and in transaction groups with the same
	 * numbers.  znodes corresponding to those objects would
	 * have the same z_id and z_gen, but their other attributes
	 * may be different.
	 * zfs recv -F may replace one of such objects with the other.
	 * As a result file properties recorded in the replaced
	 * object's vnode may no longer match the received object's
	 * properties.  At present the only cached property is the
	 * file's type recorded in v_type.
	 * So, handle this case by leaving the old vnode and znode
	 * disassociated from the actual object.  A new vnode and a
	 * znode will be created if the object is accessed
	 * (e.g. via a look-up).  The old vnode and znode will be
	 * recycled when the last vnode reference is dropped.
	 */
	if (vp->v_type != IFTOVT((mode_t)zp->z_mode)) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (SET_ERROR(EIO));
	}

	/*
	 * If the file has zero links, then it has been unlinked on the send
	 * side and it must be in the received unlinked set.
	 * We call zfs_znode_dmu_fini() now to prevent any accesses to the
	 * stale data and to prevent automatic removal of the file in
	 * zfs_zinactive().  The file will be removed either when it is removed
	 * on the send side and the next incremental stream is received or
	 * when the unlinked set gets processed.
	 */
	zp->z_unlinked = (zp->z_links == 0);
	if (zp->z_unlinked) {
		zfs_znode_dmu_fini(zp);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (0);
	}

	zp->z_blksz = doi.doi_data_block_size;
	if (zp->z_size != size)
		vnode_pager_setsize(vp, zp->z_size);

	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);

	return (0);
}

/*
 * Free the on-disk object backing a znode, together with its external ACL
 * object if it has one, and then release the in-core znode.
 *
 *	IN:	zp	- znode to delete.
 *		tx	- transaction in which the objects are freed.
 */
void
zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	objset_t *os = zfsvfs->z_os;
	uint64_t obj = zp->z_id;
	uint64_t acl_obj = zfs_external_acl(zp);

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
	if (acl_obj) {
		/* An external ACL object is not expected on an SA znode. */
		VERIFY(!zp->z_is_sa);
		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
	}
	VERIFY(0 == dmu_object_free(os, obj, tx));
	zfs_znode_dmu_fini(zp);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
	zfs_znode_free(zp);
}

/*
 * Release a znode.  An unlinked file on a writable file system is handed to
 * zfs_rmnode(); otherwise the SA handle is dropped and the znode is freed.
 *
 *	IN:	zp	- znode to release; it must still have an SA handle.
 */
void
zfs_zinactive(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	uint64_t z_id = zp->z_id;

	ASSERT(zp->z_sa_hdl);

	/*
	 * Don't allow a zfs_zget() while we're trying to release this znode
	 */
	ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);

	/*
	 * If this was the last reference to a file with no links, remove
	 * the file from the file system unless the file system is mounted
	 * read-only.  That can happen, for example, if the file system was
	 * originally read-write, the file was opened, then unlinked and
	 * the file system was made read-only before the file was finally
	 * closed.  The file will remain in the unlinked set.
	 */
	if (zp->z_unlinked) {
		ASSERT(!zfsvfs->z_issnap);
		if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0) {
			ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
			zfs_rmnode(zp);
			return;
		}
	}

	zfs_znode_dmu_fini(zp);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
	zfs_znode_free(zp);
}

/*
 * Detach a znode from its file system's list of znodes, free its cached ACL
 * and release its memory.  The SA handle must already have been released.
 *
 *	IN:	zp	- znode to free.
 */
void
zfs_znode_free(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ASSERT(zp->z_sa_hdl == NULL);
	zp->z_vnode = NULL;
	mutex_enter(&zfsvfs->z_znodes_lock);
	POINTER_INVALIDATE(&zp->z_zfsvfs);
	list_remove(&zfsvfs->z_all_znodes, zp);
	zfsvfs->z_nr_znodes--;
	mutex_exit(&zfsvfs->z_znodes_lock);

	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	zfs_znode_free_kmem(zp);
}

/*
 * Record the current time for the timestamps selected by flag.
 *
 *	IN:	zp	- znode being updated.
 *		flag	- any of AT_ATIME, AT_MTIME and AT_CTIME.
 *		have_tx	- B_TRUE clears z_atime_dirty and bumps z_seq;
 *			  B_FALSE marks the access time dirty instead.
 *
 *	OUT:	mtime	- encoded current time, written if AT_MTIME is set.
 *		ctime	- encoded current time, written if AT_CTIME is set.
 *
 * The access time is stored directly in zp->z_atime.  On file systems that
 * use FUIDs, AT_MTIME also sets ZFS_ARCHIVE and ZFS_AV_MODIFIED and AT_CTIME
 * sets ZFS_ARCHIVE in zp->z_pflags.
 */
void
zfs_tstamp_update_setup_ext(znode_t *zp, uint_t flag, uint64_t mtime[2],
    uint64_t ctime[2], boolean_t have_tx)
{
	timestruc_t	now;

	vfs_timestamp(&now);

	if (have_tx) {	/* will sa_bulk_update happen really soon? */
		zp->z_atime_dirty = 0;
		zp->z_seq++;
	} else {
		zp->z_atime_dirty = 1;
	}

	if (flag & AT_ATIME) {
		ZFS_TIME_ENCODE(&now, zp->z_atime);
	}

	if (flag & AT_MTIME) {
		ZFS_TIME_ENCODE(&now, mtime);
		if (zp->z_zfsvfs->z_use_fuids) {
			zp->z_pflags |= (ZFS_ARCHIVE |
			    ZFS_AV_MODIFIED);
		}
	}

	if (flag & AT_CTIME) {
		ZFS_TIME_ENCODE(&now, ctime);
		if (zp->z_zfsvfs->z_use_fuids)
			zp->z_pflags |= ZFS_ARCHIVE;
	}
}


/*
 * Same as zfs_tstamp_update_setup_ext() with have_tx set to B_TRUE.
 */
void
zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
    uint64_t ctime[2])
{
	zfs_tstamp_update_setup_ext(zp, flag, mtime, ctime, B_TRUE);
}
/*
 * Grow the block size for a file.
 *
 *	IN:	zp	- znode of file whose block size is to grow.
 *		size	- requested block size
 *		tx	- open transaction.
1307 * 1308 * NOTE: this function assumes that the znode is write locked. 1309 */ 1310 void 1311 zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) 1312 { 1313 int error; 1314 u_longlong_t dummy; 1315 1316 if (size <= zp->z_blksz) 1317 return; 1318 /* 1319 * If the file size is already greater than the current blocksize, 1320 * we will not grow. If there is more than one block in a file, 1321 * the blocksize cannot change. 1322 */ 1323 if (zp->z_blksz && zp->z_size > zp->z_blksz) 1324 return; 1325 1326 error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, 1327 size, 0, tx); 1328 1329 if (error == ENOTSUP) 1330 return; 1331 ASSERT0(error); 1332 1333 /* What blocksize did we actually get? */ 1334 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); 1335 } 1336 1337 /* 1338 * Increase the file length 1339 * 1340 * IN: zp - znode of file to free data in. 1341 * end - new end-of-file 1342 * 1343 * RETURN: 0 on success, error code on failure 1344 */ 1345 static int 1346 zfs_extend(znode_t *zp, uint64_t end) 1347 { 1348 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1349 dmu_tx_t *tx; 1350 zfs_locked_range_t *lr; 1351 uint64_t newblksz; 1352 int error; 1353 1354 /* 1355 * We will change zp_size, lock the whole file. 1356 */ 1357 lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); 1358 1359 /* 1360 * Nothing to do if file already at desired length. 1361 */ 1362 if (end <= zp->z_size) { 1363 zfs_rangelock_exit(lr); 1364 return (0); 1365 } 1366 tx = dmu_tx_create(zfsvfs->z_os); 1367 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1368 zfs_sa_upgrade_txholds(tx, zp); 1369 if (end > zp->z_blksz && 1370 (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { 1371 /* 1372 * We are growing the file past the current block size. 1373 */ 1374 if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { 1375 /* 1376 * File's blocksize is already larger than the 1377 * "recordsize" property. Only let it grow to 1378 * the next power of 2. 
1379 */ 1380 ASSERT(!ISP2(zp->z_blksz)); 1381 newblksz = MIN(end, 1 << highbit64(zp->z_blksz)); 1382 } else { 1383 newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); 1384 } 1385 dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); 1386 } else { 1387 newblksz = 0; 1388 } 1389 1390 error = dmu_tx_assign(tx, TXG_WAIT); 1391 if (error) { 1392 dmu_tx_abort(tx); 1393 zfs_rangelock_exit(lr); 1394 return (error); 1395 } 1396 1397 if (newblksz) 1398 zfs_grow_blocksize(zp, newblksz, tx); 1399 1400 zp->z_size = end; 1401 1402 VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), 1403 &zp->z_size, sizeof (zp->z_size), tx)); 1404 1405 vnode_pager_setsize(ZTOV(zp), end); 1406 1407 zfs_rangelock_exit(lr); 1408 1409 dmu_tx_commit(tx); 1410 1411 return (0); 1412 } 1413 1414 /* 1415 * Free space in a file. 1416 * 1417 * IN: zp - znode of file to free data in. 1418 * off - start of section to free. 1419 * len - length of section to free. 1420 * 1421 * RETURN: 0 on success, error code on failure 1422 */ 1423 static int 1424 zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) 1425 { 1426 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1427 zfs_locked_range_t *lr; 1428 int error; 1429 1430 /* 1431 * Lock the range being freed. 1432 */ 1433 lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); 1434 1435 /* 1436 * Nothing to do if file already at desired length. 1437 */ 1438 if (off >= zp->z_size) { 1439 zfs_rangelock_exit(lr); 1440 return (0); 1441 } 1442 1443 if (off + len > zp->z_size) 1444 len = zp->z_size - off; 1445 1446 error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); 1447 1448 if (error == 0) { 1449 /* 1450 * In FreeBSD we cannot free block in the middle of a file, 1451 * but only at the end of a file, so this code path should 1452 * never happen. 1453 */ 1454 vnode_pager_setsize(ZTOV(zp), off); 1455 } 1456 1457 zfs_rangelock_exit(lr); 1458 1459 return (error); 1460 } 1461 1462 /* 1463 * Truncate a file 1464 * 1465 * IN: zp - znode of file to free data in. 
 *		end	- new end-of-file.
 *
 *	RETURN:	0 on success, error code on failure
 */
static int
zfs_trunc(znode_t *zp, uint64_t end)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	vnode_t *vp = ZTOV(zp);
	dmu_tx_t *tx;
	zfs_locked_range_t *lr;
	int error;
	sa_bulk_attr_t bulk[2];
	int count = 0;

	/*
	 * We will change z_size, lock the whole file.
	 */
	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (end >= zp->z_size) {
		zfs_rangelock_exit(lr);
		return (0);
	}

	/* Free everything from the new end-of-file to the end of the object. */
	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
	    DMU_OBJECT_END);
	if (error) {
		zfs_rangelock_exit(lr);
		return (error);
	}
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	dmu_tx_mark_netfree(tx);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		zfs_rangelock_exit(lr);
		return (error);
	}

	/* Record the new size; it is always part of the bulk update. */
	zp->z_size = end;
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
	    NULL, &zp->z_size, sizeof (zp->z_size));

	/* Truncating to zero also clears the ZFS_SPARSE flag. */
	if (end == 0) {
		zp->z_pflags &= ~ZFS_SPARSE;
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &zp->z_pflags, 8);
	}
	VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);

	dmu_tx_commit(tx);

	/*
	 * Clear any mapped pages in the truncated region.  This has to
	 * happen outside of the transaction to avoid the possibility of
	 * a deadlock with someone trying to push a page that we are
	 * about to invalidate.
	 */
	vnode_pager_setsize(vp, end);

	zfs_rangelock_exit(lr);

	return (0);
}

/*
 * Free space in a file
 *
 *	IN:	zp	- znode of file to free data in.
1541 * off - start of range 1542 * len - end of range (0 => EOF) 1543 * flag - current file open mode flags. 1544 * log - TRUE if this action should be logged 1545 * 1546 * RETURN: 0 on success, error code on failure 1547 */ 1548 int 1549 zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) 1550 { 1551 dmu_tx_t *tx; 1552 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1553 zilog_t *zilog = zfsvfs->z_log; 1554 uint64_t mode; 1555 uint64_t mtime[2], ctime[2]; 1556 sa_bulk_attr_t bulk[3]; 1557 int count = 0; 1558 int error; 1559 1560 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode, 1561 sizeof (mode))) != 0) 1562 return (error); 1563 1564 if (off > zp->z_size) { 1565 error = zfs_extend(zp, off+len); 1566 if (error == 0 && log) 1567 goto log; 1568 else 1569 return (error); 1570 } 1571 1572 if (len == 0) { 1573 error = zfs_trunc(zp, off); 1574 } else { 1575 if ((error = zfs_free_range(zp, off, len)) == 0 && 1576 off + len > zp->z_size) 1577 error = zfs_extend(zp, off+len); 1578 } 1579 if (error || !log) 1580 return (error); 1581 log: 1582 tx = dmu_tx_create(zfsvfs->z_os); 1583 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1584 zfs_sa_upgrade_txholds(tx, zp); 1585 error = dmu_tx_assign(tx, TXG_WAIT); 1586 if (error) { 1587 dmu_tx_abort(tx); 1588 return (error); 1589 } 1590 1591 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16); 1592 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16); 1593 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 1594 NULL, &zp->z_pflags, 8); 1595 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); 1596 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1597 ASSERT(error == 0); 1598 1599 zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); 1600 1601 dmu_tx_commit(tx); 1602 return (0); 1603 } 1604 1605 void 1606 zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) 1607 { 1608 uint64_t moid, obj, sa_obj, version; 1609 uint64_t sense = 
ZFS_CASE_SENSITIVE; 1610 uint64_t norm = 0; 1611 nvpair_t *elem; 1612 int error; 1613 int i; 1614 znode_t *rootzp = NULL; 1615 zfsvfs_t *zfsvfs; 1616 vattr_t vattr; 1617 znode_t *zp; 1618 zfs_acl_ids_t acl_ids; 1619 1620 /* 1621 * First attempt to create master node. 1622 */ 1623 /* 1624 * In an empty objset, there are no blocks to read and thus 1625 * there can be no i/o errors (which we assert below). 1626 */ 1627 moid = MASTER_NODE_OBJ; 1628 error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, 1629 DMU_OT_NONE, 0, tx); 1630 ASSERT(error == 0); 1631 1632 /* 1633 * Set starting attributes. 1634 */ 1635 version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); 1636 elem = NULL; 1637 while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { 1638 /* For the moment we expect all zpl props to be uint64_ts */ 1639 uint64_t val; 1640 char *name; 1641 1642 ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); 1643 VERIFY(nvpair_value_uint64(elem, &val) == 0); 1644 name = nvpair_name(elem); 1645 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { 1646 if (val < version) 1647 version = val; 1648 } else { 1649 error = zap_update(os, moid, name, 8, 1, &val, tx); 1650 } 1651 ASSERT(error == 0); 1652 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) 1653 norm = val; 1654 else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) 1655 sense = val; 1656 } 1657 ASSERT(version != 0); 1658 error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); 1659 1660 /* 1661 * Create zap object used for SA attribute registration 1662 */ 1663 1664 if (version >= ZPL_VERSION_SA) { 1665 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 1666 DMU_OT_NONE, 0, tx); 1667 error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 1668 ASSERT(error == 0); 1669 } else { 1670 sa_obj = 0; 1671 } 1672 /* 1673 * Create a delete queue. 
1674 */ 1675 obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); 1676 1677 error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); 1678 ASSERT(error == 0); 1679 1680 /* 1681 * Create root znode. Create minimal znode/vnode/zfsvfs 1682 * to allow zfs_mknode to work. 1683 */ 1684 VATTR_NULL(&vattr); 1685 vattr.va_mask = AT_MODE|AT_UID|AT_GID; 1686 vattr.va_type = VDIR; 1687 vattr.va_mode = S_IFDIR|0755; 1688 vattr.va_uid = crgetuid(cr); 1689 vattr.va_gid = crgetgid(cr); 1690 1691 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 1692 1693 rootzp = zfs_znode_alloc_kmem(KM_SLEEP); 1694 ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); 1695 rootzp->z_moved = 0; 1696 rootzp->z_unlinked = 0; 1697 rootzp->z_atime_dirty = 0; 1698 rootzp->z_is_sa = USE_SA(version, os); 1699 1700 zfsvfs->z_os = os; 1701 zfsvfs->z_parent = zfsvfs; 1702 zfsvfs->z_version = version; 1703 zfsvfs->z_use_fuids = USE_FUIDS(version, os); 1704 zfsvfs->z_use_sa = USE_SA(version, os); 1705 zfsvfs->z_norm = norm; 1706 1707 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 1708 &zfsvfs->z_attr_table); 1709 1710 ASSERT(error == 0); 1711 1712 /* 1713 * Fold case on file systems that are always or sometimes case 1714 * insensitive. 
1715 */ 1716 if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) 1717 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 1718 1719 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 1720 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 1721 offsetof(znode_t, z_link_node)); 1722 1723 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1724 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 1725 1726 rootzp->z_zfsvfs = zfsvfs; 1727 VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, 1728 cr, NULL, &acl_ids)); 1729 zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); 1730 ASSERT3P(zp, ==, rootzp); 1731 error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); 1732 ASSERT(error == 0); 1733 zfs_acl_ids_free(&acl_ids); 1734 POINTER_INVALIDATE(&rootzp->z_zfsvfs); 1735 1736 sa_handle_destroy(rootzp->z_sa_hdl); 1737 zfs_znode_free_kmem(rootzp); 1738 1739 /* 1740 * Create shares directory 1741 */ 1742 1743 error = zfs_create_share_dir(zfsvfs, tx); 1744 1745 ASSERT(error == 0); 1746 1747 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1748 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1749 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1750 } 1751 #endif /* _KERNEL */ 1752 1753 static int 1754 zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) 1755 { 1756 uint64_t sa_obj = 0; 1757 int error; 1758 1759 error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); 1760 if (error != 0 && error != ENOENT) 1761 return (error); 1762 1763 error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); 1764 return (error); 1765 } 1766 1767 static int 1768 zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, 1769 dmu_buf_t **db, void *tag) 1770 { 1771 dmu_object_info_t doi; 1772 int error; 1773 1774 if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) 1775 return (error); 1776 1777 dmu_object_info_from_db(*db, &doi); 1778 if ((doi.doi_bonus_type != DMU_OT_SA && 1779 doi.doi_bonus_type != DMU_OT_ZNODE) || 1780 (doi.doi_bonus_type == 
DMU_OT_ZNODE && 1781 doi.doi_bonus_size < sizeof (znode_phys_t))) { 1782 sa_buf_rele(*db, tag); 1783 return (SET_ERROR(ENOTSUP)); 1784 } 1785 1786 error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); 1787 if (error != 0) { 1788 sa_buf_rele(*db, tag); 1789 return (error); 1790 } 1791 1792 return (0); 1793 } 1794 1795 static void 1796 zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) 1797 { 1798 sa_handle_destroy(hdl); 1799 sa_buf_rele(db, tag); 1800 } 1801 1802 /* 1803 * Given an object number, return its parent object number and whether 1804 * or not the object is an extended attribute directory. 1805 */ 1806 static int 1807 zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, 1808 uint64_t *pobjp, int *is_xattrdir) 1809 { 1810 uint64_t parent; 1811 uint64_t pflags; 1812 uint64_t mode; 1813 uint64_t parent_mode; 1814 sa_bulk_attr_t bulk[3]; 1815 sa_handle_t *sa_hdl; 1816 dmu_buf_t *sa_db; 1817 int count = 0; 1818 int error; 1819 1820 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, 1821 &parent, sizeof (parent)); 1822 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, 1823 &pflags, sizeof (pflags)); 1824 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, 1825 &mode, sizeof (mode)); 1826 1827 if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) 1828 return (error); 1829 1830 /* 1831 * When a link is removed its parent pointer is not changed and will 1832 * be invalid. There are two cases where a link is removed but the 1833 * file stays around, when it goes to the delete queue and when there 1834 * are additional links. 
1835 */ 1836 error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); 1837 if (error != 0) 1838 return (error); 1839 1840 error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); 1841 zfs_release_sa_handle(sa_hdl, sa_db, FTAG); 1842 if (error != 0) 1843 return (error); 1844 1845 *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); 1846 1847 /* 1848 * Extended attributes can be applied to files, directories, etc. 1849 * Otherwise the parent must be a directory. 1850 */ 1851 if (!*is_xattrdir && !S_ISDIR(parent_mode)) 1852 return (SET_ERROR(EINVAL)); 1853 1854 *pobjp = parent; 1855 1856 return (0); 1857 } 1858 1859 /* 1860 * Given an object number, return some zpl level statistics 1861 */ 1862 static int 1863 zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, 1864 zfs_stat_t *sb) 1865 { 1866 sa_bulk_attr_t bulk[4]; 1867 int count = 0; 1868 1869 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, 1870 &sb->zs_mode, sizeof (sb->zs_mode)); 1871 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, 1872 &sb->zs_gen, sizeof (sb->zs_gen)); 1873 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, 1874 &sb->zs_links, sizeof (sb->zs_links)); 1875 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, 1876 &sb->zs_ctime, sizeof (sb->zs_ctime)); 1877 1878 return (sa_bulk_lookup(hdl, bulk, count)); 1879 } 1880 1881 static int 1882 zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, 1883 sa_attr_type_t *sa_table, char *buf, int len) 1884 { 1885 sa_handle_t *sa_hdl; 1886 sa_handle_t *prevhdl = NULL; 1887 dmu_buf_t *prevdb = NULL; 1888 dmu_buf_t *sa_db = NULL; 1889 char *path = buf + len - 1; 1890 int error; 1891 1892 *path = '\0'; 1893 sa_hdl = hdl; 1894 1895 uint64_t deleteq_obj; 1896 VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ, 1897 ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj)); 1898 error = zap_lookup_int(osp, deleteq_obj, obj); 1899 if (error == 0) { 1900 return (ESTALE); 1901 } else if (error != 
ENOENT) { 1902 return (error); 1903 } 1904 error = 0; 1905 1906 for (;;) { 1907 uint64_t pobj; 1908 char component[MAXNAMELEN + 2]; 1909 size_t complen; 1910 int is_xattrdir; 1911 1912 if (prevdb) 1913 zfs_release_sa_handle(prevhdl, prevdb, FTAG); 1914 1915 if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, 1916 &is_xattrdir)) != 0) 1917 break; 1918 1919 if (pobj == obj) { 1920 if (path[0] != '/') 1921 *--path = '/'; 1922 break; 1923 } 1924 1925 component[0] = '/'; 1926 if (is_xattrdir) { 1927 (void) sprintf(component + 1, "<xattrdir>"); 1928 } else { 1929 error = zap_value_search(osp, pobj, obj, 1930 ZFS_DIRENT_OBJ(-1ULL), component + 1); 1931 if (error != 0) 1932 break; 1933 } 1934 1935 complen = strlen(component); 1936 path -= complen; 1937 ASSERT(path >= buf); 1938 bcopy(component, path, complen); 1939 obj = pobj; 1940 1941 if (sa_hdl != hdl) { 1942 prevhdl = sa_hdl; 1943 prevdb = sa_db; 1944 } 1945 error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); 1946 if (error != 0) { 1947 sa_hdl = prevhdl; 1948 sa_db = prevdb; 1949 break; 1950 } 1951 } 1952 1953 if (sa_hdl != NULL && sa_hdl != hdl) { 1954 ASSERT(sa_db != NULL); 1955 zfs_release_sa_handle(sa_hdl, sa_db, FTAG); 1956 } 1957 1958 if (error == 0) 1959 (void) memmove(buf, path, buf + len - path); 1960 1961 return (error); 1962 } 1963 1964 int 1965 zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) 1966 { 1967 sa_attr_type_t *sa_table; 1968 sa_handle_t *hdl; 1969 dmu_buf_t *db; 1970 int error; 1971 1972 error = zfs_sa_setup(osp, &sa_table); 1973 if (error != 0) 1974 return (error); 1975 1976 error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); 1977 if (error != 0) 1978 return (error); 1979 1980 error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); 1981 1982 zfs_release_sa_handle(hdl, db, FTAG); 1983 return (error); 1984 } 1985 1986 int 1987 zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, 1988 char *buf, int len) 1989 { 1990 char *path = buf + len - 1; 
1991 sa_attr_type_t *sa_table; 1992 sa_handle_t *hdl; 1993 dmu_buf_t *db; 1994 int error; 1995 1996 *path = '\0'; 1997 1998 error = zfs_sa_setup(osp, &sa_table); 1999 if (error != 0) 2000 return (error); 2001 2002 error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); 2003 if (error != 0) 2004 return (error); 2005 2006 error = zfs_obj_to_stats_impl(hdl, sa_table, sb); 2007 if (error != 0) { 2008 zfs_release_sa_handle(hdl, db, FTAG); 2009 return (error); 2010 } 2011 2012 error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); 2013 2014 zfs_release_sa_handle(hdl, db, FTAG); 2015 return (error); 2016 } 2017 2018 #ifdef _KERNEL 2019 int 2020 zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf) 2021 { 2022 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2023 uint64_t parent; 2024 int is_xattrdir; 2025 int err; 2026 2027 /* Extended attributes should not be visible as regular files. */ 2028 if ((zp->z_pflags & ZFS_XATTR) != 0) 2029 return (SET_ERROR(EINVAL)); 2030 2031 err = zfs_obj_to_pobj(zfsvfs->z_os, zp->z_sa_hdl, zfsvfs->z_attr_table, 2032 &parent, &is_xattrdir); 2033 if (err != 0) 2034 return (err); 2035 ASSERT0(is_xattrdir); 2036 2037 /* No name as this is a root object. */ 2038 if (parent == zp->z_id) 2039 return (SET_ERROR(EINVAL)); 2040 2041 err = zap_value_search(zfsvfs->z_os, parent, zp->z_id, 2042 ZFS_DIRENT_OBJ(-1ULL), buf); 2043 if (err != 0) 2044 return (err); 2045 err = zfs_zget(zfsvfs, parent, dzpp); 2046 return (err); 2047 } 2048 #endif /* _KERNEL */ 2049