/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 */

/* Portions Copyright 2007 Jeremy Teo */

#ifdef _KERNEL
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/mntent.h>
#include <sys/u8_textprep.h>
#include <sys/dsl_dataset.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/errno.h>
#include <sys/atomic.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_acl.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_rlock.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_ctldir.h>
#include <sys/dnode.h>
#include <sys/fs/zfs.h>
#include <sys/zpl.h>
#endif /* _KERNEL */

#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
#include <sys/zfs_refcount.h>
#include <sys/stat.h>
#include <sys/zap.h>
#include <sys/zfs_znode.h>
#include <sys/sa.h>
#include <sys/zfs_sa.h>
#include <sys/zfs_stat.h>

#include "zfs_prop.h"
#include "zfs_comutil.h"

/*
 * Functions needed for userland (i.e. libzpool) are not put under
 * #ifdef _KERNEL; the rest of the functions have dependencies
 * (such as VFS logic) that will not compile easily in userland.
 */
#ifdef _KERNEL

static kmem_cache_t *znode_cache = NULL;
static kmem_cache_t *znode_hold_cache = NULL;
unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;

/*
 * This is used by the test suite so that it can delay znodes from being
 * freed in order to inspect the unlinked set.
 */
static int zfs_unlink_suspend_progress = 0;

/*
 * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
 * z_rangelock. It will modify the offset and length of the lock to reflect
 * znode-specific information, and convert RL_APPEND to RL_WRITER. This is
 * called with the rangelock_t's rl_lock held, which avoids races.
 */
static void
zfs_rangelock_cb(zfs_locked_range_t *new, void *arg)
{
	znode_t *zp = arg;

	/*
	 * If in append mode, convert to writer and lock starting at the
	 * current end of file.
	 */
	if (new->lr_type == RL_APPEND) {
		new->lr_offset = zp->z_size;
		new->lr_type = RL_WRITER;
	}

	/*
	 * If we need to grow the block size then lock the whole file range.
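	 * (Illustrative note: a blocksize change is only possible while the
	 * file still fits in a single block, see zfs_grow_blocksize() below,
	 * so the whole range is locked to exclude any concurrent I/O while
	 * that block is rewritten.)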
	 */
	uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
	if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
	    zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
		new->lr_offset = 0;
		new->lr_length = UINT64_MAX;
	}
}

static int
zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
{
	(void) arg, (void) kmflags;
	znode_t *zp = buf;

	inode_init_once(ZTOI(zp));
	list_link_init(&zp->z_link_node);

	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL);
	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
	rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);

	zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);

	zp->z_dirlocks = NULL;
	zp->z_acl_cached = NULL;
	zp->z_xattr_cached = NULL;
	zp->z_xattr_parent = 0;
	zp->z_sync_writes_cnt = 0;
	zp->z_async_writes_cnt = 0;

	return (0);
}

static void
zfs_znode_cache_destructor(void *buf, void *arg)
{
	(void) arg;
	znode_t *zp = buf;

	ASSERT(!list_link_active(&zp->z_link_node));
	mutex_destroy(&zp->z_lock);
	rw_destroy(&zp->z_parent_lock);
	rw_destroy(&zp->z_name_lock);
	mutex_destroy(&zp->z_acl_lock);
	rw_destroy(&zp->z_xattr_lock);
	zfs_rangelock_fini(&zp->z_rangelock);

	ASSERT3P(zp->z_dirlocks, ==, NULL);
	ASSERT3P(zp->z_acl_cached, ==, NULL);
	ASSERT3P(zp->z_xattr_cached, ==, NULL);

	ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
	ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
}

static int
zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags)
{
	(void) arg, (void) kmflags;
	znode_hold_t *zh = buf;

	mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL);
	zh->zh_refcount = 0;

	return (0);
}

static void
zfs_znode_hold_cache_destructor(void *buf, void *arg)
{
	(void) arg;
	znode_hold_t *zh = buf;

	mutex_destroy(&zh->zh_lock);
}

void
zfs_znode_init(void)
{
	/*
	 * Initialize zcache. The KMC_SLAB hint is used so that the cache is
	 * backed by the Linux slab (kmalloc()), which is needed for any
	 * wait_on_bit() operations on the related inode to operate properly.
	 */
	ASSERT(znode_cache == NULL);
	znode_cache = kmem_cache_create("zfs_znode_cache",
	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
	    zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB);

	ASSERT(znode_hold_cache == NULL);
	znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
	    sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor,
	    zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0);
}

void
zfs_znode_fini(void)
{
	/*
	 * Cleanup zcache
	 */
	if (znode_cache)
		kmem_cache_destroy(znode_cache);
	znode_cache = NULL;

	if (znode_hold_cache)
		kmem_cache_destroy(znode_hold_cache);
	znode_hold_cache = NULL;
}

/*
 * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to
 * serialize access to a znode and its SA buffer while the object is being
 * created or destroyed. This kind of locking would normally reside in the
 * znode itself but in this case that's impossible because the znode and SA
 * buffer may not yet exist.
 * Therefore the locking is handled externally
 * with an array of mutexes and AVL trees which contain per-object locks.
 *
 * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted
 * into the correct AVL tree and finally the per-object lock is held. In
 * zfs_znode_hold_exit() the process is reversed. The per-object lock is
 * released, removed from the AVL tree and destroyed if there are no waiters.
 *
 * This scheme has two important properties:
 *
 * 1) No memory allocations are performed while holding one of the
 *    z_hold_locks. This ensures evict(), which can be called from direct
 *    memory reclaim, will never block waiting on a z_hold_locks which just
 *    happens to have hashed to the same index.
 *
 * 2) All locks used to serialize access to an object are per-object and never
 *    shared. This minimizes lock contention without creating a large number
 *    of dedicated locks.
 *
 * On the downside it does require znode_hold_t structures to be frequently
 * allocated and freed. However, because these are backed by a kmem cache
 * and very short lived this cost is minimal.
 */
int
zfs_znode_hold_compare(const void *a, const void *b)
{
	const znode_hold_t *zh_a = (const znode_hold_t *)a;
	const znode_hold_t *zh_b = (const znode_hold_t *)b;

	return (TREE_CMP(zh_a->zh_obj, zh_b->zh_obj));
}

static boolean_t __maybe_unused
zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj)
{
	znode_hold_t *zh, search;
	int i = ZFS_OBJ_HASH(zfsvfs, obj);
	boolean_t held;

	search.zh_obj = obj;

	mutex_enter(&zfsvfs->z_hold_locks[i]);
	zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
	held = (zh && MUTEX_HELD(&zh->zh_lock)) ?
	    B_TRUE : B_FALSE;
	mutex_exit(&zfsvfs->z_hold_locks[i]);

	return (held);
}

znode_hold_t *
zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj)
{
	znode_hold_t *zh, *zh_new, search;
	int i = ZFS_OBJ_HASH(zfsvfs, obj);
	boolean_t found = B_FALSE;

	zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
	search.zh_obj = obj;

	mutex_enter(&zfsvfs->z_hold_locks[i]);
	zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
	if (likely(zh == NULL)) {
		zh = zh_new;
		zh->zh_obj = obj;
		avl_add(&zfsvfs->z_hold_trees[i], zh);
	} else {
		ASSERT3U(zh->zh_obj, ==, obj);
		found = B_TRUE;
	}
	zh->zh_refcount++;
	ASSERT3S(zh->zh_refcount, >, 0);
	mutex_exit(&zfsvfs->z_hold_locks[i]);

	if (found == B_TRUE)
		kmem_cache_free(znode_hold_cache, zh_new);

	ASSERT(MUTEX_NOT_HELD(&zh->zh_lock));
	mutex_enter(&zh->zh_lock);

	return (zh);
}

void
zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh)
{
	int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj);
	boolean_t remove = B_FALSE;

	ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj));
	mutex_exit(&zh->zh_lock);

	mutex_enter(&zfsvfs->z_hold_locks[i]);
	ASSERT3S(zh->zh_refcount, >, 0);
	if (--zh->zh_refcount == 0) {
		avl_remove(&zfsvfs->z_hold_trees[i], zh);
		remove = B_TRUE;
	}
	mutex_exit(&zfsvfs->z_hold_locks[i]);

	if (remove == B_TRUE)
		kmem_cache_free(znode_hold_cache, zh);
}

dev_t
zfs_cmpldev(uint64_t dev)
{
	return (dev);
}

static void
zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
    dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
{
	ASSERT(zfs_znode_held(zfsvfs, zp->z_id));

	mutex_enter(&zp->z_lock);

	ASSERT(zp->z_sa_hdl == NULL);
	ASSERT(zp->z_acl_cached == NULL);
	if (sa_hdl == NULL) {
		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
		    SA_HDL_SHARED, &zp->z_sa_hdl));
	} else {
		zp->z_sa_hdl = sa_hdl;
		sa_set_userp(sa_hdl, zp);
	}

	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;

	mutex_exit(&zp->z_lock);
}

void
zfs_znode_dmu_fini(znode_t *zp)
{
	ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) ||
	    RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock));

	sa_handle_destroy(zp->z_sa_hdl);
	zp->z_sa_hdl = NULL;
}

/*
 * Called by new_inode() to allocate a new inode.
 */
int
zfs_inode_alloc(struct super_block *sb, struct inode **ip)
{
	znode_t *zp;

	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
	*ip = ZTOI(zp);

	return (0);
}

/*
 * Called in multiple places when an inode should be destroyed.
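 * (Illustrative note: on Linux this is typically reached through the
 * super_block's ->destroy_inode callback, zpl_inode_destroy(), once the VFS
 * has finished evicting the inode; the exact call sites vary with kernel
 * version.)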
383 */ 384 void 385 zfs_inode_destroy(struct inode *ip) 386 { 387 znode_t *zp = ITOZ(ip); 388 zfsvfs_t *zfsvfs = ZTOZSB(zp); 389 390 mutex_enter(&zfsvfs->z_znodes_lock); 391 if (list_link_active(&zp->z_link_node)) { 392 list_remove(&zfsvfs->z_all_znodes, zp); 393 } 394 mutex_exit(&zfsvfs->z_znodes_lock); 395 396 if (zp->z_acl_cached) { 397 zfs_acl_free(zp->z_acl_cached); 398 zp->z_acl_cached = NULL; 399 } 400 401 if (zp->z_xattr_cached) { 402 nvlist_free(zp->z_xattr_cached); 403 zp->z_xattr_cached = NULL; 404 } 405 406 kmem_cache_free(znode_cache, zp); 407 } 408 409 static void 410 zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip) 411 { 412 uint64_t rdev = 0; 413 414 switch (ip->i_mode & S_IFMT) { 415 case S_IFREG: 416 ip->i_op = &zpl_inode_operations; 417 #ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND 418 ip->i_fop = &zpl_file_operations.kabi_fops; 419 #else 420 ip->i_fop = &zpl_file_operations; 421 #endif 422 ip->i_mapping->a_ops = &zpl_address_space_operations; 423 break; 424 425 case S_IFDIR: 426 #ifdef HAVE_RENAME2_OPERATIONS_WRAPPER 427 ip->i_flags |= S_IOPS_WRAPPER; 428 ip->i_op = &zpl_dir_inode_operations.ops; 429 #else 430 ip->i_op = &zpl_dir_inode_operations; 431 #endif 432 ip->i_fop = &zpl_dir_file_operations; 433 ITOZ(ip)->z_zn_prefetch = B_TRUE; 434 break; 435 436 case S_IFLNK: 437 ip->i_op = &zpl_symlink_inode_operations; 438 break; 439 440 /* 441 * rdev is only stored in a SA only for device files. 442 */ 443 case S_IFCHR: 444 case S_IFBLK: 445 (void) sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), &rdev, 446 sizeof (rdev)); 447 zfs_fallthrough; 448 case S_IFIFO: 449 case S_IFSOCK: 450 init_special_inode(ip, ip->i_mode, rdev); 451 ip->i_op = &zpl_special_inode_operations; 452 break; 453 454 default: 455 zfs_panic_recover("inode %llu has invalid mode: 0x%x\n", 456 (u_longlong_t)ip->i_ino, ip->i_mode); 457 458 /* Assume the inode is a file and attempt to continue */ 459 ip->i_mode = S_IFREG | 0644; 460 ip->i_op = &zpl_inode_operations; 461 #ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND 462 ip->i_fop = &zpl_file_operations.kabi_fops; 463 #else 464 ip->i_fop = &zpl_file_operations; 465 #endif 466 ip->i_mapping->a_ops = &zpl_address_space_operations; 467 break; 468 } 469 } 470 471 static void 472 zfs_set_inode_flags(znode_t *zp, struct inode *ip) 473 { 474 /* 475 * Linux and Solaris have different sets of file attributes, so we 476 * restrict this conversion to the intersection of the two. 477 */ 478 #ifdef HAVE_INODE_SET_FLAGS 479 unsigned int flags = 0; 480 if (zp->z_pflags & ZFS_IMMUTABLE) 481 flags |= S_IMMUTABLE; 482 if (zp->z_pflags & ZFS_APPENDONLY) 483 flags |= S_APPEND; 484 485 inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND); 486 #else 487 if (zp->z_pflags & ZFS_IMMUTABLE) 488 ip->i_flags |= S_IMMUTABLE; 489 else 490 ip->i_flags &= ~S_IMMUTABLE; 491 492 if (zp->z_pflags & ZFS_APPENDONLY) 493 ip->i_flags |= S_APPEND; 494 else 495 ip->i_flags &= ~S_APPEND; 496 #endif 497 } 498 499 /* 500 * Update the embedded inode given the znode. 501 */ 502 void 503 zfs_znode_update_vfs(znode_t *zp) 504 { 505 struct inode *ip; 506 uint32_t blksize; 507 u_longlong_t i_blocks; 508 509 ASSERT(zp != NULL); 510 ip = ZTOI(zp); 511 512 /* Skip .zfs control nodes which do not exist on disk. 
*/ 513 if (zfsctl_is_node(ip)) 514 return; 515 516 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks); 517 518 spin_lock(&ip->i_lock); 519 ip->i_mode = zp->z_mode; 520 ip->i_blocks = i_blocks; 521 i_size_write(ip, zp->z_size); 522 spin_unlock(&ip->i_lock); 523 } 524 525 526 /* 527 * Construct a znode+inode and initialize. 528 * 529 * This does not do a call to dmu_set_user() that is 530 * up to the caller to do, in case you don't want to 531 * return the znode 532 */ 533 static znode_t * 534 zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, 535 dmu_object_type_t obj_type, sa_handle_t *hdl) 536 { 537 znode_t *zp; 538 struct inode *ip; 539 uint64_t mode; 540 uint64_t parent; 541 uint64_t tmp_gen; 542 uint64_t links; 543 uint64_t z_uid, z_gid; 544 uint64_t atime[2], mtime[2], ctime[2], btime[2]; 545 inode_timespec_t tmp_ts; 546 uint64_t projid = ZFS_DEFAULT_PROJID; 547 sa_bulk_attr_t bulk[12]; 548 int count = 0; 549 550 ASSERT(zfsvfs != NULL); 551 552 ip = new_inode(zfsvfs->z_sb); 553 if (ip == NULL) 554 return (NULL); 555 556 zp = ITOZ(ip); 557 ASSERT(zp->z_dirlocks == NULL); 558 ASSERT3P(zp->z_acl_cached, ==, NULL); 559 ASSERT3P(zp->z_xattr_cached, ==, NULL); 560 zp->z_unlinked = B_FALSE; 561 zp->z_atime_dirty = B_FALSE; 562 #if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) 563 zp->z_is_mapped = B_FALSE; 564 #endif 565 zp->z_is_ctldir = B_FALSE; 566 zp->z_suspended = B_FALSE; 567 zp->z_sa_hdl = NULL; 568 zp->z_mapcnt = 0; 569 zp->z_id = db->db_object; 570 zp->z_blksz = blksz; 571 zp->z_seq = 0x7A4653; 572 zp->z_sync_cnt = 0; 573 zp->z_sync_writes_cnt = 0; 574 zp->z_async_writes_cnt = 0; 575 576 zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); 577 578 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); 579 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &tmp_gen, 8); 580 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 581 &zp->z_size, 8); 582 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8); 583 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 584 &zp->z_pflags, 8); 585 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, 586 &parent, 8); 587 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &z_uid, 8); 588 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &z_gid, 8); 589 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16); 590 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 591 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 592 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16); 593 594 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0 || 595 (dmu_objset_projectquota_enabled(zfsvfs->z_os) && 596 (zp->z_pflags & ZFS_PROJID) && 597 sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) { 598 if (hdl == NULL) 599 sa_handle_destroy(zp->z_sa_hdl); 600 zp->z_sa_hdl = NULL; 601 goto error; 602 } 603 604 zp->z_projid = projid; 605 zp->z_mode = ip->i_mode = mode; 606 ip->i_generation = (uint32_t)tmp_gen; 607 ip->i_blkbits = SPA_MINBLOCKSHIFT; 608 set_nlink(ip, (uint32_t)links); 609 zfs_uid_write(ip, z_uid); 610 zfs_gid_write(ip, z_gid); 611 zfs_set_inode_flags(zp, ip); 612 613 /* Cache the xattr parent id */ 614 if (zp->z_pflags & ZFS_XATTR) 615 zp->z_xattr_parent = parent; 616 617 ZFS_TIME_DECODE(&tmp_ts, atime); 618 zpl_inode_set_atime_to_ts(ip, tmp_ts); 619 ZFS_TIME_DECODE(&tmp_ts, mtime); 620 zpl_inode_set_mtime_to_ts(ip, tmp_ts); 621 ZFS_TIME_DECODE(&tmp_ts, ctime); 622 
zpl_inode_set_ctime_to_ts(ip, tmp_ts); 623 ZFS_TIME_DECODE(&zp->z_btime, btime); 624 625 ip->i_ino = zp->z_id; 626 zfs_znode_update_vfs(zp); 627 zfs_inode_set_ops(zfsvfs, ip); 628 629 /* 630 * The only way insert_inode_locked() can fail is if the ip->i_ino 631 * number is already hashed for this super block. This can never 632 * happen because the inode numbers map 1:1 with the object numbers. 633 * 634 * Exceptions include rolling back a mounted file system, either 635 * from the zfs rollback or zfs recv command. 636 * 637 * Active inodes are unhashed during the rollback, but since zrele 638 * can happen asynchronously, we can't guarantee they've been 639 * unhashed. This can cause hash collisions in unlinked drain 640 * processing so do not hash unlinked znodes. 641 */ 642 if (links > 0) 643 VERIFY3S(insert_inode_locked(ip), ==, 0); 644 645 mutex_enter(&zfsvfs->z_znodes_lock); 646 list_insert_tail(&zfsvfs->z_all_znodes, zp); 647 mutex_exit(&zfsvfs->z_znodes_lock); 648 649 if (links > 0) 650 unlock_new_inode(ip); 651 return (zp); 652 653 error: 654 iput(ip); 655 return (NULL); 656 } 657 658 /* 659 * Safely mark an inode dirty. Inodes which are part of a read-only 660 * file system or snapshot may not be dirtied. 661 */ 662 void 663 zfs_mark_inode_dirty(struct inode *ip) 664 { 665 zfsvfs_t *zfsvfs = ITOZSB(ip); 666 667 if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os)) 668 return; 669 670 mark_inode_dirty(ip); 671 } 672 673 static uint64_t empty_xattr; 674 static uint64_t pad[4]; 675 static zfs_acl_phys_t acl_phys; 676 /* 677 * Create a new DMU object to hold a zfs znode. 678 * 679 * IN: dzp - parent directory for new znode 680 * vap - file attributes for new znode 681 * tx - dmu transaction id for zap operations 682 * cr - credentials of caller 683 * flag - flags: 684 * IS_ROOT_NODE - new object will be root 685 * IS_TMPFILE - new object is of O_TMPFILE 686 * IS_XATTR - new object is an attribute 687 * acl_ids - ACL related attributes 688 * 689 * OUT: zpp - allocated znode (set to dzp if IS_ROOT_NODE) 690 * 691 */ 692 void 693 zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, 694 uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids) 695 { 696 uint64_t crtime[2], atime[2], mtime[2], ctime[2]; 697 uint64_t mode, size, links, parent, pflags; 698 uint64_t projid = ZFS_DEFAULT_PROJID; 699 uint64_t rdev = 0; 700 zfsvfs_t *zfsvfs = ZTOZSB(dzp); 701 dmu_buf_t *db; 702 inode_timespec_t now; 703 uint64_t gen, obj; 704 int bonuslen; 705 int dnodesize; 706 sa_handle_t *sa_hdl; 707 dmu_object_type_t obj_type; 708 sa_bulk_attr_t *sa_attrs; 709 int cnt = 0; 710 zfs_acl_locator_cb_t locate = { 0 }; 711 znode_hold_t *zh; 712 713 if (zfsvfs->z_replay) { 714 obj = vap->va_nodeid; 715 now = vap->va_ctime; /* see zfs_replay_create() */ 716 gen = vap->va_nblocks; /* ditto */ 717 dnodesize = vap->va_fsid; /* ditto */ 718 } else { 719 obj = 0; 720 gethrestime(&now); 721 gen = dmu_tx_get_txg(tx); 722 dnodesize = dmu_objset_dnodesize(zfsvfs->z_os); 723 } 724 725 if (dnodesize == 0) 726 dnodesize = DNODE_MIN_SIZE; 727 728 obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; 729 730 bonuslen = (obj_type == DMU_OT_SA) ? 731 DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE; 732 733 /* 734 * Create a new DMU object. 735 */ 736 /* 737 * There's currently no mechanism for pre-reading the blocks that will 738 * be needed to allocate a new object, so we accept the small chance 739 * that there will be an i/o error and we will fail one of the 740 * assertions below. 
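	 * (For example, the VERIFY0() checks on the zap_create_claim_norm_dnsize()
	 * and dmu_object_claim_dnsize() calls in the replay paths just below.)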
741 */ 742 if (S_ISDIR(vap->va_mode)) { 743 if (zfsvfs->z_replay) { 744 VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj, 745 zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, 746 obj_type, bonuslen, dnodesize, tx)); 747 } else { 748 obj = zap_create_norm_dnsize(zfsvfs->z_os, 749 zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, 750 obj_type, bonuslen, dnodesize, tx); 751 } 752 } else { 753 if (zfsvfs->z_replay) { 754 VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj, 755 DMU_OT_PLAIN_FILE_CONTENTS, 0, 756 obj_type, bonuslen, dnodesize, tx)); 757 } else { 758 obj = dmu_object_alloc_dnsize(zfsvfs->z_os, 759 DMU_OT_PLAIN_FILE_CONTENTS, 0, 760 obj_type, bonuslen, dnodesize, tx); 761 } 762 } 763 764 zh = zfs_znode_hold_enter(zfsvfs, obj); 765 VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db)); 766 767 /* 768 * If this is the root, fix up the half-initialized parent pointer 769 * to reference the just-allocated physical data area. 770 */ 771 if (flag & IS_ROOT_NODE) { 772 dzp->z_id = obj; 773 } 774 775 /* 776 * If parent is an xattr, so am I. 777 */ 778 if (dzp->z_pflags & ZFS_XATTR) { 779 flag |= IS_XATTR; 780 } 781 782 if (zfsvfs->z_use_fuids) 783 pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; 784 else 785 pflags = 0; 786 787 if (S_ISDIR(vap->va_mode)) { 788 size = 2; /* contents ("." and "..") */ 789 links = 2; 790 } else { 791 size = 0; 792 links = (flag & IS_TMPFILE) ? 0 : 1; 793 } 794 795 if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode)) 796 rdev = vap->va_rdev; 797 798 parent = dzp->z_id; 799 mode = acl_ids->z_mode; 800 if (flag & IS_XATTR) 801 pflags |= ZFS_XATTR; 802 803 if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) { 804 /* 805 * With ZFS_PROJID flag, we can easily know whether there is 806 * project ID stored on disk or not. See zfs_space_delta_cb(). 807 */ 808 if (obj_type != DMU_OT_ZNODE && 809 dmu_objset_projectquota_enabled(zfsvfs->z_os)) 810 pflags |= ZFS_PROJID; 811 812 /* 813 * Inherit project ID from parent if required. 814 */ 815 projid = zfs_inherit_projid(dzp); 816 if (dzp->z_pflags & ZFS_PROJINHERIT) 817 pflags |= ZFS_PROJINHERIT; 818 } 819 820 /* 821 * No execs denied will be determined when zfs_mode_compute() is called. 822 */ 823 pflags |= acl_ids->z_aclp->z_hints & 824 (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| 825 ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); 826 827 ZFS_TIME_ENCODE(&now, crtime); 828 ZFS_TIME_ENCODE(&now, ctime); 829 830 if (vap->va_mask & ATTR_ATIME) { 831 ZFS_TIME_ENCODE(&vap->va_atime, atime); 832 } else { 833 ZFS_TIME_ENCODE(&now, atime); 834 } 835 836 if (vap->va_mask & ATTR_MTIME) { 837 ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 838 } else { 839 ZFS_TIME_ENCODE(&now, mtime); 840 } 841 842 /* Now add in all of the "SA" attributes */ 843 VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, 844 &sa_hdl)); 845 846 /* 847 * Setup the array of attributes to be replaced/set on the new file 848 * 849 * order for DMU_OT_ZNODE is critical since it needs to be constructed 850 * in the old znode_phys_t format. 
Don't change this ordering 851 */ 852 sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP); 853 854 if (obj_type == DMU_OT_ZNODE) { 855 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), 856 NULL, &atime, 16); 857 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), 858 NULL, &mtime, 16); 859 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), 860 NULL, &ctime, 16); 861 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), 862 NULL, &crtime, 16); 863 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), 864 NULL, &gen, 8); 865 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), 866 NULL, &mode, 8); 867 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), 868 NULL, &size, 8); 869 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), 870 NULL, &parent, 8); 871 } else { 872 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), 873 NULL, &mode, 8); 874 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), 875 NULL, &size, 8); 876 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), 877 NULL, &gen, 8); 878 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), 879 NULL, &acl_ids->z_fuid, 8); 880 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), 881 NULL, &acl_ids->z_fgid, 8); 882 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), 883 NULL, &parent, 8); 884 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), 885 NULL, &pflags, 8); 886 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), 887 NULL, &atime, 16); 888 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), 889 NULL, &mtime, 16); 890 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), 891 NULL, &ctime, 16); 892 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), 893 NULL, &crtime, 16); 894 } 895 896 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8); 897 898 if (obj_type == DMU_OT_ZNODE) { 899 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL, 900 &empty_xattr, 8); 901 } else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) && 902 pflags & ZFS_PROJID) { 903 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs), 904 NULL, &projid, 8); 905 } 906 if (obj_type == DMU_OT_ZNODE || 907 (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) { 908 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs), 909 NULL, &rdev, 8); 910 } 911 if (obj_type == DMU_OT_ZNODE) { 912 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), 913 NULL, &pflags, 8); 914 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL, 915 &acl_ids->z_fuid, 8); 916 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL, 917 &acl_ids->z_fgid, 8); 918 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad, 919 sizeof (uint64_t) * 4); 920 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, 921 &acl_phys, sizeof (zfs_acl_phys_t)); 922 } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { 923 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL, 924 &acl_ids->z_aclp->z_acl_count, 8); 925 locate.cb_aclp = acl_ids->z_aclp; 926 SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs), 927 zfs_acl_data_locator, &locate, 928 acl_ids->z_aclp->z_acl_bytes); 929 mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, 930 acl_ids->z_fuid, acl_ids->z_fgid); 931 } 932 933 VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); 934 935 if (!(flag & IS_ROOT_NODE)) { 936 /* 937 * The call to zfs_znode_alloc() may fail if memory is low 938 * via the call path: alloc_inode() -> inode_init_always() -> 939 * security_inode_alloc() -> inode_alloc_security(). 
Since 940 * the existing code is written such that zfs_mknode() can 941 * not fail retry until sufficient memory has been reclaimed. 942 */ 943 do { 944 *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); 945 } while (*zpp == NULL); 946 947 VERIFY(*zpp != NULL); 948 VERIFY(dzp != NULL); 949 } else { 950 /* 951 * If we are creating the root node, the "parent" we 952 * passed in is the znode for the root. 953 */ 954 *zpp = dzp; 955 956 (*zpp)->z_sa_hdl = sa_hdl; 957 } 958 959 (*zpp)->z_pflags = pflags; 960 (*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode; 961 (*zpp)->z_dnodesize = dnodesize; 962 (*zpp)->z_projid = projid; 963 964 if (obj_type == DMU_OT_ZNODE || 965 acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { 966 VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); 967 } 968 kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END); 969 zfs_znode_hold_exit(zfsvfs, zh); 970 } 971 972 /* 973 * Update in-core attributes. It is assumed the caller will be doing an 974 * sa_bulk_update to push the changes out. 975 */ 976 void 977 zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) 978 { 979 xoptattr_t *xoap; 980 boolean_t update_inode = B_FALSE; 981 982 xoap = xva_getxoptattr(xvap); 983 ASSERT(xoap); 984 985 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 986 uint64_t times[2]; 987 ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); 988 (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), 989 ×, sizeof (times), tx); 990 XVA_SET_RTN(xvap, XAT_CREATETIME); 991 } 992 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 993 ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, 994 zp->z_pflags, tx); 995 XVA_SET_RTN(xvap, XAT_READONLY); 996 } 997 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 998 ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, 999 zp->z_pflags, tx); 1000 XVA_SET_RTN(xvap, XAT_HIDDEN); 1001 } 1002 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 1003 ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, 1004 zp->z_pflags, tx); 1005 XVA_SET_RTN(xvap, XAT_SYSTEM); 1006 } 1007 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 1008 ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, 1009 zp->z_pflags, tx); 1010 XVA_SET_RTN(xvap, XAT_ARCHIVE); 1011 } 1012 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 1013 ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, 1014 zp->z_pflags, tx); 1015 XVA_SET_RTN(xvap, XAT_IMMUTABLE); 1016 1017 update_inode = B_TRUE; 1018 } 1019 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 1020 ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, 1021 zp->z_pflags, tx); 1022 XVA_SET_RTN(xvap, XAT_NOUNLINK); 1023 } 1024 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 1025 ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, 1026 zp->z_pflags, tx); 1027 XVA_SET_RTN(xvap, XAT_APPENDONLY); 1028 1029 update_inode = B_TRUE; 1030 } 1031 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 1032 ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, 1033 zp->z_pflags, tx); 1034 XVA_SET_RTN(xvap, XAT_NODUMP); 1035 } 1036 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 1037 ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, 1038 zp->z_pflags, tx); 1039 XVA_SET_RTN(xvap, XAT_OPAQUE); 1040 } 1041 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 1042 ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, 1043 xoap->xoa_av_quarantined, zp->z_pflags, tx); 1044 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 1045 } 1046 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 1047 ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, 1048 zp->z_pflags, tx); 1049 XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 1050 } 1051 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { 1052 zfs_sa_set_scanstamp(zp, xvap, tx); 1053 XVA_SET_RTN(xvap, 
XAT_AV_SCANSTAMP); 1054 } 1055 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 1056 ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse, 1057 zp->z_pflags, tx); 1058 XVA_SET_RTN(xvap, XAT_REPARSE); 1059 } 1060 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 1061 ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, 1062 zp->z_pflags, tx); 1063 XVA_SET_RTN(xvap, XAT_OFFLINE); 1064 } 1065 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 1066 ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse, 1067 zp->z_pflags, tx); 1068 XVA_SET_RTN(xvap, XAT_SPARSE); 1069 } 1070 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { 1071 ZFS_ATTR_SET(zp, ZFS_PROJINHERIT, xoap->xoa_projinherit, 1072 zp->z_pflags, tx); 1073 XVA_SET_RTN(xvap, XAT_PROJINHERIT); 1074 } 1075 1076 if (update_inode) 1077 zfs_set_inode_flags(zp, ZTOI(zp)); 1078 } 1079 1080 int 1081 zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) 1082 { 1083 dmu_object_info_t doi; 1084 dmu_buf_t *db; 1085 znode_t *zp; 1086 znode_hold_t *zh; 1087 int err; 1088 sa_handle_t *hdl; 1089 1090 *zpp = NULL; 1091 1092 again: 1093 zh = zfs_znode_hold_enter(zfsvfs, obj_num); 1094 1095 err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); 1096 if (err) { 1097 zfs_znode_hold_exit(zfsvfs, zh); 1098 return (err); 1099 } 1100 1101 dmu_object_info_from_db(db, &doi); 1102 if (doi.doi_bonus_type != DMU_OT_SA && 1103 (doi.doi_bonus_type != DMU_OT_ZNODE || 1104 (doi.doi_bonus_type == DMU_OT_ZNODE && 1105 doi.doi_bonus_size < sizeof (znode_phys_t)))) { 1106 sa_buf_rele(db, NULL); 1107 zfs_znode_hold_exit(zfsvfs, zh); 1108 return (SET_ERROR(EINVAL)); 1109 } 1110 1111 hdl = dmu_buf_get_user(db); 1112 if (hdl != NULL) { 1113 zp = sa_get_userdata(hdl); 1114 1115 1116 /* 1117 * Since "SA" does immediate eviction we 1118 * should never find a sa handle that doesn't 1119 * know about the znode. 1120 */ 1121 1122 ASSERT3P(zp, !=, NULL); 1123 1124 mutex_enter(&zp->z_lock); 1125 ASSERT3U(zp->z_id, ==, obj_num); 1126 /* 1127 * If zp->z_unlinked is set, the znode is already marked 1128 * for deletion and should not be discovered. Check this 1129 * after checking igrab() due to fsetxattr() & O_TMPFILE. 1130 * 1131 * If igrab() returns NULL the VFS has independently 1132 * determined the inode should be evicted and has 1133 * called iput_final() to start the eviction process. 1134 * The SA handle is still valid but because the VFS 1135 * requires that the eviction succeed we must drop 1136 * our locks and references to allow the eviction to 1137 * complete. The zfs_zget() may then be retried. 1138 * 1139 * This unlikely case could be optimized by registering 1140 * a sops->drop_inode() callback. The callback would 1141 * need to detect the active SA hold thereby informing 1142 * the VFS that this inode should not be evicted. 1143 */ 1144 if (igrab(ZTOI(zp)) == NULL) { 1145 if (zp->z_unlinked) 1146 err = SET_ERROR(ENOENT); 1147 else 1148 err = SET_ERROR(EAGAIN); 1149 } else { 1150 *zpp = zp; 1151 err = 0; 1152 } 1153 1154 mutex_exit(&zp->z_lock); 1155 sa_buf_rele(db, NULL); 1156 zfs_znode_hold_exit(zfsvfs, zh); 1157 1158 if (err == EAGAIN) { 1159 /* inode might need this to finish evict */ 1160 cond_resched(); 1161 goto again; 1162 } 1163 return (err); 1164 } 1165 1166 /* 1167 * Not found create new znode/vnode but only if file exists. 1168 * 1169 * There is a small window where zfs_vget() could 1170 * find this object while a file create is still in 1171 * progress. This is checked for in zfs_znode_alloc() 1172 * 1173 * if zfs_znode_alloc() fails it will drop the hold on the 1174 * bonus buffer. 
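	 * In that case zfs_zget() returns ENOENT below and the znode hold
	 * taken above is released via zfs_znode_hold_exit().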
1175 */ 1176 zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, 1177 doi.doi_bonus_type, NULL); 1178 if (zp == NULL) { 1179 err = SET_ERROR(ENOENT); 1180 } else { 1181 *zpp = zp; 1182 } 1183 zfs_znode_hold_exit(zfsvfs, zh); 1184 return (err); 1185 } 1186 1187 int 1188 zfs_rezget(znode_t *zp) 1189 { 1190 zfsvfs_t *zfsvfs = ZTOZSB(zp); 1191 dmu_object_info_t doi; 1192 dmu_buf_t *db; 1193 uint64_t obj_num = zp->z_id; 1194 uint64_t mode; 1195 uint64_t links; 1196 sa_bulk_attr_t bulk[11]; 1197 int err; 1198 int count = 0; 1199 uint64_t gen; 1200 uint64_t z_uid, z_gid; 1201 uint64_t atime[2], mtime[2], ctime[2], btime[2]; 1202 inode_timespec_t tmp_ts; 1203 uint64_t projid = ZFS_DEFAULT_PROJID; 1204 znode_hold_t *zh; 1205 1206 /* 1207 * skip ctldir, otherwise they will always get invalidated. This will 1208 * cause funny behaviour for the mounted snapdirs. Especially for 1209 * Linux >= 3.18, d_invalidate will detach the mountpoint and prevent 1210 * anyone automount it again as long as someone is still using the 1211 * detached mount. 1212 */ 1213 if (zp->z_is_ctldir) 1214 return (0); 1215 1216 zh = zfs_znode_hold_enter(zfsvfs, obj_num); 1217 1218 mutex_enter(&zp->z_acl_lock); 1219 if (zp->z_acl_cached) { 1220 zfs_acl_free(zp->z_acl_cached); 1221 zp->z_acl_cached = NULL; 1222 } 1223 mutex_exit(&zp->z_acl_lock); 1224 1225 rw_enter(&zp->z_xattr_lock, RW_WRITER); 1226 if (zp->z_xattr_cached) { 1227 nvlist_free(zp->z_xattr_cached); 1228 zp->z_xattr_cached = NULL; 1229 } 1230 rw_exit(&zp->z_xattr_lock); 1231 1232 ASSERT(zp->z_sa_hdl == NULL); 1233 err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); 1234 if (err) { 1235 zfs_znode_hold_exit(zfsvfs, zh); 1236 return (err); 1237 } 1238 1239 dmu_object_info_from_db(db, &doi); 1240 if (doi.doi_bonus_type != DMU_OT_SA && 1241 (doi.doi_bonus_type != DMU_OT_ZNODE || 1242 (doi.doi_bonus_type == DMU_OT_ZNODE && 1243 doi.doi_bonus_size < sizeof (znode_phys_t)))) { 1244 sa_buf_rele(db, NULL); 1245 zfs_znode_hold_exit(zfsvfs, zh); 1246 return (SET_ERROR(EINVAL)); 1247 } 1248 1249 zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL); 1250 1251 /* reload cached values */ 1252 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, 1253 &gen, sizeof (gen)); 1254 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 1255 &zp->z_size, sizeof (zp->z_size)); 1256 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 1257 &links, sizeof (links)); 1258 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 1259 &zp->z_pflags, sizeof (zp->z_pflags)); 1260 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 1261 &z_uid, sizeof (z_uid)); 1262 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, 1263 &z_gid, sizeof (z_gid)); 1264 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 1265 &mode, sizeof (mode)); 1266 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 1267 &atime, 16); 1268 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 1269 &mtime, 16); 1270 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 1271 &ctime, 16); 1272 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16); 1273 1274 if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { 1275 zfs_znode_dmu_fini(zp); 1276 zfs_znode_hold_exit(zfsvfs, zh); 1277 return (SET_ERROR(EIO)); 1278 } 1279 1280 if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) { 1281 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), 1282 &projid, 8); 1283 if (err != 0 && err != ENOENT) { 1284 zfs_znode_dmu_fini(zp); 1285 zfs_znode_hold_exit(zfsvfs, zh); 1286 return 
(SET_ERROR(err)); 1287 } 1288 } 1289 1290 zp->z_projid = projid; 1291 zp->z_mode = ZTOI(zp)->i_mode = mode; 1292 zfs_uid_write(ZTOI(zp), z_uid); 1293 zfs_gid_write(ZTOI(zp), z_gid); 1294 1295 ZFS_TIME_DECODE(&tmp_ts, atime); 1296 zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_ts); 1297 ZFS_TIME_DECODE(&tmp_ts, mtime); 1298 zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts); 1299 ZFS_TIME_DECODE(&tmp_ts, ctime); 1300 zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts); 1301 ZFS_TIME_DECODE(&zp->z_btime, btime); 1302 1303 if ((uint32_t)gen != ZTOI(zp)->i_generation) { 1304 zfs_znode_dmu_fini(zp); 1305 zfs_znode_hold_exit(zfsvfs, zh); 1306 return (SET_ERROR(EIO)); 1307 } 1308 1309 set_nlink(ZTOI(zp), (uint32_t)links); 1310 zfs_set_inode_flags(zp, ZTOI(zp)); 1311 1312 zp->z_blksz = doi.doi_data_block_size; 1313 zp->z_atime_dirty = B_FALSE; 1314 zfs_znode_update_vfs(zp); 1315 1316 /* 1317 * If the file has zero links, then it has been unlinked on the send 1318 * side and it must be in the received unlinked set. 1319 * We call zfs_znode_dmu_fini() now to prevent any accesses to the 1320 * stale data and to prevent automatic removal of the file in 1321 * zfs_zinactive(). The file will be removed either when it is removed 1322 * on the send side and the next incremental stream is received or 1323 * when the unlinked set gets processed. 1324 */ 1325 zp->z_unlinked = (ZTOI(zp)->i_nlink == 0); 1326 if (zp->z_unlinked) 1327 zfs_znode_dmu_fini(zp); 1328 1329 zfs_znode_hold_exit(zfsvfs, zh); 1330 1331 return (0); 1332 } 1333 1334 void 1335 zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) 1336 { 1337 zfsvfs_t *zfsvfs = ZTOZSB(zp); 1338 objset_t *os = zfsvfs->z_os; 1339 uint64_t obj = zp->z_id; 1340 uint64_t acl_obj = zfs_external_acl(zp); 1341 znode_hold_t *zh; 1342 1343 zh = zfs_znode_hold_enter(zfsvfs, obj); 1344 if (acl_obj) { 1345 VERIFY(!zp->z_is_sa); 1346 VERIFY(0 == dmu_object_free(os, acl_obj, tx)); 1347 } 1348 VERIFY(0 == dmu_object_free(os, obj, tx)); 1349 zfs_znode_dmu_fini(zp); 1350 zfs_znode_hold_exit(zfsvfs, zh); 1351 } 1352 1353 void 1354 zfs_zinactive(znode_t *zp) 1355 { 1356 zfsvfs_t *zfsvfs = ZTOZSB(zp); 1357 uint64_t z_id = zp->z_id; 1358 znode_hold_t *zh; 1359 1360 ASSERT(zp->z_sa_hdl); 1361 1362 /* 1363 * Don't allow a zfs_zget() while were trying to release this znode. 1364 */ 1365 zh = zfs_znode_hold_enter(zfsvfs, z_id); 1366 1367 mutex_enter(&zp->z_lock); 1368 1369 /* 1370 * If this was the last reference to a file with no links, remove 1371 * the file from the file system unless the file system is mounted 1372 * read-only. That can happen, for example, if the file system was 1373 * originally read-write, the file was opened, then unlinked and 1374 * the file system was made read-only before the file was finally 1375 * closed. The file will remain in the unlinked set. 1376 */ 1377 if (zp->z_unlinked) { 1378 ASSERT(!zfsvfs->z_issnap); 1379 if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) { 1380 mutex_exit(&zp->z_lock); 1381 zfs_znode_hold_exit(zfsvfs, zh); 1382 zfs_rmnode(zp); 1383 return; 1384 } 1385 } 1386 1387 mutex_exit(&zp->z_lock); 1388 zfs_znode_dmu_fini(zp); 1389 1390 zfs_znode_hold_exit(zfsvfs, zh); 1391 } 1392 1393 #if defined(HAVE_INODE_TIMESPEC64_TIMES) 1394 #define zfs_compare_timespec timespec64_compare 1395 #else 1396 #define zfs_compare_timespec timespec_compare 1397 #endif 1398 1399 /* 1400 * Determine whether the znode's atime must be updated. The logic mostly 1401 * duplicates the Linux kernel's relatime_need_update() functionality. 
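 * For example, under this policy a read refreshes the atime only when the
 * current atime is no newer than the mtime or ctime, or when the atime is
 * more than 24 hours old; otherwise the update is skipped entirely.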
1402 * This function is only called if the underlying filesystem actually has 1403 * atime updates enabled. 1404 */ 1405 boolean_t 1406 zfs_relatime_need_update(const struct inode *ip) 1407 { 1408 inode_timespec_t now, tmp_atime, tmp_ts; 1409 1410 gethrestime(&now); 1411 tmp_atime = zpl_inode_get_atime(ip); 1412 /* 1413 * In relatime mode, only update the atime if the previous atime 1414 * is earlier than either the ctime or mtime or if at least a day 1415 * has passed since the last update of atime. 1416 */ 1417 tmp_ts = zpl_inode_get_mtime(ip); 1418 if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0) 1419 return (B_TRUE); 1420 1421 tmp_ts = zpl_inode_get_ctime(ip); 1422 if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0) 1423 return (B_TRUE); 1424 1425 if ((hrtime_t)now.tv_sec - (hrtime_t)tmp_atime.tv_sec >= 24*60*60) 1426 return (B_TRUE); 1427 1428 return (B_FALSE); 1429 } 1430 1431 /* 1432 * Prepare to update znode time stamps. 1433 * 1434 * IN: zp - znode requiring timestamp update 1435 * flag - ATTR_MTIME, ATTR_CTIME flags 1436 * 1437 * OUT: zp - z_seq 1438 * mtime - new mtime 1439 * ctime - new ctime 1440 * 1441 * Note: We don't update atime here, because we rely on Linux VFS to do 1442 * atime updating. 1443 */ 1444 void 1445 zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], 1446 uint64_t ctime[2]) 1447 { 1448 inode_timespec_t now, tmp_ts; 1449 1450 gethrestime(&now); 1451 1452 zp->z_seq++; 1453 1454 if (flag & ATTR_MTIME) { 1455 ZFS_TIME_ENCODE(&now, mtime); 1456 ZFS_TIME_DECODE(&tmp_ts, mtime); 1457 zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts); 1458 if (ZTOZSB(zp)->z_use_fuids) { 1459 zp->z_pflags |= (ZFS_ARCHIVE | 1460 ZFS_AV_MODIFIED); 1461 } 1462 } 1463 1464 if (flag & ATTR_CTIME) { 1465 ZFS_TIME_ENCODE(&now, ctime); 1466 ZFS_TIME_DECODE(&tmp_ts, ctime); 1467 zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts); 1468 if (ZTOZSB(zp)->z_use_fuids) 1469 zp->z_pflags |= ZFS_ARCHIVE; 1470 } 1471 } 1472 1473 /* 1474 * Grow the block size for a file. 1475 * 1476 * IN: zp - znode of file to free data in. 1477 * size - requested block size 1478 * tx - open transaction. 1479 * 1480 * NOTE: this function assumes that the znode is write locked. 1481 */ 1482 void 1483 zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) 1484 { 1485 int error; 1486 u_longlong_t dummy; 1487 1488 if (size <= zp->z_blksz) 1489 return; 1490 /* 1491 * If the file size is already greater than the current blocksize, 1492 * we will not grow. If there is more than one block in a file, 1493 * the blocksize cannot change. 1494 */ 1495 if (zp->z_blksz && zp->z_size > zp->z_blksz) 1496 return; 1497 1498 error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id, 1499 size, 0, tx); 1500 1501 if (error == ENOTSUP) 1502 return; 1503 ASSERT0(error); 1504 1505 /* What blocksize did we actually get? */ 1506 dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); 1507 } 1508 1509 /* 1510 * Increase the file length 1511 * 1512 * IN: zp - znode of file to free data in. 1513 * end - new end-of-file 1514 * 1515 * RETURN: 0 on success, error code on failure 1516 */ 1517 static int 1518 zfs_extend(znode_t *zp, uint64_t end) 1519 { 1520 zfsvfs_t *zfsvfs = ZTOZSB(zp); 1521 dmu_tx_t *tx; 1522 zfs_locked_range_t *lr; 1523 uint64_t newblksz; 1524 int error; 1525 1526 /* 1527 * We will change zp_size, lock the whole file. 1528 */ 1529 lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); 1530 1531 /* 1532 * Nothing to do if file already at desired length. 
1533 */ 1534 if (end <= zp->z_size) { 1535 zfs_rangelock_exit(lr); 1536 return (0); 1537 } 1538 tx = dmu_tx_create(zfsvfs->z_os); 1539 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1540 zfs_sa_upgrade_txholds(tx, zp); 1541 if (end > zp->z_blksz && 1542 (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { 1543 /* 1544 * We are growing the file past the current block size. 1545 */ 1546 if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) { 1547 /* 1548 * File's blocksize is already larger than the 1549 * "recordsize" property. Only let it grow to 1550 * the next power of 2. 1551 */ 1552 ASSERT(!ISP2(zp->z_blksz)); 1553 newblksz = MIN(end, 1 << highbit64(zp->z_blksz)); 1554 } else { 1555 newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz); 1556 } 1557 dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); 1558 } else { 1559 newblksz = 0; 1560 } 1561 1562 error = dmu_tx_assign(tx, TXG_WAIT); 1563 if (error) { 1564 dmu_tx_abort(tx); 1565 zfs_rangelock_exit(lr); 1566 return (error); 1567 } 1568 1569 if (newblksz) 1570 zfs_grow_blocksize(zp, newblksz, tx); 1571 1572 zp->z_size = end; 1573 1574 VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), 1575 &zp->z_size, sizeof (zp->z_size), tx)); 1576 1577 zfs_rangelock_exit(lr); 1578 1579 dmu_tx_commit(tx); 1580 1581 return (0); 1582 } 1583 1584 /* 1585 * zfs_zero_partial_page - Modeled after update_pages() but 1586 * with different arguments and semantics for use by zfs_freesp(). 1587 * 1588 * Zeroes a piece of a single page cache entry for zp at offset 1589 * start and length len. 1590 * 1591 * Caller must acquire a range lock on the file for the region 1592 * being zeroed in order that the ARC and page cache stay in sync. 1593 */ 1594 static void 1595 zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len) 1596 { 1597 struct address_space *mp = ZTOI(zp)->i_mapping; 1598 struct page *pp; 1599 int64_t off; 1600 void *pb; 1601 1602 ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK)); 1603 1604 off = start & (PAGE_SIZE - 1); 1605 start &= PAGE_MASK; 1606 1607 pp = find_lock_page(mp, start >> PAGE_SHIFT); 1608 if (pp) { 1609 if (mapping_writably_mapped(mp)) 1610 flush_dcache_page(pp); 1611 1612 pb = kmap(pp); 1613 memset(pb + off, 0, len); 1614 kunmap(pp); 1615 1616 if (mapping_writably_mapped(mp)) 1617 flush_dcache_page(pp); 1618 1619 mark_page_accessed(pp); 1620 SetPageUptodate(pp); 1621 ClearPageError(pp); 1622 unlock_page(pp); 1623 put_page(pp); 1624 } 1625 } 1626 1627 /* 1628 * Free space in a file. 1629 * 1630 * IN: zp - znode of file to free data in. 1631 * off - start of section to free. 1632 * len - length of section to free. 1633 * 1634 * RETURN: 0 on success, error code on failure 1635 */ 1636 static int 1637 zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) 1638 { 1639 zfsvfs_t *zfsvfs = ZTOZSB(zp); 1640 zfs_locked_range_t *lr; 1641 int error; 1642 1643 /* 1644 * Lock the range being freed. 1645 */ 1646 lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); 1647 1648 /* 1649 * Nothing to do if file already at desired length. 1650 */ 1651 if (off >= zp->z_size) { 1652 zfs_rangelock_exit(lr); 1653 return (0); 1654 } 1655 1656 if (off + len > zp->z_size) 1657 len = zp->z_size - off; 1658 1659 error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); 1660 1661 /* 1662 * Zero partial page cache entries. This must be done under a 1663 * range lock in order to keep the ARC and page cache in sync. 
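	 * For example, with 4K pages, punching off=1000 len=3000 falls
	 * entirely inside page 0, so only bytes 1000-3999 of that page are
	 * zeroed; punching off=1000 len=8000 truncates page 1 completely and
	 * zeroes the partial head (1000-4095) and tail (8192-8999) by hand.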
1664 */ 1665 if (zn_has_cached_data(zp, off, off + len - 1)) { 1666 loff_t first_page, last_page, page_len; 1667 loff_t first_page_offset, last_page_offset; 1668 1669 /* first possible full page in hole */ 1670 first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT; 1671 /* last page of hole */ 1672 last_page = (off + len) >> PAGE_SHIFT; 1673 1674 /* offset of first_page */ 1675 first_page_offset = first_page << PAGE_SHIFT; 1676 /* offset of last_page */ 1677 last_page_offset = last_page << PAGE_SHIFT; 1678 1679 /* truncate whole pages */ 1680 if (last_page_offset > first_page_offset) { 1681 truncate_inode_pages_range(ZTOI(zp)->i_mapping, 1682 first_page_offset, last_page_offset - 1); 1683 } 1684 1685 /* truncate sub-page ranges */ 1686 if (first_page > last_page) { 1687 /* entire punched area within a single page */ 1688 zfs_zero_partial_page(zp, off, len); 1689 } else { 1690 /* beginning of punched area at the end of a page */ 1691 page_len = first_page_offset - off; 1692 if (page_len > 0) 1693 zfs_zero_partial_page(zp, off, page_len); 1694 1695 /* end of punched area at the beginning of a page */ 1696 page_len = off + len - last_page_offset; 1697 if (page_len > 0) 1698 zfs_zero_partial_page(zp, last_page_offset, 1699 page_len); 1700 } 1701 } 1702 zfs_rangelock_exit(lr); 1703 1704 return (error); 1705 } 1706 1707 /* 1708 * Truncate a file 1709 * 1710 * IN: zp - znode of file to free data in. 1711 * end - new end-of-file. 1712 * 1713 * RETURN: 0 on success, error code on failure 1714 */ 1715 static int 1716 zfs_trunc(znode_t *zp, uint64_t end) 1717 { 1718 zfsvfs_t *zfsvfs = ZTOZSB(zp); 1719 dmu_tx_t *tx; 1720 zfs_locked_range_t *lr; 1721 int error; 1722 sa_bulk_attr_t bulk[2]; 1723 int count = 0; 1724 1725 /* 1726 * We will change zp_size, lock the whole file. 1727 */ 1728 lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); 1729 1730 /* 1731 * Nothing to do if file already at desired length. 1732 */ 1733 if (end >= zp->z_size) { 1734 zfs_rangelock_exit(lr); 1735 return (0); 1736 } 1737 1738 error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, 1739 DMU_OBJECT_END); 1740 if (error) { 1741 zfs_rangelock_exit(lr); 1742 return (error); 1743 } 1744 tx = dmu_tx_create(zfsvfs->z_os); 1745 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1746 zfs_sa_upgrade_txholds(tx, zp); 1747 dmu_tx_mark_netfree(tx); 1748 error = dmu_tx_assign(tx, TXG_WAIT); 1749 if (error) { 1750 dmu_tx_abort(tx); 1751 zfs_rangelock_exit(lr); 1752 return (error); 1753 } 1754 1755 zp->z_size = end; 1756 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), 1757 NULL, &zp->z_size, sizeof (zp->z_size)); 1758 1759 if (end == 0) { 1760 zp->z_pflags &= ~ZFS_SPARSE; 1761 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 1762 NULL, &zp->z_pflags, 8); 1763 } 1764 VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); 1765 1766 dmu_tx_commit(tx); 1767 zfs_rangelock_exit(lr); 1768 1769 return (0); 1770 } 1771 1772 /* 1773 * Free space in a file 1774 * 1775 * IN: zp - znode of file to free data in. 1776 * off - start of range 1777 * len - end of range (0 => EOF) 1778 * flag - current file open mode flags. 
1779 * log - TRUE if this action should be logged 1780 * 1781 * RETURN: 0 on success, error code on failure 1782 */ 1783 int 1784 zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) 1785 { 1786 dmu_tx_t *tx; 1787 zfsvfs_t *zfsvfs = ZTOZSB(zp); 1788 zilog_t *zilog = zfsvfs->z_log; 1789 uint64_t mode; 1790 uint64_t mtime[2], ctime[2]; 1791 sa_bulk_attr_t bulk[3]; 1792 int count = 0; 1793 int error; 1794 1795 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode, 1796 sizeof (mode))) != 0) 1797 return (error); 1798 1799 if (off > zp->z_size) { 1800 error = zfs_extend(zp, off+len); 1801 if (error == 0 && log) 1802 goto log; 1803 goto out; 1804 } 1805 1806 if (len == 0) { 1807 error = zfs_trunc(zp, off); 1808 } else { 1809 if ((error = zfs_free_range(zp, off, len)) == 0 && 1810 off + len > zp->z_size) 1811 error = zfs_extend(zp, off+len); 1812 } 1813 if (error || !log) 1814 goto out; 1815 log: 1816 tx = dmu_tx_create(zfsvfs->z_os); 1817 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1818 zfs_sa_upgrade_txholds(tx, zp); 1819 error = dmu_tx_assign(tx, TXG_WAIT); 1820 if (error) { 1821 dmu_tx_abort(tx); 1822 goto out; 1823 } 1824 1825 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16); 1826 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16); 1827 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 1828 NULL, &zp->z_pflags, 8); 1829 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); 1830 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1831 ASSERT(error == 0); 1832 1833 zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); 1834 1835 dmu_tx_commit(tx); 1836 1837 zfs_znode_update_vfs(zp); 1838 error = 0; 1839 1840 out: 1841 /* 1842 * Truncate the page cache - for file truncate operations, use 1843 * the purpose-built API for truncations. For punching operations, 1844 * the truncation is handled under a range lock in zfs_free_range. 1845 */ 1846 if (len == 0) 1847 truncate_setsize(ZTOI(zp), off); 1848 return (error); 1849 } 1850 1851 void 1852 zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) 1853 { 1854 struct super_block *sb; 1855 zfsvfs_t *zfsvfs; 1856 uint64_t moid, obj, sa_obj, version; 1857 uint64_t sense = ZFS_CASE_SENSITIVE; 1858 uint64_t norm = 0; 1859 nvpair_t *elem; 1860 int size; 1861 int error; 1862 int i; 1863 znode_t *rootzp = NULL; 1864 vattr_t vattr; 1865 znode_t *zp; 1866 zfs_acl_ids_t acl_ids; 1867 1868 /* 1869 * First attempt to create master node. 1870 */ 1871 /* 1872 * In an empty objset, there are no blocks to read and thus 1873 * there can be no i/o errors (which we assert below). 1874 */ 1875 moid = MASTER_NODE_OBJ; 1876 error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, 1877 DMU_OT_NONE, 0, tx); 1878 ASSERT(error == 0); 1879 1880 /* 1881 * Set starting attributes. 
1882 */ 1883 version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); 1884 elem = NULL; 1885 while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { 1886 /* For the moment we expect all zpl props to be uint64_ts */ 1887 uint64_t val; 1888 const char *name; 1889 1890 ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); 1891 VERIFY(nvpair_value_uint64(elem, &val) == 0); 1892 name = nvpair_name(elem); 1893 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { 1894 if (val < version) 1895 version = val; 1896 } else { 1897 error = zap_update(os, moid, name, 8, 1, &val, tx); 1898 } 1899 ASSERT(error == 0); 1900 if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) 1901 norm = val; 1902 else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) 1903 sense = val; 1904 } 1905 ASSERT(version != 0); 1906 error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); 1907 ASSERT(error == 0); 1908 1909 /* 1910 * Create zap object used for SA attribute registration 1911 */ 1912 1913 if (version >= ZPL_VERSION_SA) { 1914 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 1915 DMU_OT_NONE, 0, tx); 1916 error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 1917 ASSERT(error == 0); 1918 } else { 1919 sa_obj = 0; 1920 } 1921 /* 1922 * Create a delete queue. 1923 */ 1924 obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); 1925 1926 error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); 1927 ASSERT(error == 0); 1928 1929 /* 1930 * Create root znode. Create minimal znode/inode/zfsvfs/sb 1931 * to allow zfs_mknode to work. 1932 */ 1933 vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID; 1934 vattr.va_mode = S_IFDIR|0755; 1935 vattr.va_uid = crgetuid(cr); 1936 vattr.va_gid = crgetgid(cr); 1937 1938 rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); 1939 rootzp->z_unlinked = B_FALSE; 1940 rootzp->z_atime_dirty = B_FALSE; 1941 rootzp->z_is_sa = USE_SA(version, os); 1942 rootzp->z_pflags = 0; 1943 1944 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 1945 zfsvfs->z_os = os; 1946 zfsvfs->z_parent = zfsvfs; 1947 zfsvfs->z_version = version; 1948 zfsvfs->z_use_fuids = USE_FUIDS(version, os); 1949 zfsvfs->z_use_sa = USE_SA(version, os); 1950 zfsvfs->z_norm = norm; 1951 1952 sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP); 1953 sb->s_fs_info = zfsvfs; 1954 1955 ZTOI(rootzp)->i_sb = sb; 1956 1957 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 1958 &zfsvfs->z_attr_table); 1959 1960 ASSERT(error == 0); 1961 1962 /* 1963 * Fold case on file systems that are always or sometimes case 1964 * insensitive. 
	 */
	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;

	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));

	size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
	zfsvfs->z_hold_size = size;
	zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size,
	    KM_SLEEP);
	zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
	for (i = 0; i != size; i++) {
		avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare,
		    sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
		mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
	}

	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
	    cr, NULL, &acl_ids, zfs_init_idmap));
	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, rootzp);
	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
	ASSERT(error == 0);
	zfs_acl_ids_free(&acl_ids);

	atomic_set(&ZTOI(rootzp)->i_count, 0);
	sa_handle_destroy(rootzp->z_sa_hdl);
	kmem_cache_free(znode_cache, rootzp);

	for (i = 0; i != size; i++) {
		avl_destroy(&zfsvfs->z_hold_trees[i]);
		mutex_destroy(&zfsvfs->z_hold_locks[i]);
	}

	mutex_destroy(&zfsvfs->z_znodes_lock);

	vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size);
	vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size);
	kmem_free(sb, sizeof (struct super_block));
	kmem_free(zfsvfs, sizeof (zfsvfs_t));
}
#endif /* _KERNEL */

static int
zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
{
	uint64_t sa_obj = 0;
	int error;

	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
	if (error != 0 && error != ENOENT)
		return (error);

	error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
	return (error);
}

static int
zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
    dmu_buf_t **db, const void *tag)
{
	dmu_object_info_t doi;
	int error;

	if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
		return (error);

	dmu_object_info_from_db(*db, &doi);
	if ((doi.doi_bonus_type != DMU_OT_SA &&
	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
	    doi.doi_bonus_size < sizeof (znode_phys_t))) {
		sa_buf_rele(*db, tag);
		return (SET_ERROR(ENOTSUP));
	}

	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
	if (error != 0) {
		sa_buf_rele(*db, tag);
		return (error);
	}

	return (0);
}

static void
zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, const void *tag)
{
	sa_handle_destroy(hdl);
	sa_buf_rele(db, tag);
}
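
/*
 * A minimal sketch (hypothetical function, never compiled) of the pattern
 * the two helpers above enable: hold an SA handle for an object, read one
 * or more attributes through the per-objset attribute table, then release
 * the handle.  zfs_obj_to_pobj() and zfs_obj_to_path() below follow this
 * same hold/lookup/release pattern.
 */
#if 0
static int
zfs_obj_get_gen_example(objset_t *osp, uint64_t obj, sa_attr_type_t *sa_table,
    uint64_t *genp)
{
	sa_handle_t *hdl;
	dmu_buf_t *db;
	int error;

	if ((error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG)) != 0)
		return (error);

	error = sa_lookup(hdl, sa_table[ZPL_GEN], genp, sizeof (*genp));

	zfs_release_sa_handle(hdl, db, FTAG);
	return (error);
}
#endif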

/*
 * Given an object number, return its parent object number and whether
 * or not the object is an extended attribute directory.
 */
static int
zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
    uint64_t *pobjp, int *is_xattrdir)
{
	uint64_t parent;
	uint64_t pflags;
	uint64_t mode;
	uint64_t parent_mode;
	sa_bulk_attr_t bulk[3];
	sa_handle_t *sa_hdl;
	dmu_buf_t *sa_db;
	int count = 0;
	int error;

	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
	    &parent, sizeof (parent));
	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
	    &pflags, sizeof (pflags));
	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
	    &mode, sizeof (mode));

	if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
		return (error);

	/*
	 * When a link is removed, its parent pointer is not changed and will
	 * be invalid.  There are two cases where a link is removed but the
	 * file stays around: when it goes to the delete queue and when there
	 * are additional links.
	 */
	error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
	if (error != 0)
		return (error);

	error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
	zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
	if (error != 0)
		return (error);

	*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);

	/*
	 * Extended attributes can be applied to files, directories, etc.,
	 * so an extended attribute directory may hang off any type of file.
	 * Otherwise the parent must be a directory.
	 */
	if (!*is_xattrdir && !S_ISDIR(parent_mode))
		return (SET_ERROR(EINVAL));

	*pobjp = parent;

	return (0);
}

/*
 * Given an object number, return some zpl level statistics
 */
static int
zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
    zfs_stat_t *sb)
{
	sa_bulk_attr_t bulk[4];
	int count = 0;

	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
	    &sb->zs_mode, sizeof (sb->zs_mode));
	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
	    &sb->zs_gen, sizeof (sb->zs_gen));
	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
	    &sb->zs_links, sizeof (sb->zs_links));
	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
	    &sb->zs_ctime, sizeof (sb->zs_ctime));

	return (sa_bulk_lookup(hdl, bulk, count));
}
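
/*
 * Build the path for an object by walking its parent pointers back to the
 * root.  The path is assembled right to left: each component is written in
 * front of the previous one, starting at the end of 'buf', and the finished
 * string is moved to the front of the buffer before returning.  Objects on
 * the unlinked (delete) set have no valid path and return ESTALE.
 */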
static int
zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
    sa_attr_type_t *sa_table, char *buf, int len)
{
	sa_handle_t *sa_hdl;
	sa_handle_t *prevhdl = NULL;
	dmu_buf_t *prevdb = NULL;
	dmu_buf_t *sa_db = NULL;
	char *path = buf + len - 1;
	int error;

	*path = '\0';
	sa_hdl = hdl;

	uint64_t deleteq_obj;
	VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ,
	    ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj));
	error = zap_lookup_int(osp, deleteq_obj, obj);
	if (error == 0) {
		return (ESTALE);
	} else if (error != ENOENT) {
		return (error);
	}

	for (;;) {
		uint64_t pobj = 0;
		char component[MAXNAMELEN + 2];
		size_t complen;
		int is_xattrdir = 0;

		if (prevdb) {
			ASSERT(prevhdl != NULL);
			zfs_release_sa_handle(prevhdl, prevdb, FTAG);
		}

		if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
		    &is_xattrdir)) != 0)
			break;

		if (pobj == obj) {
			if (path[0] != '/')
				*--path = '/';
			break;
		}

		component[0] = '/';
		if (is_xattrdir) {
			strcpy(component + 1, "<xattrdir>");
		} else {
			error = zap_value_search(osp, pobj, obj,
			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
			if (error != 0)
				break;
		}

		complen = strlen(component);
		path -= complen;
		ASSERT(path >= buf);
		memcpy(path, component, complen);
		obj = pobj;

		if (sa_hdl != hdl) {
			prevhdl = sa_hdl;
			prevdb = sa_db;
		}
		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
		if (error != 0) {
			sa_hdl = prevhdl;
			sa_db = prevdb;
			break;
		}
	}

	if (sa_hdl != NULL && sa_hdl != hdl) {
		ASSERT(sa_db != NULL);
		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
	}

	if (error == 0)
		(void) memmove(buf, path, buf + len - path);

	return (error);
}

int
zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
{
	sa_attr_type_t *sa_table;
	sa_handle_t *hdl;
	dmu_buf_t *db;
	int error;

	error = zfs_sa_setup(osp, &sa_table);
	if (error != 0)
		return (error);

	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
	if (error != 0)
		return (error);

	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);

	zfs_release_sa_handle(hdl, db, FTAG);
	return (error);
}

int
zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
    char *buf, int len)
{
	char *path = buf + len - 1;
	sa_attr_type_t *sa_table;
	sa_handle_t *hdl;
	dmu_buf_t *db;
	int error;

	*path = '\0';

	error = zfs_sa_setup(osp, &sa_table);
	if (error != 0)
		return (error);

	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
	if (error != 0)
		return (error);

	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
	if (error != 0) {
		zfs_release_sa_handle(hdl, db, FTAG);
		return (error);
	}

	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);

	zfs_release_sa_handle(hdl, db, FTAG);
	return (error);
}
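
/*
 * A minimal usage sketch (hypothetical function, never compiled) for
 * zfs_obj_to_path() above.  The MAXPATHLEN-sized buffer is an assumption;
 * any buffer large enough for the expected path works.
 */
#if 0
static int
zfs_obj_to_path_example(objset_t *osp, uint64_t obj)
{
	char path[MAXPATHLEN];
	int error;

	error = zfs_obj_to_path(osp, obj, path, sizeof (path));
	if (error == 0) {
		/* 'path' now holds the object's path, e.g. "/dir/file". */
	} else if (error == ESTALE) {
		/* The object is on the unlinked (delete) set; no path. */
	}
	return (error);
}
#endif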

/*
 * Read a property stored within the master node.
 */
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
	uint64_t *cached_copy = NULL;

	/*
	 * Figure out where in the objset_t the cached copy would live, if it
	 * is available for the requested property.
	 */
	if (os != NULL) {
		switch (prop) {
		case ZFS_PROP_VERSION:
			cached_copy = &os->os_version;
			break;
		case ZFS_PROP_NORMALIZE:
			cached_copy = &os->os_normalization;
			break;
		case ZFS_PROP_UTF8ONLY:
			cached_copy = &os->os_utf8only;
			break;
		case ZFS_PROP_CASE:
			cached_copy = &os->os_casesensitivity;
			break;
		default:
			break;
		}
	}
	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
		*value = *cached_copy;
		return (0);
	}

	/*
	 * If the property wasn't cached, look up the file system's value for
	 * the property.  For the version property, we look up a slightly
	 * different string.
	 */
	const char *pname;
	int error = ENOENT;
	if (prop == ZFS_PROP_VERSION)
		pname = ZPL_VERSION_STR;
	else
		pname = zfs_prop_to_name(prop);

	if (os != NULL) {
		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
	}

	if (error == ENOENT) {
		/* No value set, use the default value */
		switch (prop) {
		case ZFS_PROP_VERSION:
			*value = ZPL_VERSION;
			break;
		case ZFS_PROP_NORMALIZE:
		case ZFS_PROP_UTF8ONLY:
			*value = 0;
			break;
		case ZFS_PROP_CASE:
			*value = ZFS_CASE_SENSITIVE;
			break;
		case ZFS_PROP_ACLTYPE:
			*value = ZFS_ACLTYPE_OFF;
			break;
		default:
			return (error);
		}
		error = 0;
	}

	/*
	 * If one of the methods for getting the property value above worked,
	 * copy it into the objset_t's cache.
	 */
	if (error == 0 && cached_copy != NULL) {
		*cached_copy = *value;
	}

	return (error);
}

#if defined(_KERNEL)
EXPORT_SYMBOL(zfs_create_fs);
EXPORT_SYMBOL(zfs_obj_to_path);

/* CSTYLED */
module_param(zfs_object_mutex_size, uint, 0644);
MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
module_param(zfs_unlink_suspend_progress, int, 0644);
MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks "
	"(debug - leaks space into the unlinked set)");
#endif
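
/*
 * A minimal usage sketch (hypothetical function, never compiled) of
 * zfs_get_zplprop() above: read the ZPL version for an objset.  When the
 * property has never been written to the master node, the function falls
 * back to the compiled-in default (ZPL_VERSION), so a zero return value
 * always leaves *versionp initialized.
 */
#if 0
static int
zfs_zpl_version_example(objset_t *os, uint64_t *versionp)
{
	return (zfs_get_zplprop(os, ZFS_PROP_VERSION, versionp));
}
#endif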